1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_mount.h" 14 #include "xfs_inode.h" 15 #include "xfs_trans.h" 16 #include "xfs_buf_item.h" 17 #include "xfs_btree.h" 18 #include "xfs_errortag.h" 19 #include "xfs_error.h" 20 #include "xfs_trace.h" 21 #include "xfs_alloc.h" 22 #include "xfs_log.h" 23 #include "xfs_btree_staging.h" 24 #include "xfs_ag.h" 25 #include "xfs_alloc_btree.h" 26 #include "xfs_ialloc_btree.h" 27 #include "xfs_bmap_btree.h" 28 #include "xfs_rmap_btree.h" 29 #include "xfs_refcount_btree.h" 30 31 /* 32 * Btree magic numbers. 33 */ 34 static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 35 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 36 XFS_FIBT_MAGIC, 0 }, 37 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC, 38 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, 39 XFS_REFC_CRC_MAGIC } 40 }; 41 42 uint32_t 43 xfs_btree_magic( 44 int crc, 45 xfs_btnum_t btnum) 46 { 47 uint32_t magic = xfs_magics[crc][btnum]; 48 49 /* Ensure we asked for crc for crc-only magics. */ 50 ASSERT(magic != 0); 51 return magic; 52 } 53 54 /* 55 * These sibling pointer checks are optimised for null sibling pointers. This 56 * happens a lot, and we don't need to byte swap at runtime if the sibling 57 * pointer is NULL. 58 * 59 * These are explicitly marked at inline because the cost of calling them as 60 * functions instead of inlining them is about 36 bytes extra code per call site 61 * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these 62 * two sibling check functions reduces the compiled code size by over 300 63 * bytes. 64 */ 65 static inline xfs_failaddr_t 66 xfs_btree_check_lblock_siblings( 67 struct xfs_mount *mp, 68 struct xfs_btree_cur *cur, 69 int level, 70 xfs_fsblock_t fsb, 71 __be64 dsibling) 72 { 73 xfs_fsblock_t sibling; 74 75 if (dsibling == cpu_to_be64(NULLFSBLOCK)) 76 return NULL; 77 78 sibling = be64_to_cpu(dsibling); 79 if (sibling == fsb) 80 return __this_address; 81 if (level >= 0) { 82 if (!xfs_btree_check_lptr(cur, sibling, level + 1)) 83 return __this_address; 84 } else { 85 if (!xfs_verify_fsbno(mp, sibling)) 86 return __this_address; 87 } 88 89 return NULL; 90 } 91 92 static inline xfs_failaddr_t 93 xfs_btree_check_sblock_siblings( 94 struct xfs_perag *pag, 95 struct xfs_btree_cur *cur, 96 int level, 97 xfs_agblock_t agbno, 98 __be32 dsibling) 99 { 100 xfs_agblock_t sibling; 101 102 if (dsibling == cpu_to_be32(NULLAGBLOCK)) 103 return NULL; 104 105 sibling = be32_to_cpu(dsibling); 106 if (sibling == agbno) 107 return __this_address; 108 if (level >= 0) { 109 if (!xfs_btree_check_sptr(cur, sibling, level + 1)) 110 return __this_address; 111 } else { 112 if (!xfs_verify_agbno(pag, sibling)) 113 return __this_address; 114 } 115 return NULL; 116 } 117 118 /* 119 * Check a long btree block header. Return the address of the failing check, 120 * or NULL if everything is ok. 121 */ 122 xfs_failaddr_t 123 __xfs_btree_check_lblock( 124 struct xfs_btree_cur *cur, 125 struct xfs_btree_block *block, 126 int level, 127 struct xfs_buf *bp) 128 { 129 struct xfs_mount *mp = cur->bc_mp; 130 xfs_btnum_t btnum = cur->bc_btnum; 131 int crc = xfs_has_crc(mp); 132 xfs_failaddr_t fa; 133 xfs_fsblock_t fsb = NULLFSBLOCK; 134 135 if (crc) { 136 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 137 return __this_address; 138 if (block->bb_u.l.bb_blkno != 139 cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL)) 140 return __this_address; 141 if (block->bb_u.l.bb_pad != cpu_to_be32(0)) 142 return __this_address; 143 } 144 145 if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) 146 return __this_address; 147 if (be16_to_cpu(block->bb_level) != level) 148 return __this_address; 149 if (be16_to_cpu(block->bb_numrecs) > 150 cur->bc_ops->get_maxrecs(cur, level)) 151 return __this_address; 152 153 if (bp) 154 fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); 155 156 fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb, 157 block->bb_u.l.bb_leftsib); 158 if (!fa) 159 fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb, 160 block->bb_u.l.bb_rightsib); 161 return fa; 162 } 163 164 /* Check a long btree block header. */ 165 static int 166 xfs_btree_check_lblock( 167 struct xfs_btree_cur *cur, 168 struct xfs_btree_block *block, 169 int level, 170 struct xfs_buf *bp) 171 { 172 struct xfs_mount *mp = cur->bc_mp; 173 xfs_failaddr_t fa; 174 175 fa = __xfs_btree_check_lblock(cur, block, level, bp); 176 if (XFS_IS_CORRUPT(mp, fa != NULL) || 177 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) { 178 if (bp) 179 trace_xfs_btree_corrupt(bp, _RET_IP_); 180 return -EFSCORRUPTED; 181 } 182 return 0; 183 } 184 185 /* 186 * Check a short btree block header. Return the address of the failing check, 187 * or NULL if everything is ok. 188 */ 189 xfs_failaddr_t 190 __xfs_btree_check_sblock( 191 struct xfs_btree_cur *cur, 192 struct xfs_btree_block *block, 193 int level, 194 struct xfs_buf *bp) 195 { 196 struct xfs_mount *mp = cur->bc_mp; 197 struct xfs_perag *pag = cur->bc_ag.pag; 198 xfs_btnum_t btnum = cur->bc_btnum; 199 int crc = xfs_has_crc(mp); 200 xfs_failaddr_t fa; 201 xfs_agblock_t agbno = NULLAGBLOCK; 202 203 if (crc) { 204 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 205 return __this_address; 206 if (block->bb_u.s.bb_blkno != 207 cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL)) 208 return __this_address; 209 } 210 211 if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) 212 return __this_address; 213 if (be16_to_cpu(block->bb_level) != level) 214 return __this_address; 215 if (be16_to_cpu(block->bb_numrecs) > 216 cur->bc_ops->get_maxrecs(cur, level)) 217 return __this_address; 218 219 if (bp) 220 agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); 221 222 fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno, 223 block->bb_u.s.bb_leftsib); 224 if (!fa) 225 fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno, 226 block->bb_u.s.bb_rightsib); 227 return fa; 228 } 229 230 /* Check a short btree block header. */ 231 STATIC int 232 xfs_btree_check_sblock( 233 struct xfs_btree_cur *cur, 234 struct xfs_btree_block *block, 235 int level, 236 struct xfs_buf *bp) 237 { 238 struct xfs_mount *mp = cur->bc_mp; 239 xfs_failaddr_t fa; 240 241 fa = __xfs_btree_check_sblock(cur, block, level, bp); 242 if (XFS_IS_CORRUPT(mp, fa != NULL) || 243 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) { 244 if (bp) 245 trace_xfs_btree_corrupt(bp, _RET_IP_); 246 return -EFSCORRUPTED; 247 } 248 return 0; 249 } 250 251 /* 252 * Debug routine: check that block header is ok. 253 */ 254 int 255 xfs_btree_check_block( 256 struct xfs_btree_cur *cur, /* btree cursor */ 257 struct xfs_btree_block *block, /* generic btree block pointer */ 258 int level, /* level of the btree block */ 259 struct xfs_buf *bp) /* buffer containing block, if any */ 260 { 261 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 262 return xfs_btree_check_lblock(cur, block, level, bp); 263 else 264 return xfs_btree_check_sblock(cur, block, level, bp); 265 } 266 267 /* Check that this long pointer is valid and points within the fs. */ 268 bool 269 xfs_btree_check_lptr( 270 struct xfs_btree_cur *cur, 271 xfs_fsblock_t fsbno, 272 int level) 273 { 274 if (level <= 0) 275 return false; 276 return xfs_verify_fsbno(cur->bc_mp, fsbno); 277 } 278 279 /* Check that this short pointer is valid and points within the AG. */ 280 bool 281 xfs_btree_check_sptr( 282 struct xfs_btree_cur *cur, 283 xfs_agblock_t agbno, 284 int level) 285 { 286 if (level <= 0) 287 return false; 288 return xfs_verify_agbno(cur->bc_ag.pag, agbno); 289 } 290 291 /* 292 * Check that a given (indexed) btree pointer at a certain level of a 293 * btree is valid and doesn't point past where it should. 294 */ 295 static int 296 xfs_btree_check_ptr( 297 struct xfs_btree_cur *cur, 298 const union xfs_btree_ptr *ptr, 299 int index, 300 int level) 301 { 302 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 303 if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]), 304 level)) 305 return 0; 306 xfs_err(cur->bc_mp, 307 "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.", 308 cur->bc_ino.ip->i_ino, 309 cur->bc_ino.whichfork, cur->bc_btnum, 310 level, index); 311 } else { 312 if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]), 313 level)) 314 return 0; 315 xfs_err(cur->bc_mp, 316 "AG %u: Corrupt btree %d pointer at level %d index %d.", 317 cur->bc_ag.pag->pag_agno, cur->bc_btnum, 318 level, index); 319 } 320 321 return -EFSCORRUPTED; 322 } 323 324 #ifdef DEBUG 325 # define xfs_btree_debug_check_ptr xfs_btree_check_ptr 326 #else 327 # define xfs_btree_debug_check_ptr(...) (0) 328 #endif 329 330 /* 331 * Calculate CRC on the whole btree block and stuff it into the 332 * long-form btree header. 333 * 334 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 335 * it into the buffer so recovery knows what the last modification was that made 336 * it to disk. 337 */ 338 void 339 xfs_btree_lblock_calc_crc( 340 struct xfs_buf *bp) 341 { 342 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 343 struct xfs_buf_log_item *bip = bp->b_log_item; 344 345 if (!xfs_has_crc(bp->b_mount)) 346 return; 347 if (bip) 348 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 349 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 350 } 351 352 bool 353 xfs_btree_lblock_verify_crc( 354 struct xfs_buf *bp) 355 { 356 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 357 struct xfs_mount *mp = bp->b_mount; 358 359 if (xfs_has_crc(mp)) { 360 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) 361 return false; 362 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 363 } 364 365 return true; 366 } 367 368 /* 369 * Calculate CRC on the whole btree block and stuff it into the 370 * short-form btree header. 371 * 372 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 373 * it into the buffer so recovery knows what the last modification was that made 374 * it to disk. 375 */ 376 void 377 xfs_btree_sblock_calc_crc( 378 struct xfs_buf *bp) 379 { 380 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 381 struct xfs_buf_log_item *bip = bp->b_log_item; 382 383 if (!xfs_has_crc(bp->b_mount)) 384 return; 385 if (bip) 386 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 387 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 388 } 389 390 bool 391 xfs_btree_sblock_verify_crc( 392 struct xfs_buf *bp) 393 { 394 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 395 struct xfs_mount *mp = bp->b_mount; 396 397 if (xfs_has_crc(mp)) { 398 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 399 return false; 400 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 401 } 402 403 return true; 404 } 405 406 static int 407 xfs_btree_free_block( 408 struct xfs_btree_cur *cur, 409 struct xfs_buf *bp) 410 { 411 int error; 412 413 error = cur->bc_ops->free_block(cur, bp); 414 if (!error) { 415 xfs_trans_binval(cur->bc_tp, bp); 416 XFS_BTREE_STATS_INC(cur, free); 417 } 418 return error; 419 } 420 421 /* 422 * Delete the btree cursor. 423 */ 424 void 425 xfs_btree_del_cursor( 426 struct xfs_btree_cur *cur, /* btree cursor */ 427 int error) /* del because of error */ 428 { 429 int i; /* btree level */ 430 431 /* 432 * Clear the buffer pointers and release the buffers. If we're doing 433 * this because of an error, inspect all of the entries in the bc_bufs 434 * array for buffers to be unlocked. This is because some of the btree 435 * code works from level n down to 0, and if we get an error along the 436 * way we won't have initialized all the entries down to 0. 437 */ 438 for (i = 0; i < cur->bc_nlevels; i++) { 439 if (cur->bc_levels[i].bp) 440 xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp); 441 else if (!error) 442 break; 443 } 444 445 /* 446 * If we are doing a BMBT update, the number of unaccounted blocks 447 * allocated during this cursor life time should be zero. If it's not 448 * zero, then we should be shut down or on our way to shutdown due to 449 * cancelling a dirty transaction on error. 450 */ 451 ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 || 452 xfs_is_shutdown(cur->bc_mp) || error != 0); 453 if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) 454 kmem_free(cur->bc_ops); 455 if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag) 456 xfs_perag_put(cur->bc_ag.pag); 457 kmem_cache_free(cur->bc_cache, cur); 458 } 459 460 /* 461 * Duplicate the btree cursor. 462 * Allocate a new one, copy the record, re-get the buffers. 463 */ 464 int /* error */ 465 xfs_btree_dup_cursor( 466 struct xfs_btree_cur *cur, /* input cursor */ 467 struct xfs_btree_cur **ncur) /* output cursor */ 468 { 469 struct xfs_buf *bp; /* btree block's buffer pointer */ 470 int error; /* error return value */ 471 int i; /* level number of btree block */ 472 xfs_mount_t *mp; /* mount structure for filesystem */ 473 struct xfs_btree_cur *new; /* new cursor value */ 474 xfs_trans_t *tp; /* transaction pointer, can be NULL */ 475 476 tp = cur->bc_tp; 477 mp = cur->bc_mp; 478 479 /* 480 * Allocate a new cursor like the old one. 481 */ 482 new = cur->bc_ops->dup_cursor(cur); 483 484 /* 485 * Copy the record currently in the cursor. 486 */ 487 new->bc_rec = cur->bc_rec; 488 489 /* 490 * For each level current, re-get the buffer and copy the ptr value. 491 */ 492 for (i = 0; i < new->bc_nlevels; i++) { 493 new->bc_levels[i].ptr = cur->bc_levels[i].ptr; 494 new->bc_levels[i].ra = cur->bc_levels[i].ra; 495 bp = cur->bc_levels[i].bp; 496 if (bp) { 497 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 498 xfs_buf_daddr(bp), mp->m_bsize, 499 0, &bp, 500 cur->bc_ops->buf_ops); 501 if (error) { 502 xfs_btree_del_cursor(new, error); 503 *ncur = NULL; 504 return error; 505 } 506 } 507 new->bc_levels[i].bp = bp; 508 } 509 *ncur = new; 510 return 0; 511 } 512 513 /* 514 * XFS btree block layout and addressing: 515 * 516 * There are two types of blocks in the btree: leaf and non-leaf blocks. 517 * 518 * The leaf record start with a header then followed by records containing 519 * the values. A non-leaf block also starts with the same header, and 520 * then first contains lookup keys followed by an equal number of pointers 521 * to the btree blocks at the previous level. 522 * 523 * +--------+-------+-------+-------+-------+-------+-------+ 524 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | 525 * +--------+-------+-------+-------+-------+-------+-------+ 526 * 527 * +--------+-------+-------+-------+-------+-------+-------+ 528 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | 529 * +--------+-------+-------+-------+-------+-------+-------+ 530 * 531 * The header is called struct xfs_btree_block for reasons better left unknown 532 * and comes in different versions for short (32bit) and long (64bit) block 533 * pointers. The record and key structures are defined by the btree instances 534 * and opaque to the btree core. The block pointers are simple disk endian 535 * integers, available in a short (32bit) and long (64bit) variant. 536 * 537 * The helpers below calculate the offset of a given record, key or pointer 538 * into a btree block (xfs_btree_*_offset) or return a pointer to the given 539 * record, key or pointer (xfs_btree_*_addr). Note that all addressing 540 * inside the btree block is done using indices starting at one, not zero! 541 * 542 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing 543 * overlapping intervals. In such a tree, records are still sorted lowest to 544 * highest and indexed by the smallest key value that refers to the record. 545 * However, nodes are different: each pointer has two associated keys -- one 546 * indexing the lowest key available in the block(s) below (the same behavior 547 * as the key in a regular btree) and another indexing the highest key 548 * available in the block(s) below. Because records are /not/ sorted by the 549 * highest key, all leaf block updates require us to compute the highest key 550 * that matches any record in the leaf and to recursively update the high keys 551 * in the nodes going further up in the tree, if necessary. Nodes look like 552 * this: 553 * 554 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ 555 * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... | 556 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ 557 * 558 * To perform an interval query on an overlapped tree, perform the usual 559 * depth-first search and use the low and high keys to decide if we can skip 560 * that particular node. If a leaf node is reached, return the records that 561 * intersect the interval. Note that an interval query may return numerous 562 * entries. For a non-overlapped tree, simply search for the record associated 563 * with the lowest key and iterate forward until a non-matching record is 564 * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by 565 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in 566 * more detail. 567 * 568 * Why do we care about overlapping intervals? Let's say you have a bunch of 569 * reverse mapping records on a reflink filesystem: 570 * 571 * 1: +- file A startblock B offset C length D -----------+ 572 * 2: +- file E startblock F offset G length H --------------+ 573 * 3: +- file I startblock F offset J length K --+ 574 * 4: +- file L... --+ 575 * 576 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally, 577 * we'd simply increment the length of record 1. But how do we find the record 578 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return 579 * record 3 because the keys are ordered first by startblock. An interval 580 * query would return records 1 and 2 because they both overlap (B+D-1), and 581 * from that we can pick out record 1 as the appropriate left neighbor. 582 * 583 * In the non-overlapped case you can do a LE lookup and decrement the cursor 584 * because a record's interval must end before the next record. 585 */ 586 587 /* 588 * Return size of the btree block header for this btree instance. 589 */ 590 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) 591 { 592 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 593 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 594 return XFS_BTREE_LBLOCK_CRC_LEN; 595 return XFS_BTREE_LBLOCK_LEN; 596 } 597 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 598 return XFS_BTREE_SBLOCK_CRC_LEN; 599 return XFS_BTREE_SBLOCK_LEN; 600 } 601 602 /* 603 * Return size of btree block pointers for this btree instance. 604 */ 605 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) 606 { 607 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 608 sizeof(__be64) : sizeof(__be32); 609 } 610 611 /* 612 * Calculate offset of the n-th record in a btree block. 613 */ 614 STATIC size_t 615 xfs_btree_rec_offset( 616 struct xfs_btree_cur *cur, 617 int n) 618 { 619 return xfs_btree_block_len(cur) + 620 (n - 1) * cur->bc_ops->rec_len; 621 } 622 623 /* 624 * Calculate offset of the n-th key in a btree block. 625 */ 626 STATIC size_t 627 xfs_btree_key_offset( 628 struct xfs_btree_cur *cur, 629 int n) 630 { 631 return xfs_btree_block_len(cur) + 632 (n - 1) * cur->bc_ops->key_len; 633 } 634 635 /* 636 * Calculate offset of the n-th high key in a btree block. 637 */ 638 STATIC size_t 639 xfs_btree_high_key_offset( 640 struct xfs_btree_cur *cur, 641 int n) 642 { 643 return xfs_btree_block_len(cur) + 644 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2); 645 } 646 647 /* 648 * Calculate offset of the n-th block pointer in a btree block. 649 */ 650 STATIC size_t 651 xfs_btree_ptr_offset( 652 struct xfs_btree_cur *cur, 653 int n, 654 int level) 655 { 656 return xfs_btree_block_len(cur) + 657 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + 658 (n - 1) * xfs_btree_ptr_len(cur); 659 } 660 661 /* 662 * Return a pointer to the n-th record in the btree block. 663 */ 664 union xfs_btree_rec * 665 xfs_btree_rec_addr( 666 struct xfs_btree_cur *cur, 667 int n, 668 struct xfs_btree_block *block) 669 { 670 return (union xfs_btree_rec *) 671 ((char *)block + xfs_btree_rec_offset(cur, n)); 672 } 673 674 /* 675 * Return a pointer to the n-th key in the btree block. 676 */ 677 union xfs_btree_key * 678 xfs_btree_key_addr( 679 struct xfs_btree_cur *cur, 680 int n, 681 struct xfs_btree_block *block) 682 { 683 return (union xfs_btree_key *) 684 ((char *)block + xfs_btree_key_offset(cur, n)); 685 } 686 687 /* 688 * Return a pointer to the n-th high key in the btree block. 689 */ 690 union xfs_btree_key * 691 xfs_btree_high_key_addr( 692 struct xfs_btree_cur *cur, 693 int n, 694 struct xfs_btree_block *block) 695 { 696 return (union xfs_btree_key *) 697 ((char *)block + xfs_btree_high_key_offset(cur, n)); 698 } 699 700 /* 701 * Return a pointer to the n-th block pointer in the btree block. 702 */ 703 union xfs_btree_ptr * 704 xfs_btree_ptr_addr( 705 struct xfs_btree_cur *cur, 706 int n, 707 struct xfs_btree_block *block) 708 { 709 int level = xfs_btree_get_level(block); 710 711 ASSERT(block->bb_level != 0); 712 713 return (union xfs_btree_ptr *) 714 ((char *)block + xfs_btree_ptr_offset(cur, n, level)); 715 } 716 717 struct xfs_ifork * 718 xfs_btree_ifork_ptr( 719 struct xfs_btree_cur *cur) 720 { 721 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 722 723 if (cur->bc_flags & XFS_BTREE_STAGING) 724 return cur->bc_ino.ifake->if_fork; 725 return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork); 726 } 727 728 /* 729 * Get the root block which is stored in the inode. 730 * 731 * For now this btree implementation assumes the btree root is always 732 * stored in the if_broot field of an inode fork. 733 */ 734 STATIC struct xfs_btree_block * 735 xfs_btree_get_iroot( 736 struct xfs_btree_cur *cur) 737 { 738 struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); 739 740 return (struct xfs_btree_block *)ifp->if_broot; 741 } 742 743 /* 744 * Retrieve the block pointer from the cursor at the given level. 745 * This may be an inode btree root or from a buffer. 746 */ 747 struct xfs_btree_block * /* generic btree block pointer */ 748 xfs_btree_get_block( 749 struct xfs_btree_cur *cur, /* btree cursor */ 750 int level, /* level in btree */ 751 struct xfs_buf **bpp) /* buffer containing the block */ 752 { 753 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 754 (level == cur->bc_nlevels - 1)) { 755 *bpp = NULL; 756 return xfs_btree_get_iroot(cur); 757 } 758 759 *bpp = cur->bc_levels[level].bp; 760 return XFS_BUF_TO_BLOCK(*bpp); 761 } 762 763 /* 764 * Change the cursor to point to the first record at the given level. 765 * Other levels are unaffected. 766 */ 767 STATIC int /* success=1, failure=0 */ 768 xfs_btree_firstrec( 769 struct xfs_btree_cur *cur, /* btree cursor */ 770 int level) /* level to change */ 771 { 772 struct xfs_btree_block *block; /* generic btree block pointer */ 773 struct xfs_buf *bp; /* buffer containing block */ 774 775 /* 776 * Get the block pointer for this level. 777 */ 778 block = xfs_btree_get_block(cur, level, &bp); 779 if (xfs_btree_check_block(cur, block, level, bp)) 780 return 0; 781 /* 782 * It's empty, there is no such record. 783 */ 784 if (!block->bb_numrecs) 785 return 0; 786 /* 787 * Set the ptr value to 1, that's the first record/key. 788 */ 789 cur->bc_levels[level].ptr = 1; 790 return 1; 791 } 792 793 /* 794 * Change the cursor to point to the last record in the current block 795 * at the given level. Other levels are unaffected. 796 */ 797 STATIC int /* success=1, failure=0 */ 798 xfs_btree_lastrec( 799 struct xfs_btree_cur *cur, /* btree cursor */ 800 int level) /* level to change */ 801 { 802 struct xfs_btree_block *block; /* generic btree block pointer */ 803 struct xfs_buf *bp; /* buffer containing block */ 804 805 /* 806 * Get the block pointer for this level. 807 */ 808 block = xfs_btree_get_block(cur, level, &bp); 809 if (xfs_btree_check_block(cur, block, level, bp)) 810 return 0; 811 /* 812 * It's empty, there is no such record. 813 */ 814 if (!block->bb_numrecs) 815 return 0; 816 /* 817 * Set the ptr value to numrecs, that's the last record/key. 818 */ 819 cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs); 820 return 1; 821 } 822 823 /* 824 * Compute first and last byte offsets for the fields given. 825 * Interprets the offsets table, which contains struct field offsets. 826 */ 827 void 828 xfs_btree_offsets( 829 uint32_t fields, /* bitmask of fields */ 830 const short *offsets, /* table of field offsets */ 831 int nbits, /* number of bits to inspect */ 832 int *first, /* output: first byte offset */ 833 int *last) /* output: last byte offset */ 834 { 835 int i; /* current bit number */ 836 uint32_t imask; /* mask for current bit number */ 837 838 ASSERT(fields != 0); 839 /* 840 * Find the lowest bit, so the first byte offset. 841 */ 842 for (i = 0, imask = 1u; ; i++, imask <<= 1) { 843 if (imask & fields) { 844 *first = offsets[i]; 845 break; 846 } 847 } 848 /* 849 * Find the highest bit, so the last byte offset. 850 */ 851 for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) { 852 if (imask & fields) { 853 *last = offsets[i + 1] - 1; 854 break; 855 } 856 } 857 } 858 859 /* 860 * Get a buffer for the block, return it read in. 861 * Long-form addressing. 862 */ 863 int 864 xfs_btree_read_bufl( 865 struct xfs_mount *mp, /* file system mount point */ 866 struct xfs_trans *tp, /* transaction pointer */ 867 xfs_fsblock_t fsbno, /* file system block number */ 868 struct xfs_buf **bpp, /* buffer for fsbno */ 869 int refval, /* ref count value for buffer */ 870 const struct xfs_buf_ops *ops) 871 { 872 struct xfs_buf *bp; /* return value */ 873 xfs_daddr_t d; /* real disk block address */ 874 int error; 875 876 if (!xfs_verify_fsbno(mp, fsbno)) 877 return -EFSCORRUPTED; 878 d = XFS_FSB_TO_DADDR(mp, fsbno); 879 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 880 mp->m_bsize, 0, &bp, ops); 881 if (error) 882 return error; 883 if (bp) 884 xfs_buf_set_ref(bp, refval); 885 *bpp = bp; 886 return 0; 887 } 888 889 /* 890 * Read-ahead the block, don't wait for it, don't return a buffer. 891 * Long-form addressing. 892 */ 893 /* ARGSUSED */ 894 void 895 xfs_btree_reada_bufl( 896 struct xfs_mount *mp, /* file system mount point */ 897 xfs_fsblock_t fsbno, /* file system block number */ 898 xfs_extlen_t count, /* count of filesystem blocks */ 899 const struct xfs_buf_ops *ops) 900 { 901 xfs_daddr_t d; 902 903 ASSERT(fsbno != NULLFSBLOCK); 904 d = XFS_FSB_TO_DADDR(mp, fsbno); 905 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 906 } 907 908 /* 909 * Read-ahead the block, don't wait for it, don't return a buffer. 910 * Short-form addressing. 911 */ 912 /* ARGSUSED */ 913 void 914 xfs_btree_reada_bufs( 915 struct xfs_mount *mp, /* file system mount point */ 916 xfs_agnumber_t agno, /* allocation group number */ 917 xfs_agblock_t agbno, /* allocation group block number */ 918 xfs_extlen_t count, /* count of filesystem blocks */ 919 const struct xfs_buf_ops *ops) 920 { 921 xfs_daddr_t d; 922 923 ASSERT(agno != NULLAGNUMBER); 924 ASSERT(agbno != NULLAGBLOCK); 925 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 926 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 927 } 928 929 STATIC int 930 xfs_btree_readahead_lblock( 931 struct xfs_btree_cur *cur, 932 int lr, 933 struct xfs_btree_block *block) 934 { 935 int rval = 0; 936 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); 937 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); 938 939 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { 940 xfs_btree_reada_bufl(cur->bc_mp, left, 1, 941 cur->bc_ops->buf_ops); 942 rval++; 943 } 944 945 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { 946 xfs_btree_reada_bufl(cur->bc_mp, right, 1, 947 cur->bc_ops->buf_ops); 948 rval++; 949 } 950 951 return rval; 952 } 953 954 STATIC int 955 xfs_btree_readahead_sblock( 956 struct xfs_btree_cur *cur, 957 int lr, 958 struct xfs_btree_block *block) 959 { 960 int rval = 0; 961 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); 962 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); 963 964 965 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { 966 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno, 967 left, 1, cur->bc_ops->buf_ops); 968 rval++; 969 } 970 971 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { 972 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno, 973 right, 1, cur->bc_ops->buf_ops); 974 rval++; 975 } 976 977 return rval; 978 } 979 980 /* 981 * Read-ahead btree blocks, at the given level. 982 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 983 */ 984 STATIC int 985 xfs_btree_readahead( 986 struct xfs_btree_cur *cur, /* btree cursor */ 987 int lev, /* level in btree */ 988 int lr) /* left/right bits */ 989 { 990 struct xfs_btree_block *block; 991 992 /* 993 * No readahead needed if we are at the root level and the 994 * btree root is stored in the inode. 995 */ 996 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 997 (lev == cur->bc_nlevels - 1)) 998 return 0; 999 1000 if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra) 1001 return 0; 1002 1003 cur->bc_levels[lev].ra |= lr; 1004 block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp); 1005 1006 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1007 return xfs_btree_readahead_lblock(cur, lr, block); 1008 return xfs_btree_readahead_sblock(cur, lr, block); 1009 } 1010 1011 STATIC int 1012 xfs_btree_ptr_to_daddr( 1013 struct xfs_btree_cur *cur, 1014 const union xfs_btree_ptr *ptr, 1015 xfs_daddr_t *daddr) 1016 { 1017 xfs_fsblock_t fsbno; 1018 xfs_agblock_t agbno; 1019 int error; 1020 1021 error = xfs_btree_check_ptr(cur, ptr, 0, 1); 1022 if (error) 1023 return error; 1024 1025 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1026 fsbno = be64_to_cpu(ptr->l); 1027 *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno); 1028 } else { 1029 agbno = be32_to_cpu(ptr->s); 1030 *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1031 agbno); 1032 } 1033 1034 return 0; 1035 } 1036 1037 /* 1038 * Readahead @count btree blocks at the given @ptr location. 1039 * 1040 * We don't need to care about long or short form btrees here as we have a 1041 * method of converting the ptr directly to a daddr available to us. 1042 */ 1043 STATIC void 1044 xfs_btree_readahead_ptr( 1045 struct xfs_btree_cur *cur, 1046 union xfs_btree_ptr *ptr, 1047 xfs_extlen_t count) 1048 { 1049 xfs_daddr_t daddr; 1050 1051 if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr)) 1052 return; 1053 xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr, 1054 cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); 1055 } 1056 1057 /* 1058 * Set the buffer for level "lev" in the cursor to bp, releasing 1059 * any previous buffer. 1060 */ 1061 STATIC void 1062 xfs_btree_setbuf( 1063 struct xfs_btree_cur *cur, /* btree cursor */ 1064 int lev, /* level in btree */ 1065 struct xfs_buf *bp) /* new buffer to set */ 1066 { 1067 struct xfs_btree_block *b; /* btree block */ 1068 1069 if (cur->bc_levels[lev].bp) 1070 xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp); 1071 cur->bc_levels[lev].bp = bp; 1072 cur->bc_levels[lev].ra = 0; 1073 1074 b = XFS_BUF_TO_BLOCK(bp); 1075 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1076 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) 1077 cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; 1078 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) 1079 cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA; 1080 } else { 1081 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) 1082 cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; 1083 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) 1084 cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA; 1085 } 1086 } 1087 1088 bool 1089 xfs_btree_ptr_is_null( 1090 struct xfs_btree_cur *cur, 1091 const union xfs_btree_ptr *ptr) 1092 { 1093 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1094 return ptr->l == cpu_to_be64(NULLFSBLOCK); 1095 else 1096 return ptr->s == cpu_to_be32(NULLAGBLOCK); 1097 } 1098 1099 void 1100 xfs_btree_set_ptr_null( 1101 struct xfs_btree_cur *cur, 1102 union xfs_btree_ptr *ptr) 1103 { 1104 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1105 ptr->l = cpu_to_be64(NULLFSBLOCK); 1106 else 1107 ptr->s = cpu_to_be32(NULLAGBLOCK); 1108 } 1109 1110 /* 1111 * Get/set/init sibling pointers 1112 */ 1113 void 1114 xfs_btree_get_sibling( 1115 struct xfs_btree_cur *cur, 1116 struct xfs_btree_block *block, 1117 union xfs_btree_ptr *ptr, 1118 int lr) 1119 { 1120 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); 1121 1122 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1123 if (lr == XFS_BB_RIGHTSIB) 1124 ptr->l = block->bb_u.l.bb_rightsib; 1125 else 1126 ptr->l = block->bb_u.l.bb_leftsib; 1127 } else { 1128 if (lr == XFS_BB_RIGHTSIB) 1129 ptr->s = block->bb_u.s.bb_rightsib; 1130 else 1131 ptr->s = block->bb_u.s.bb_leftsib; 1132 } 1133 } 1134 1135 void 1136 xfs_btree_set_sibling( 1137 struct xfs_btree_cur *cur, 1138 struct xfs_btree_block *block, 1139 const union xfs_btree_ptr *ptr, 1140 int lr) 1141 { 1142 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); 1143 1144 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1145 if (lr == XFS_BB_RIGHTSIB) 1146 block->bb_u.l.bb_rightsib = ptr->l; 1147 else 1148 block->bb_u.l.bb_leftsib = ptr->l; 1149 } else { 1150 if (lr == XFS_BB_RIGHTSIB) 1151 block->bb_u.s.bb_rightsib = ptr->s; 1152 else 1153 block->bb_u.s.bb_leftsib = ptr->s; 1154 } 1155 } 1156 1157 void 1158 xfs_btree_init_block_int( 1159 struct xfs_mount *mp, 1160 struct xfs_btree_block *buf, 1161 xfs_daddr_t blkno, 1162 xfs_btnum_t btnum, 1163 __u16 level, 1164 __u16 numrecs, 1165 __u64 owner, 1166 unsigned int flags) 1167 { 1168 int crc = xfs_has_crc(mp); 1169 __u32 magic = xfs_btree_magic(crc, btnum); 1170 1171 buf->bb_magic = cpu_to_be32(magic); 1172 buf->bb_level = cpu_to_be16(level); 1173 buf->bb_numrecs = cpu_to_be16(numrecs); 1174 1175 if (flags & XFS_BTREE_LONG_PTRS) { 1176 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); 1177 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); 1178 if (crc) { 1179 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); 1180 buf->bb_u.l.bb_owner = cpu_to_be64(owner); 1181 uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); 1182 buf->bb_u.l.bb_pad = 0; 1183 buf->bb_u.l.bb_lsn = 0; 1184 } 1185 } else { 1186 /* owner is a 32 bit value on short blocks */ 1187 __u32 __owner = (__u32)owner; 1188 1189 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); 1190 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); 1191 if (crc) { 1192 buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); 1193 buf->bb_u.s.bb_owner = cpu_to_be32(__owner); 1194 uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); 1195 buf->bb_u.s.bb_lsn = 0; 1196 } 1197 } 1198 } 1199 1200 void 1201 xfs_btree_init_block( 1202 struct xfs_mount *mp, 1203 struct xfs_buf *bp, 1204 xfs_btnum_t btnum, 1205 __u16 level, 1206 __u16 numrecs, 1207 __u64 owner) 1208 { 1209 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), xfs_buf_daddr(bp), 1210 btnum, level, numrecs, owner, 0); 1211 } 1212 1213 void 1214 xfs_btree_init_block_cur( 1215 struct xfs_btree_cur *cur, 1216 struct xfs_buf *bp, 1217 int level, 1218 int numrecs) 1219 { 1220 __u64 owner; 1221 1222 /* 1223 * we can pull the owner from the cursor right now as the different 1224 * owners align directly with the pointer size of the btree. This may 1225 * change in future, but is safe for current users of the generic btree 1226 * code. 1227 */ 1228 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1229 owner = cur->bc_ino.ip->i_ino; 1230 else 1231 owner = cur->bc_ag.pag->pag_agno; 1232 1233 xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), 1234 xfs_buf_daddr(bp), cur->bc_btnum, level, 1235 numrecs, owner, cur->bc_flags); 1236 } 1237 1238 /* 1239 * Return true if ptr is the last record in the btree and 1240 * we need to track updates to this record. The decision 1241 * will be further refined in the update_lastrec method. 1242 */ 1243 STATIC int 1244 xfs_btree_is_lastrec( 1245 struct xfs_btree_cur *cur, 1246 struct xfs_btree_block *block, 1247 int level) 1248 { 1249 union xfs_btree_ptr ptr; 1250 1251 if (level > 0) 1252 return 0; 1253 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) 1254 return 0; 1255 1256 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1257 if (!xfs_btree_ptr_is_null(cur, &ptr)) 1258 return 0; 1259 return 1; 1260 } 1261 1262 STATIC void 1263 xfs_btree_buf_to_ptr( 1264 struct xfs_btree_cur *cur, 1265 struct xfs_buf *bp, 1266 union xfs_btree_ptr *ptr) 1267 { 1268 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1269 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, 1270 xfs_buf_daddr(bp))); 1271 else { 1272 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, 1273 xfs_buf_daddr(bp))); 1274 } 1275 } 1276 1277 STATIC void 1278 xfs_btree_set_refs( 1279 struct xfs_btree_cur *cur, 1280 struct xfs_buf *bp) 1281 { 1282 switch (cur->bc_btnum) { 1283 case XFS_BTNUM_BNO: 1284 case XFS_BTNUM_CNT: 1285 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); 1286 break; 1287 case XFS_BTNUM_INO: 1288 case XFS_BTNUM_FINO: 1289 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); 1290 break; 1291 case XFS_BTNUM_BMAP: 1292 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); 1293 break; 1294 case XFS_BTNUM_RMAP: 1295 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF); 1296 break; 1297 case XFS_BTNUM_REFC: 1298 xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF); 1299 break; 1300 default: 1301 ASSERT(0); 1302 } 1303 } 1304 1305 int 1306 xfs_btree_get_buf_block( 1307 struct xfs_btree_cur *cur, 1308 const union xfs_btree_ptr *ptr, 1309 struct xfs_btree_block **block, 1310 struct xfs_buf **bpp) 1311 { 1312 struct xfs_mount *mp = cur->bc_mp; 1313 xfs_daddr_t d; 1314 int error; 1315 1316 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1317 if (error) 1318 return error; 1319 error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize, 1320 0, bpp); 1321 if (error) 1322 return error; 1323 1324 (*bpp)->b_ops = cur->bc_ops->buf_ops; 1325 *block = XFS_BUF_TO_BLOCK(*bpp); 1326 return 0; 1327 } 1328 1329 /* 1330 * Read in the buffer at the given ptr and return the buffer and 1331 * the block pointer within the buffer. 1332 */ 1333 int 1334 xfs_btree_read_buf_block( 1335 struct xfs_btree_cur *cur, 1336 const union xfs_btree_ptr *ptr, 1337 int flags, 1338 struct xfs_btree_block **block, 1339 struct xfs_buf **bpp) 1340 { 1341 struct xfs_mount *mp = cur->bc_mp; 1342 xfs_daddr_t d; 1343 int error; 1344 1345 /* need to sort out how callers deal with failures first */ 1346 ASSERT(!(flags & XBF_TRYLOCK)); 1347 1348 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1349 if (error) 1350 return error; 1351 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, 1352 mp->m_bsize, flags, bpp, 1353 cur->bc_ops->buf_ops); 1354 if (error) 1355 return error; 1356 1357 xfs_btree_set_refs(cur, *bpp); 1358 *block = XFS_BUF_TO_BLOCK(*bpp); 1359 return 0; 1360 } 1361 1362 /* 1363 * Copy keys from one btree block to another. 1364 */ 1365 void 1366 xfs_btree_copy_keys( 1367 struct xfs_btree_cur *cur, 1368 union xfs_btree_key *dst_key, 1369 const union xfs_btree_key *src_key, 1370 int numkeys) 1371 { 1372 ASSERT(numkeys >= 0); 1373 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); 1374 } 1375 1376 /* 1377 * Copy records from one btree block to another. 1378 */ 1379 STATIC void 1380 xfs_btree_copy_recs( 1381 struct xfs_btree_cur *cur, 1382 union xfs_btree_rec *dst_rec, 1383 union xfs_btree_rec *src_rec, 1384 int numrecs) 1385 { 1386 ASSERT(numrecs >= 0); 1387 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); 1388 } 1389 1390 /* 1391 * Copy block pointers from one btree block to another. 1392 */ 1393 void 1394 xfs_btree_copy_ptrs( 1395 struct xfs_btree_cur *cur, 1396 union xfs_btree_ptr *dst_ptr, 1397 const union xfs_btree_ptr *src_ptr, 1398 int numptrs) 1399 { 1400 ASSERT(numptrs >= 0); 1401 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); 1402 } 1403 1404 /* 1405 * Shift keys one index left/right inside a single btree block. 1406 */ 1407 STATIC void 1408 xfs_btree_shift_keys( 1409 struct xfs_btree_cur *cur, 1410 union xfs_btree_key *key, 1411 int dir, 1412 int numkeys) 1413 { 1414 char *dst_key; 1415 1416 ASSERT(numkeys >= 0); 1417 ASSERT(dir == 1 || dir == -1); 1418 1419 dst_key = (char *)key + (dir * cur->bc_ops->key_len); 1420 memmove(dst_key, key, numkeys * cur->bc_ops->key_len); 1421 } 1422 1423 /* 1424 * Shift records one index left/right inside a single btree block. 1425 */ 1426 STATIC void 1427 xfs_btree_shift_recs( 1428 struct xfs_btree_cur *cur, 1429 union xfs_btree_rec *rec, 1430 int dir, 1431 int numrecs) 1432 { 1433 char *dst_rec; 1434 1435 ASSERT(numrecs >= 0); 1436 ASSERT(dir == 1 || dir == -1); 1437 1438 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); 1439 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); 1440 } 1441 1442 /* 1443 * Shift block pointers one index left/right inside a single btree block. 1444 */ 1445 STATIC void 1446 xfs_btree_shift_ptrs( 1447 struct xfs_btree_cur *cur, 1448 union xfs_btree_ptr *ptr, 1449 int dir, 1450 int numptrs) 1451 { 1452 char *dst_ptr; 1453 1454 ASSERT(numptrs >= 0); 1455 ASSERT(dir == 1 || dir == -1); 1456 1457 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); 1458 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); 1459 } 1460 1461 /* 1462 * Log key values from the btree block. 1463 */ 1464 STATIC void 1465 xfs_btree_log_keys( 1466 struct xfs_btree_cur *cur, 1467 struct xfs_buf *bp, 1468 int first, 1469 int last) 1470 { 1471 1472 if (bp) { 1473 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1474 xfs_trans_log_buf(cur->bc_tp, bp, 1475 xfs_btree_key_offset(cur, first), 1476 xfs_btree_key_offset(cur, last + 1) - 1); 1477 } else { 1478 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, 1479 xfs_ilog_fbroot(cur->bc_ino.whichfork)); 1480 } 1481 } 1482 1483 /* 1484 * Log record values from the btree block. 1485 */ 1486 void 1487 xfs_btree_log_recs( 1488 struct xfs_btree_cur *cur, 1489 struct xfs_buf *bp, 1490 int first, 1491 int last) 1492 { 1493 1494 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1495 xfs_trans_log_buf(cur->bc_tp, bp, 1496 xfs_btree_rec_offset(cur, first), 1497 xfs_btree_rec_offset(cur, last + 1) - 1); 1498 1499 } 1500 1501 /* 1502 * Log block pointer fields from a btree block (nonleaf). 1503 */ 1504 STATIC void 1505 xfs_btree_log_ptrs( 1506 struct xfs_btree_cur *cur, /* btree cursor */ 1507 struct xfs_buf *bp, /* buffer containing btree block */ 1508 int first, /* index of first pointer to log */ 1509 int last) /* index of last pointer to log */ 1510 { 1511 1512 if (bp) { 1513 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 1514 int level = xfs_btree_get_level(block); 1515 1516 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1517 xfs_trans_log_buf(cur->bc_tp, bp, 1518 xfs_btree_ptr_offset(cur, first, level), 1519 xfs_btree_ptr_offset(cur, last + 1, level) - 1); 1520 } else { 1521 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, 1522 xfs_ilog_fbroot(cur->bc_ino.whichfork)); 1523 } 1524 1525 } 1526 1527 /* 1528 * Log fields from a btree block header. 1529 */ 1530 void 1531 xfs_btree_log_block( 1532 struct xfs_btree_cur *cur, /* btree cursor */ 1533 struct xfs_buf *bp, /* buffer containing btree block */ 1534 uint32_t fields) /* mask of fields: XFS_BB_... */ 1535 { 1536 int first; /* first byte offset logged */ 1537 int last; /* last byte offset logged */ 1538 static const short soffsets[] = { /* table of offsets (short) */ 1539 offsetof(struct xfs_btree_block, bb_magic), 1540 offsetof(struct xfs_btree_block, bb_level), 1541 offsetof(struct xfs_btree_block, bb_numrecs), 1542 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), 1543 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), 1544 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), 1545 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), 1546 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), 1547 offsetof(struct xfs_btree_block, bb_u.s.bb_owner), 1548 offsetof(struct xfs_btree_block, bb_u.s.bb_crc), 1549 XFS_BTREE_SBLOCK_CRC_LEN 1550 }; 1551 static const short loffsets[] = { /* table of offsets (long) */ 1552 offsetof(struct xfs_btree_block, bb_magic), 1553 offsetof(struct xfs_btree_block, bb_level), 1554 offsetof(struct xfs_btree_block, bb_numrecs), 1555 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), 1556 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), 1557 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), 1558 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), 1559 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), 1560 offsetof(struct xfs_btree_block, bb_u.l.bb_owner), 1561 offsetof(struct xfs_btree_block, bb_u.l.bb_crc), 1562 offsetof(struct xfs_btree_block, bb_u.l.bb_pad), 1563 XFS_BTREE_LBLOCK_CRC_LEN 1564 }; 1565 1566 if (bp) { 1567 int nbits; 1568 1569 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 1570 /* 1571 * We don't log the CRC when updating a btree 1572 * block but instead recreate it during log 1573 * recovery. As the log buffers have checksums 1574 * of their own this is safe and avoids logging a crc 1575 * update in a lot of places. 1576 */ 1577 if (fields == XFS_BB_ALL_BITS) 1578 fields = XFS_BB_ALL_BITS_CRC; 1579 nbits = XFS_BB_NUM_BITS_CRC; 1580 } else { 1581 nbits = XFS_BB_NUM_BITS; 1582 } 1583 xfs_btree_offsets(fields, 1584 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 1585 loffsets : soffsets, 1586 nbits, &first, &last); 1587 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1588 xfs_trans_log_buf(cur->bc_tp, bp, first, last); 1589 } else { 1590 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, 1591 xfs_ilog_fbroot(cur->bc_ino.whichfork)); 1592 } 1593 } 1594 1595 /* 1596 * Increment cursor by one record at the level. 1597 * For nonzero levels the leaf-ward information is untouched. 1598 */ 1599 int /* error */ 1600 xfs_btree_increment( 1601 struct xfs_btree_cur *cur, 1602 int level, 1603 int *stat) /* success/failure */ 1604 { 1605 struct xfs_btree_block *block; 1606 union xfs_btree_ptr ptr; 1607 struct xfs_buf *bp; 1608 int error; /* error return value */ 1609 int lev; 1610 1611 ASSERT(level < cur->bc_nlevels); 1612 1613 /* Read-ahead to the right at this level. */ 1614 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 1615 1616 /* Get a pointer to the btree block. */ 1617 block = xfs_btree_get_block(cur, level, &bp); 1618 1619 #ifdef DEBUG 1620 error = xfs_btree_check_block(cur, block, level, bp); 1621 if (error) 1622 goto error0; 1623 #endif 1624 1625 /* We're done if we remain in the block after the increment. */ 1626 if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block)) 1627 goto out1; 1628 1629 /* Fail if we just went off the right edge of the tree. */ 1630 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1631 if (xfs_btree_ptr_is_null(cur, &ptr)) 1632 goto out0; 1633 1634 XFS_BTREE_STATS_INC(cur, increment); 1635 1636 /* 1637 * March up the tree incrementing pointers. 1638 * Stop when we don't go off the right edge of a block. 1639 */ 1640 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1641 block = xfs_btree_get_block(cur, lev, &bp); 1642 1643 #ifdef DEBUG 1644 error = xfs_btree_check_block(cur, block, lev, bp); 1645 if (error) 1646 goto error0; 1647 #endif 1648 1649 if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block)) 1650 break; 1651 1652 /* Read-ahead the right block for the next loop. */ 1653 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); 1654 } 1655 1656 /* 1657 * If we went off the root then we are either seriously 1658 * confused or have the tree root in an inode. 1659 */ 1660 if (lev == cur->bc_nlevels) { 1661 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1662 goto out0; 1663 ASSERT(0); 1664 error = -EFSCORRUPTED; 1665 goto error0; 1666 } 1667 ASSERT(lev < cur->bc_nlevels); 1668 1669 /* 1670 * Now walk back down the tree, fixing up the cursor's buffer 1671 * pointers and key numbers. 1672 */ 1673 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1674 union xfs_btree_ptr *ptrp; 1675 1676 ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block); 1677 --lev; 1678 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1679 if (error) 1680 goto error0; 1681 1682 xfs_btree_setbuf(cur, lev, bp); 1683 cur->bc_levels[lev].ptr = 1; 1684 } 1685 out1: 1686 *stat = 1; 1687 return 0; 1688 1689 out0: 1690 *stat = 0; 1691 return 0; 1692 1693 error0: 1694 return error; 1695 } 1696 1697 /* 1698 * Decrement cursor by one record at the level. 1699 * For nonzero levels the leaf-ward information is untouched. 1700 */ 1701 int /* error */ 1702 xfs_btree_decrement( 1703 struct xfs_btree_cur *cur, 1704 int level, 1705 int *stat) /* success/failure */ 1706 { 1707 struct xfs_btree_block *block; 1708 struct xfs_buf *bp; 1709 int error; /* error return value */ 1710 int lev; 1711 union xfs_btree_ptr ptr; 1712 1713 ASSERT(level < cur->bc_nlevels); 1714 1715 /* Read-ahead to the left at this level. */ 1716 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); 1717 1718 /* We're done if we remain in the block after the decrement. */ 1719 if (--cur->bc_levels[level].ptr > 0) 1720 goto out1; 1721 1722 /* Get a pointer to the btree block. */ 1723 block = xfs_btree_get_block(cur, level, &bp); 1724 1725 #ifdef DEBUG 1726 error = xfs_btree_check_block(cur, block, level, bp); 1727 if (error) 1728 goto error0; 1729 #endif 1730 1731 /* Fail if we just went off the left edge of the tree. */ 1732 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 1733 if (xfs_btree_ptr_is_null(cur, &ptr)) 1734 goto out0; 1735 1736 XFS_BTREE_STATS_INC(cur, decrement); 1737 1738 /* 1739 * March up the tree decrementing pointers. 1740 * Stop when we don't go off the left edge of a block. 1741 */ 1742 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1743 if (--cur->bc_levels[lev].ptr > 0) 1744 break; 1745 /* Read-ahead the left block for the next loop. */ 1746 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); 1747 } 1748 1749 /* 1750 * If we went off the root then we are seriously confused. 1751 * or the root of the tree is in an inode. 1752 */ 1753 if (lev == cur->bc_nlevels) { 1754 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1755 goto out0; 1756 ASSERT(0); 1757 error = -EFSCORRUPTED; 1758 goto error0; 1759 } 1760 ASSERT(lev < cur->bc_nlevels); 1761 1762 /* 1763 * Now walk back down the tree, fixing up the cursor's buffer 1764 * pointers and key numbers. 1765 */ 1766 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1767 union xfs_btree_ptr *ptrp; 1768 1769 ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block); 1770 --lev; 1771 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1772 if (error) 1773 goto error0; 1774 xfs_btree_setbuf(cur, lev, bp); 1775 cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block); 1776 } 1777 out1: 1778 *stat = 1; 1779 return 0; 1780 1781 out0: 1782 *stat = 0; 1783 return 0; 1784 1785 error0: 1786 return error; 1787 } 1788 1789 int 1790 xfs_btree_lookup_get_block( 1791 struct xfs_btree_cur *cur, /* btree cursor */ 1792 int level, /* level in the btree */ 1793 const union xfs_btree_ptr *pp, /* ptr to btree block */ 1794 struct xfs_btree_block **blkp) /* return btree block */ 1795 { 1796 struct xfs_buf *bp; /* buffer pointer for btree block */ 1797 xfs_daddr_t daddr; 1798 int error = 0; 1799 1800 /* special case the root block if in an inode */ 1801 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 1802 (level == cur->bc_nlevels - 1)) { 1803 *blkp = xfs_btree_get_iroot(cur); 1804 return 0; 1805 } 1806 1807 /* 1808 * If the old buffer at this level for the disk address we are 1809 * looking for re-use it. 1810 * 1811 * Otherwise throw it away and get a new one. 1812 */ 1813 bp = cur->bc_levels[level].bp; 1814 error = xfs_btree_ptr_to_daddr(cur, pp, &daddr); 1815 if (error) 1816 return error; 1817 if (bp && xfs_buf_daddr(bp) == daddr) { 1818 *blkp = XFS_BUF_TO_BLOCK(bp); 1819 return 0; 1820 } 1821 1822 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); 1823 if (error) 1824 return error; 1825 1826 /* Check the inode owner since the verifiers don't. */ 1827 if (xfs_has_crc(cur->bc_mp) && 1828 !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) && 1829 (cur->bc_flags & XFS_BTREE_LONG_PTRS) && 1830 be64_to_cpu((*blkp)->bb_u.l.bb_owner) != 1831 cur->bc_ino.ip->i_ino) 1832 goto out_bad; 1833 1834 /* Did we get the level we were looking for? */ 1835 if (be16_to_cpu((*blkp)->bb_level) != level) 1836 goto out_bad; 1837 1838 /* Check that internal nodes have at least one record. */ 1839 if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0) 1840 goto out_bad; 1841 1842 xfs_btree_setbuf(cur, level, bp); 1843 return 0; 1844 1845 out_bad: 1846 *blkp = NULL; 1847 xfs_buf_mark_corrupt(bp); 1848 xfs_trans_brelse(cur->bc_tp, bp); 1849 return -EFSCORRUPTED; 1850 } 1851 1852 /* 1853 * Get current search key. For level 0 we don't actually have a key 1854 * structure so we make one up from the record. For all other levels 1855 * we just return the right key. 1856 */ 1857 STATIC union xfs_btree_key * 1858 xfs_lookup_get_search_key( 1859 struct xfs_btree_cur *cur, 1860 int level, 1861 int keyno, 1862 struct xfs_btree_block *block, 1863 union xfs_btree_key *kp) 1864 { 1865 if (level == 0) { 1866 cur->bc_ops->init_key_from_rec(kp, 1867 xfs_btree_rec_addr(cur, keyno, block)); 1868 return kp; 1869 } 1870 1871 return xfs_btree_key_addr(cur, keyno, block); 1872 } 1873 1874 /* 1875 * Lookup the record. The cursor is made to point to it, based on dir. 1876 * stat is set to 0 if can't find any such record, 1 for success. 1877 */ 1878 int /* error */ 1879 xfs_btree_lookup( 1880 struct xfs_btree_cur *cur, /* btree cursor */ 1881 xfs_lookup_t dir, /* <=, ==, or >= */ 1882 int *stat) /* success/failure */ 1883 { 1884 struct xfs_btree_block *block; /* current btree block */ 1885 int64_t diff; /* difference for the current key */ 1886 int error; /* error return value */ 1887 int keyno; /* current key number */ 1888 int level; /* level in the btree */ 1889 union xfs_btree_ptr *pp; /* ptr to btree block */ 1890 union xfs_btree_ptr ptr; /* ptr to btree block */ 1891 1892 XFS_BTREE_STATS_INC(cur, lookup); 1893 1894 /* No such thing as a zero-level tree. */ 1895 if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) 1896 return -EFSCORRUPTED; 1897 1898 block = NULL; 1899 keyno = 0; 1900 1901 /* initialise start pointer from cursor */ 1902 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 1903 pp = &ptr; 1904 1905 /* 1906 * Iterate over each level in the btree, starting at the root. 1907 * For each level above the leaves, find the key we need, based 1908 * on the lookup record, then follow the corresponding block 1909 * pointer down to the next level. 1910 */ 1911 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { 1912 /* Get the block we need to do the lookup on. */ 1913 error = xfs_btree_lookup_get_block(cur, level, pp, &block); 1914 if (error) 1915 goto error0; 1916 1917 if (diff == 0) { 1918 /* 1919 * If we already had a key match at a higher level, we 1920 * know we need to use the first entry in this block. 1921 */ 1922 keyno = 1; 1923 } else { 1924 /* Otherwise search this block. Do a binary search. */ 1925 1926 int high; /* high entry number */ 1927 int low; /* low entry number */ 1928 1929 /* Set low and high entry numbers, 1-based. */ 1930 low = 1; 1931 high = xfs_btree_get_numrecs(block); 1932 if (!high) { 1933 /* Block is empty, must be an empty leaf. */ 1934 if (level != 0 || cur->bc_nlevels != 1) { 1935 XFS_CORRUPTION_ERROR(__func__, 1936 XFS_ERRLEVEL_LOW, 1937 cur->bc_mp, block, 1938 sizeof(*block)); 1939 return -EFSCORRUPTED; 1940 } 1941 1942 cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE; 1943 *stat = 0; 1944 return 0; 1945 } 1946 1947 /* Binary search the block. */ 1948 while (low <= high) { 1949 union xfs_btree_key key; 1950 union xfs_btree_key *kp; 1951 1952 XFS_BTREE_STATS_INC(cur, compare); 1953 1954 /* keyno is average of low and high. */ 1955 keyno = (low + high) >> 1; 1956 1957 /* Get current search key */ 1958 kp = xfs_lookup_get_search_key(cur, level, 1959 keyno, block, &key); 1960 1961 /* 1962 * Compute difference to get next direction: 1963 * - less than, move right 1964 * - greater than, move left 1965 * - equal, we're done 1966 */ 1967 diff = cur->bc_ops->key_diff(cur, kp); 1968 if (diff < 0) 1969 low = keyno + 1; 1970 else if (diff > 0) 1971 high = keyno - 1; 1972 else 1973 break; 1974 } 1975 } 1976 1977 /* 1978 * If there are more levels, set up for the next level 1979 * by getting the block number and filling in the cursor. 1980 */ 1981 if (level > 0) { 1982 /* 1983 * If we moved left, need the previous key number, 1984 * unless there isn't one. 1985 */ 1986 if (diff > 0 && --keyno < 1) 1987 keyno = 1; 1988 pp = xfs_btree_ptr_addr(cur, keyno, block); 1989 1990 error = xfs_btree_debug_check_ptr(cur, pp, 0, level); 1991 if (error) 1992 goto error0; 1993 1994 cur->bc_levels[level].ptr = keyno; 1995 } 1996 } 1997 1998 /* Done with the search. See if we need to adjust the results. */ 1999 if (dir != XFS_LOOKUP_LE && diff < 0) { 2000 keyno++; 2001 /* 2002 * If ge search and we went off the end of the block, but it's 2003 * not the last block, we're in the wrong block. 2004 */ 2005 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 2006 if (dir == XFS_LOOKUP_GE && 2007 keyno > xfs_btree_get_numrecs(block) && 2008 !xfs_btree_ptr_is_null(cur, &ptr)) { 2009 int i; 2010 2011 cur->bc_levels[0].ptr = keyno; 2012 error = xfs_btree_increment(cur, 0, &i); 2013 if (error) 2014 goto error0; 2015 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) 2016 return -EFSCORRUPTED; 2017 *stat = 1; 2018 return 0; 2019 } 2020 } else if (dir == XFS_LOOKUP_LE && diff > 0) 2021 keyno--; 2022 cur->bc_levels[0].ptr = keyno; 2023 2024 /* Return if we succeeded or not. */ 2025 if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) 2026 *stat = 0; 2027 else if (dir != XFS_LOOKUP_EQ || diff == 0) 2028 *stat = 1; 2029 else 2030 *stat = 0; 2031 return 0; 2032 2033 error0: 2034 return error; 2035 } 2036 2037 /* Find the high key storage area from a regular key. */ 2038 union xfs_btree_key * 2039 xfs_btree_high_key_from_key( 2040 struct xfs_btree_cur *cur, 2041 union xfs_btree_key *key) 2042 { 2043 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 2044 return (union xfs_btree_key *)((char *)key + 2045 (cur->bc_ops->key_len / 2)); 2046 } 2047 2048 /* Determine the low (and high if overlapped) keys of a leaf block */ 2049 STATIC void 2050 xfs_btree_get_leaf_keys( 2051 struct xfs_btree_cur *cur, 2052 struct xfs_btree_block *block, 2053 union xfs_btree_key *key) 2054 { 2055 union xfs_btree_key max_hkey; 2056 union xfs_btree_key hkey; 2057 union xfs_btree_rec *rec; 2058 union xfs_btree_key *high; 2059 int n; 2060 2061 rec = xfs_btree_rec_addr(cur, 1, block); 2062 cur->bc_ops->init_key_from_rec(key, rec); 2063 2064 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2065 2066 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec); 2067 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 2068 rec = xfs_btree_rec_addr(cur, n, block); 2069 cur->bc_ops->init_high_key_from_rec(&hkey, rec); 2070 if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey)) 2071 max_hkey = hkey; 2072 } 2073 2074 high = xfs_btree_high_key_from_key(cur, key); 2075 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2); 2076 } 2077 } 2078 2079 /* Determine the low (and high if overlapped) keys of a node block */ 2080 STATIC void 2081 xfs_btree_get_node_keys( 2082 struct xfs_btree_cur *cur, 2083 struct xfs_btree_block *block, 2084 union xfs_btree_key *key) 2085 { 2086 union xfs_btree_key *hkey; 2087 union xfs_btree_key *max_hkey; 2088 union xfs_btree_key *high; 2089 int n; 2090 2091 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2092 memcpy(key, xfs_btree_key_addr(cur, 1, block), 2093 cur->bc_ops->key_len / 2); 2094 2095 max_hkey = xfs_btree_high_key_addr(cur, 1, block); 2096 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 2097 hkey = xfs_btree_high_key_addr(cur, n, block); 2098 if (xfs_btree_keycmp_gt(cur, hkey, max_hkey)) 2099 max_hkey = hkey; 2100 } 2101 2102 high = xfs_btree_high_key_from_key(cur, key); 2103 memcpy(high, max_hkey, cur->bc_ops->key_len / 2); 2104 } else { 2105 memcpy(key, xfs_btree_key_addr(cur, 1, block), 2106 cur->bc_ops->key_len); 2107 } 2108 } 2109 2110 /* Derive the keys for any btree block. */ 2111 void 2112 xfs_btree_get_keys( 2113 struct xfs_btree_cur *cur, 2114 struct xfs_btree_block *block, 2115 union xfs_btree_key *key) 2116 { 2117 if (be16_to_cpu(block->bb_level) == 0) 2118 xfs_btree_get_leaf_keys(cur, block, key); 2119 else 2120 xfs_btree_get_node_keys(cur, block, key); 2121 } 2122 2123 /* 2124 * Decide if we need to update the parent keys of a btree block. For 2125 * a standard btree this is only necessary if we're updating the first 2126 * record/key. For an overlapping btree, we must always update the 2127 * keys because the highest key can be in any of the records or keys 2128 * in the block. 2129 */ 2130 static inline bool 2131 xfs_btree_needs_key_update( 2132 struct xfs_btree_cur *cur, 2133 int ptr) 2134 { 2135 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1; 2136 } 2137 2138 /* 2139 * Update the low and high parent keys of the given level, progressing 2140 * towards the root. If force_all is false, stop if the keys for a given 2141 * level do not need updating. 2142 */ 2143 STATIC int 2144 __xfs_btree_updkeys( 2145 struct xfs_btree_cur *cur, 2146 int level, 2147 struct xfs_btree_block *block, 2148 struct xfs_buf *bp0, 2149 bool force_all) 2150 { 2151 union xfs_btree_key key; /* keys from current level */ 2152 union xfs_btree_key *lkey; /* keys from the next level up */ 2153 union xfs_btree_key *hkey; 2154 union xfs_btree_key *nlkey; /* keys from the next level up */ 2155 union xfs_btree_key *nhkey; 2156 struct xfs_buf *bp; 2157 int ptr; 2158 2159 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 2160 2161 /* Exit if there aren't any parent levels to update. */ 2162 if (level + 1 >= cur->bc_nlevels) 2163 return 0; 2164 2165 trace_xfs_btree_updkeys(cur, level, bp0); 2166 2167 lkey = &key; 2168 hkey = xfs_btree_high_key_from_key(cur, lkey); 2169 xfs_btree_get_keys(cur, block, lkey); 2170 for (level++; level < cur->bc_nlevels; level++) { 2171 #ifdef DEBUG 2172 int error; 2173 #endif 2174 block = xfs_btree_get_block(cur, level, &bp); 2175 trace_xfs_btree_updkeys(cur, level, bp); 2176 #ifdef DEBUG 2177 error = xfs_btree_check_block(cur, block, level, bp); 2178 if (error) 2179 return error; 2180 #endif 2181 ptr = cur->bc_levels[level].ptr; 2182 nlkey = xfs_btree_key_addr(cur, ptr, block); 2183 nhkey = xfs_btree_high_key_addr(cur, ptr, block); 2184 if (!force_all && 2185 xfs_btree_keycmp_eq(cur, nlkey, lkey) && 2186 xfs_btree_keycmp_eq(cur, nhkey, hkey)) 2187 break; 2188 xfs_btree_copy_keys(cur, nlkey, lkey, 1); 2189 xfs_btree_log_keys(cur, bp, ptr, ptr); 2190 if (level + 1 >= cur->bc_nlevels) 2191 break; 2192 xfs_btree_get_node_keys(cur, block, lkey); 2193 } 2194 2195 return 0; 2196 } 2197 2198 /* Update all the keys from some level in cursor back to the root. */ 2199 STATIC int 2200 xfs_btree_updkeys_force( 2201 struct xfs_btree_cur *cur, 2202 int level) 2203 { 2204 struct xfs_buf *bp; 2205 struct xfs_btree_block *block; 2206 2207 block = xfs_btree_get_block(cur, level, &bp); 2208 return __xfs_btree_updkeys(cur, level, block, bp, true); 2209 } 2210 2211 /* 2212 * Update the parent keys of the given level, progressing towards the root. 2213 */ 2214 STATIC int 2215 xfs_btree_update_keys( 2216 struct xfs_btree_cur *cur, 2217 int level) 2218 { 2219 struct xfs_btree_block *block; 2220 struct xfs_buf *bp; 2221 union xfs_btree_key *kp; 2222 union xfs_btree_key key; 2223 int ptr; 2224 2225 ASSERT(level >= 0); 2226 2227 block = xfs_btree_get_block(cur, level, &bp); 2228 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) 2229 return __xfs_btree_updkeys(cur, level, block, bp, false); 2230 2231 /* 2232 * Go up the tree from this level toward the root. 2233 * At each level, update the key value to the value input. 2234 * Stop when we reach a level where the cursor isn't pointing 2235 * at the first entry in the block. 2236 */ 2237 xfs_btree_get_keys(cur, block, &key); 2238 for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { 2239 #ifdef DEBUG 2240 int error; 2241 #endif 2242 block = xfs_btree_get_block(cur, level, &bp); 2243 #ifdef DEBUG 2244 error = xfs_btree_check_block(cur, block, level, bp); 2245 if (error) 2246 return error; 2247 #endif 2248 ptr = cur->bc_levels[level].ptr; 2249 kp = xfs_btree_key_addr(cur, ptr, block); 2250 xfs_btree_copy_keys(cur, kp, &key, 1); 2251 xfs_btree_log_keys(cur, bp, ptr, ptr); 2252 } 2253 2254 return 0; 2255 } 2256 2257 /* 2258 * Update the record referred to by cur to the value in the 2259 * given record. This either works (return 0) or gets an 2260 * EFSCORRUPTED error. 2261 */ 2262 int 2263 xfs_btree_update( 2264 struct xfs_btree_cur *cur, 2265 union xfs_btree_rec *rec) 2266 { 2267 struct xfs_btree_block *block; 2268 struct xfs_buf *bp; 2269 int error; 2270 int ptr; 2271 union xfs_btree_rec *rp; 2272 2273 /* Pick up the current block. */ 2274 block = xfs_btree_get_block(cur, 0, &bp); 2275 2276 #ifdef DEBUG 2277 error = xfs_btree_check_block(cur, block, 0, bp); 2278 if (error) 2279 goto error0; 2280 #endif 2281 /* Get the address of the rec to be updated. */ 2282 ptr = cur->bc_levels[0].ptr; 2283 rp = xfs_btree_rec_addr(cur, ptr, block); 2284 2285 /* Fill in the new contents and log them. */ 2286 xfs_btree_copy_recs(cur, rp, rec, 1); 2287 xfs_btree_log_recs(cur, bp, ptr, ptr); 2288 2289 /* 2290 * If we are tracking the last record in the tree and 2291 * we are at the far right edge of the tree, update it. 2292 */ 2293 if (xfs_btree_is_lastrec(cur, block, 0)) { 2294 cur->bc_ops->update_lastrec(cur, block, rec, 2295 ptr, LASTREC_UPDATE); 2296 } 2297 2298 /* Pass new key value up to our parent. */ 2299 if (xfs_btree_needs_key_update(cur, ptr)) { 2300 error = xfs_btree_update_keys(cur, 0); 2301 if (error) 2302 goto error0; 2303 } 2304 2305 return 0; 2306 2307 error0: 2308 return error; 2309 } 2310 2311 /* 2312 * Move 1 record left from cur/level if possible. 2313 * Update cur to reflect the new path. 2314 */ 2315 STATIC int /* error */ 2316 xfs_btree_lshift( 2317 struct xfs_btree_cur *cur, 2318 int level, 2319 int *stat) /* success/failure */ 2320 { 2321 struct xfs_buf *lbp; /* left buffer pointer */ 2322 struct xfs_btree_block *left; /* left btree block */ 2323 int lrecs; /* left record count */ 2324 struct xfs_buf *rbp; /* right buffer pointer */ 2325 struct xfs_btree_block *right; /* right btree block */ 2326 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2327 int rrecs; /* right record count */ 2328 union xfs_btree_ptr lptr; /* left btree pointer */ 2329 union xfs_btree_key *rkp = NULL; /* right btree key */ 2330 union xfs_btree_ptr *rpp = NULL; /* right address pointer */ 2331 union xfs_btree_rec *rrp = NULL; /* right record pointer */ 2332 int error; /* error return value */ 2333 int i; 2334 2335 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2336 level == cur->bc_nlevels - 1) 2337 goto out0; 2338 2339 /* Set up variables for this block as "right". */ 2340 right = xfs_btree_get_block(cur, level, &rbp); 2341 2342 #ifdef DEBUG 2343 error = xfs_btree_check_block(cur, right, level, rbp); 2344 if (error) 2345 goto error0; 2346 #endif 2347 2348 /* If we've got no left sibling then we can't shift an entry left. */ 2349 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2350 if (xfs_btree_ptr_is_null(cur, &lptr)) 2351 goto out0; 2352 2353 /* 2354 * If the cursor entry is the one that would be moved, don't 2355 * do it... it's too complicated. 2356 */ 2357 if (cur->bc_levels[level].ptr <= 1) 2358 goto out0; 2359 2360 /* Set up the left neighbor as "left". */ 2361 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 2362 if (error) 2363 goto error0; 2364 2365 /* If it's full, it can't take another entry. */ 2366 lrecs = xfs_btree_get_numrecs(left); 2367 if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) 2368 goto out0; 2369 2370 rrecs = xfs_btree_get_numrecs(right); 2371 2372 /* 2373 * We add one entry to the left side and remove one for the right side. 2374 * Account for it here, the changes will be updated on disk and logged 2375 * later. 2376 */ 2377 lrecs++; 2378 rrecs--; 2379 2380 XFS_BTREE_STATS_INC(cur, lshift); 2381 XFS_BTREE_STATS_ADD(cur, moves, 1); 2382 2383 /* 2384 * If non-leaf, copy a key and a ptr to the left block. 2385 * Log the changes to the left block. 2386 */ 2387 if (level > 0) { 2388 /* It's a non-leaf. Move keys and pointers. */ 2389 union xfs_btree_key *lkp; /* left btree key */ 2390 union xfs_btree_ptr *lpp; /* left address pointer */ 2391 2392 lkp = xfs_btree_key_addr(cur, lrecs, left); 2393 rkp = xfs_btree_key_addr(cur, 1, right); 2394 2395 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2396 rpp = xfs_btree_ptr_addr(cur, 1, right); 2397 2398 error = xfs_btree_debug_check_ptr(cur, rpp, 0, level); 2399 if (error) 2400 goto error0; 2401 2402 xfs_btree_copy_keys(cur, lkp, rkp, 1); 2403 xfs_btree_copy_ptrs(cur, lpp, rpp, 1); 2404 2405 xfs_btree_log_keys(cur, lbp, lrecs, lrecs); 2406 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); 2407 2408 ASSERT(cur->bc_ops->keys_inorder(cur, 2409 xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); 2410 } else { 2411 /* It's a leaf. Move records. */ 2412 union xfs_btree_rec *lrp; /* left record pointer */ 2413 2414 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2415 rrp = xfs_btree_rec_addr(cur, 1, right); 2416 2417 xfs_btree_copy_recs(cur, lrp, rrp, 1); 2418 xfs_btree_log_recs(cur, lbp, lrecs, lrecs); 2419 2420 ASSERT(cur->bc_ops->recs_inorder(cur, 2421 xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); 2422 } 2423 2424 xfs_btree_set_numrecs(left, lrecs); 2425 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2426 2427 xfs_btree_set_numrecs(right, rrecs); 2428 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2429 2430 /* 2431 * Slide the contents of right down one entry. 2432 */ 2433 XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); 2434 if (level > 0) { 2435 /* It's a nonleaf. operate on keys and ptrs */ 2436 for (i = 0; i < rrecs; i++) { 2437 error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level); 2438 if (error) 2439 goto error0; 2440 } 2441 2442 xfs_btree_shift_keys(cur, 2443 xfs_btree_key_addr(cur, 2, right), 2444 -1, rrecs); 2445 xfs_btree_shift_ptrs(cur, 2446 xfs_btree_ptr_addr(cur, 2, right), 2447 -1, rrecs); 2448 2449 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2450 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2451 } else { 2452 /* It's a leaf. operate on records */ 2453 xfs_btree_shift_recs(cur, 2454 xfs_btree_rec_addr(cur, 2, right), 2455 -1, rrecs); 2456 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2457 } 2458 2459 /* 2460 * Using a temporary cursor, update the parent key values of the 2461 * block on the left. 2462 */ 2463 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2464 error = xfs_btree_dup_cursor(cur, &tcur); 2465 if (error) 2466 goto error0; 2467 i = xfs_btree_firstrec(tcur, level); 2468 if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) { 2469 error = -EFSCORRUPTED; 2470 goto error0; 2471 } 2472 2473 error = xfs_btree_decrement(tcur, level, &i); 2474 if (error) 2475 goto error1; 2476 2477 /* Update the parent high keys of the left block, if needed. */ 2478 error = xfs_btree_update_keys(tcur, level); 2479 if (error) 2480 goto error1; 2481 2482 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2483 } 2484 2485 /* Update the parent keys of the right block. */ 2486 error = xfs_btree_update_keys(cur, level); 2487 if (error) 2488 goto error0; 2489 2490 /* Slide the cursor value left one. */ 2491 cur->bc_levels[level].ptr--; 2492 2493 *stat = 1; 2494 return 0; 2495 2496 out0: 2497 *stat = 0; 2498 return 0; 2499 2500 error0: 2501 return error; 2502 2503 error1: 2504 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2505 return error; 2506 } 2507 2508 /* 2509 * Move 1 record right from cur/level if possible. 2510 * Update cur to reflect the new path. 2511 */ 2512 STATIC int /* error */ 2513 xfs_btree_rshift( 2514 struct xfs_btree_cur *cur, 2515 int level, 2516 int *stat) /* success/failure */ 2517 { 2518 struct xfs_buf *lbp; /* left buffer pointer */ 2519 struct xfs_btree_block *left; /* left btree block */ 2520 struct xfs_buf *rbp; /* right buffer pointer */ 2521 struct xfs_btree_block *right; /* right btree block */ 2522 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2523 union xfs_btree_ptr rptr; /* right block pointer */ 2524 union xfs_btree_key *rkp; /* right btree key */ 2525 int rrecs; /* right record count */ 2526 int lrecs; /* left record count */ 2527 int error; /* error return value */ 2528 int i; /* loop counter */ 2529 2530 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2531 (level == cur->bc_nlevels - 1)) 2532 goto out0; 2533 2534 /* Set up variables for this block as "left". */ 2535 left = xfs_btree_get_block(cur, level, &lbp); 2536 2537 #ifdef DEBUG 2538 error = xfs_btree_check_block(cur, left, level, lbp); 2539 if (error) 2540 goto error0; 2541 #endif 2542 2543 /* If we've got no right sibling then we can't shift an entry right. */ 2544 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2545 if (xfs_btree_ptr_is_null(cur, &rptr)) 2546 goto out0; 2547 2548 /* 2549 * If the cursor entry is the one that would be moved, don't 2550 * do it... it's too complicated. 2551 */ 2552 lrecs = xfs_btree_get_numrecs(left); 2553 if (cur->bc_levels[level].ptr >= lrecs) 2554 goto out0; 2555 2556 /* Set up the right neighbor as "right". */ 2557 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 2558 if (error) 2559 goto error0; 2560 2561 /* If it's full, it can't take another entry. */ 2562 rrecs = xfs_btree_get_numrecs(right); 2563 if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) 2564 goto out0; 2565 2566 XFS_BTREE_STATS_INC(cur, rshift); 2567 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2568 2569 /* 2570 * Make a hole at the start of the right neighbor block, then 2571 * copy the last left block entry to the hole. 2572 */ 2573 if (level > 0) { 2574 /* It's a nonleaf. make a hole in the keys and ptrs */ 2575 union xfs_btree_key *lkp; 2576 union xfs_btree_ptr *lpp; 2577 union xfs_btree_ptr *rpp; 2578 2579 lkp = xfs_btree_key_addr(cur, lrecs, left); 2580 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2581 rkp = xfs_btree_key_addr(cur, 1, right); 2582 rpp = xfs_btree_ptr_addr(cur, 1, right); 2583 2584 for (i = rrecs - 1; i >= 0; i--) { 2585 error = xfs_btree_debug_check_ptr(cur, rpp, i, level); 2586 if (error) 2587 goto error0; 2588 } 2589 2590 xfs_btree_shift_keys(cur, rkp, 1, rrecs); 2591 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); 2592 2593 error = xfs_btree_debug_check_ptr(cur, lpp, 0, level); 2594 if (error) 2595 goto error0; 2596 2597 /* Now put the new data in, and log it. */ 2598 xfs_btree_copy_keys(cur, rkp, lkp, 1); 2599 xfs_btree_copy_ptrs(cur, rpp, lpp, 1); 2600 2601 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); 2602 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); 2603 2604 ASSERT(cur->bc_ops->keys_inorder(cur, rkp, 2605 xfs_btree_key_addr(cur, 2, right))); 2606 } else { 2607 /* It's a leaf. make a hole in the records */ 2608 union xfs_btree_rec *lrp; 2609 union xfs_btree_rec *rrp; 2610 2611 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2612 rrp = xfs_btree_rec_addr(cur, 1, right); 2613 2614 xfs_btree_shift_recs(cur, rrp, 1, rrecs); 2615 2616 /* Now put the new data in, and log it. */ 2617 xfs_btree_copy_recs(cur, rrp, lrp, 1); 2618 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); 2619 } 2620 2621 /* 2622 * Decrement and log left's numrecs, bump and log right's numrecs. 2623 */ 2624 xfs_btree_set_numrecs(left, --lrecs); 2625 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2626 2627 xfs_btree_set_numrecs(right, ++rrecs); 2628 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2629 2630 /* 2631 * Using a temporary cursor, update the parent key values of the 2632 * block on the right. 2633 */ 2634 error = xfs_btree_dup_cursor(cur, &tcur); 2635 if (error) 2636 goto error0; 2637 i = xfs_btree_lastrec(tcur, level); 2638 if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) { 2639 error = -EFSCORRUPTED; 2640 goto error0; 2641 } 2642 2643 error = xfs_btree_increment(tcur, level, &i); 2644 if (error) 2645 goto error1; 2646 2647 /* Update the parent high keys of the left block, if needed. */ 2648 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2649 error = xfs_btree_update_keys(cur, level); 2650 if (error) 2651 goto error1; 2652 } 2653 2654 /* Update the parent keys of the right block. */ 2655 error = xfs_btree_update_keys(tcur, level); 2656 if (error) 2657 goto error1; 2658 2659 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2660 2661 *stat = 1; 2662 return 0; 2663 2664 out0: 2665 *stat = 0; 2666 return 0; 2667 2668 error0: 2669 return error; 2670 2671 error1: 2672 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2673 return error; 2674 } 2675 2676 /* 2677 * Split cur/level block in half. 2678 * Return new block number and the key to its first 2679 * record (to be inserted into parent). 2680 */ 2681 STATIC int /* error */ 2682 __xfs_btree_split( 2683 struct xfs_btree_cur *cur, 2684 int level, 2685 union xfs_btree_ptr *ptrp, 2686 union xfs_btree_key *key, 2687 struct xfs_btree_cur **curp, 2688 int *stat) /* success/failure */ 2689 { 2690 union xfs_btree_ptr lptr; /* left sibling block ptr */ 2691 struct xfs_buf *lbp; /* left buffer pointer */ 2692 struct xfs_btree_block *left; /* left btree block */ 2693 union xfs_btree_ptr rptr; /* right sibling block ptr */ 2694 struct xfs_buf *rbp; /* right buffer pointer */ 2695 struct xfs_btree_block *right; /* right btree block */ 2696 union xfs_btree_ptr rrptr; /* right-right sibling ptr */ 2697 struct xfs_buf *rrbp; /* right-right buffer pointer */ 2698 struct xfs_btree_block *rrblock; /* right-right btree block */ 2699 int lrecs; 2700 int rrecs; 2701 int src_index; 2702 int error; /* error return value */ 2703 int i; 2704 2705 XFS_BTREE_STATS_INC(cur, split); 2706 2707 /* Set up left block (current one). */ 2708 left = xfs_btree_get_block(cur, level, &lbp); 2709 2710 #ifdef DEBUG 2711 error = xfs_btree_check_block(cur, left, level, lbp); 2712 if (error) 2713 goto error0; 2714 #endif 2715 2716 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 2717 2718 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2719 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); 2720 if (error) 2721 goto error0; 2722 if (*stat == 0) 2723 goto out0; 2724 XFS_BTREE_STATS_INC(cur, alloc); 2725 2726 /* Set up the new block as "right". */ 2727 error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp); 2728 if (error) 2729 goto error0; 2730 2731 /* Fill in the btree header for the new right block. */ 2732 xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0); 2733 2734 /* 2735 * Split the entries between the old and the new block evenly. 2736 * Make sure that if there's an odd number of entries now, that 2737 * each new block will have the same number of entries. 2738 */ 2739 lrecs = xfs_btree_get_numrecs(left); 2740 rrecs = lrecs / 2; 2741 if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1) 2742 rrecs++; 2743 src_index = (lrecs - rrecs + 1); 2744 2745 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2746 2747 /* Adjust numrecs for the later get_*_keys() calls. */ 2748 lrecs -= rrecs; 2749 xfs_btree_set_numrecs(left, lrecs); 2750 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); 2751 2752 /* 2753 * Copy btree block entries from the left block over to the 2754 * new block, the right. Update the right block and log the 2755 * changes. 2756 */ 2757 if (level > 0) { 2758 /* It's a non-leaf. Move keys and pointers. */ 2759 union xfs_btree_key *lkp; /* left btree key */ 2760 union xfs_btree_ptr *lpp; /* left address pointer */ 2761 union xfs_btree_key *rkp; /* right btree key */ 2762 union xfs_btree_ptr *rpp; /* right address pointer */ 2763 2764 lkp = xfs_btree_key_addr(cur, src_index, left); 2765 lpp = xfs_btree_ptr_addr(cur, src_index, left); 2766 rkp = xfs_btree_key_addr(cur, 1, right); 2767 rpp = xfs_btree_ptr_addr(cur, 1, right); 2768 2769 for (i = src_index; i < rrecs; i++) { 2770 error = xfs_btree_debug_check_ptr(cur, lpp, i, level); 2771 if (error) 2772 goto error0; 2773 } 2774 2775 /* Copy the keys & pointers to the new block. */ 2776 xfs_btree_copy_keys(cur, rkp, lkp, rrecs); 2777 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); 2778 2779 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2780 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2781 2782 /* Stash the keys of the new block for later insertion. */ 2783 xfs_btree_get_node_keys(cur, right, key); 2784 } else { 2785 /* It's a leaf. Move records. */ 2786 union xfs_btree_rec *lrp; /* left record pointer */ 2787 union xfs_btree_rec *rrp; /* right record pointer */ 2788 2789 lrp = xfs_btree_rec_addr(cur, src_index, left); 2790 rrp = xfs_btree_rec_addr(cur, 1, right); 2791 2792 /* Copy records to the new block. */ 2793 xfs_btree_copy_recs(cur, rrp, lrp, rrecs); 2794 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2795 2796 /* Stash the keys of the new block for later insertion. */ 2797 xfs_btree_get_leaf_keys(cur, right, key); 2798 } 2799 2800 /* 2801 * Find the left block number by looking in the buffer. 2802 * Adjust sibling pointers. 2803 */ 2804 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); 2805 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); 2806 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2807 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2808 2809 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); 2810 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 2811 2812 /* 2813 * If there's a block to the new block's right, make that block 2814 * point back to right instead of to left. 2815 */ 2816 if (!xfs_btree_ptr_is_null(cur, &rrptr)) { 2817 error = xfs_btree_read_buf_block(cur, &rrptr, 2818 0, &rrblock, &rrbp); 2819 if (error) 2820 goto error0; 2821 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); 2822 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 2823 } 2824 2825 /* Update the parent high keys of the left block, if needed. */ 2826 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2827 error = xfs_btree_update_keys(cur, level); 2828 if (error) 2829 goto error0; 2830 } 2831 2832 /* 2833 * If the cursor is really in the right block, move it there. 2834 * If it's just pointing past the last entry in left, then we'll 2835 * insert there, so don't change anything in that case. 2836 */ 2837 if (cur->bc_levels[level].ptr > lrecs + 1) { 2838 xfs_btree_setbuf(cur, level, rbp); 2839 cur->bc_levels[level].ptr -= lrecs; 2840 } 2841 /* 2842 * If there are more levels, we'll need another cursor which refers 2843 * the right block, no matter where this cursor was. 2844 */ 2845 if (level + 1 < cur->bc_nlevels) { 2846 error = xfs_btree_dup_cursor(cur, curp); 2847 if (error) 2848 goto error0; 2849 (*curp)->bc_levels[level + 1].ptr++; 2850 } 2851 *ptrp = rptr; 2852 *stat = 1; 2853 return 0; 2854 out0: 2855 *stat = 0; 2856 return 0; 2857 2858 error0: 2859 return error; 2860 } 2861 2862 #ifdef __KERNEL__ 2863 struct xfs_btree_split_args { 2864 struct xfs_btree_cur *cur; 2865 int level; 2866 union xfs_btree_ptr *ptrp; 2867 union xfs_btree_key *key; 2868 struct xfs_btree_cur **curp; 2869 int *stat; /* success/failure */ 2870 int result; 2871 bool kswapd; /* allocation in kswapd context */ 2872 struct completion *done; 2873 struct work_struct work; 2874 }; 2875 2876 /* 2877 * Stack switching interfaces for allocation 2878 */ 2879 static void 2880 xfs_btree_split_worker( 2881 struct work_struct *work) 2882 { 2883 struct xfs_btree_split_args *args = container_of(work, 2884 struct xfs_btree_split_args, work); 2885 unsigned long pflags; 2886 unsigned long new_pflags = 0; 2887 2888 /* 2889 * we are in a transaction context here, but may also be doing work 2890 * in kswapd context, and hence we may need to inherit that state 2891 * temporarily to ensure that we don't block waiting for memory reclaim 2892 * in any way. 2893 */ 2894 if (args->kswapd) 2895 new_pflags |= PF_MEMALLOC | PF_KSWAPD; 2896 2897 current_set_flags_nested(&pflags, new_pflags); 2898 xfs_trans_set_context(args->cur->bc_tp); 2899 2900 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, 2901 args->key, args->curp, args->stat); 2902 2903 xfs_trans_clear_context(args->cur->bc_tp); 2904 current_restore_flags_nested(&pflags, new_pflags); 2905 2906 /* 2907 * Do not access args after complete() has run here. We don't own args 2908 * and the owner may run and free args before we return here. 2909 */ 2910 complete(args->done); 2911 2912 } 2913 2914 /* 2915 * BMBT split requests often come in with little stack to work on so we push 2916 * them off to a worker thread so there is lots of stack to use. For the other 2917 * btree types, just call directly to avoid the context switch overhead here. 2918 * 2919 * Care must be taken here - the work queue rescuer thread introduces potential 2920 * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new 2921 * AGFs to allocate blocks. A task being run by the rescuer could attempt to 2922 * lock an AGF that is already locked by a task queued to run by the rescuer, 2923 * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to 2924 * release it until the current thread it is running gains the lock. 2925 * 2926 * To avoid this issue, we only ever queue BMBT splits that don't have an AGF 2927 * already locked to allocate from. The only place that doesn't hold an AGF 2928 * locked is unwritten extent conversion at IO completion, but that has already 2929 * been offloaded to a worker thread and hence has no stack consumption issues 2930 * we have to worry about. 2931 */ 2932 STATIC int /* error */ 2933 xfs_btree_split( 2934 struct xfs_btree_cur *cur, 2935 int level, 2936 union xfs_btree_ptr *ptrp, 2937 union xfs_btree_key *key, 2938 struct xfs_btree_cur **curp, 2939 int *stat) /* success/failure */ 2940 { 2941 struct xfs_btree_split_args args; 2942 DECLARE_COMPLETION_ONSTACK(done); 2943 2944 if (cur->bc_btnum != XFS_BTNUM_BMAP || 2945 cur->bc_tp->t_highest_agno == NULLAGNUMBER) 2946 return __xfs_btree_split(cur, level, ptrp, key, curp, stat); 2947 2948 args.cur = cur; 2949 args.level = level; 2950 args.ptrp = ptrp; 2951 args.key = key; 2952 args.curp = curp; 2953 args.stat = stat; 2954 args.done = &done; 2955 args.kswapd = current_is_kswapd(); 2956 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); 2957 queue_work(xfs_alloc_wq, &args.work); 2958 wait_for_completion(&done); 2959 destroy_work_on_stack(&args.work); 2960 return args.result; 2961 } 2962 #else 2963 #define xfs_btree_split __xfs_btree_split 2964 #endif /* __KERNEL__ */ 2965 2966 2967 /* 2968 * Copy the old inode root contents into a real block and make the 2969 * broot point to it. 2970 */ 2971 int /* error */ 2972 xfs_btree_new_iroot( 2973 struct xfs_btree_cur *cur, /* btree cursor */ 2974 int *logflags, /* logging flags for inode */ 2975 int *stat) /* return status - 0 fail */ 2976 { 2977 struct xfs_buf *cbp; /* buffer for cblock */ 2978 struct xfs_btree_block *block; /* btree block */ 2979 struct xfs_btree_block *cblock; /* child btree block */ 2980 union xfs_btree_key *ckp; /* child key pointer */ 2981 union xfs_btree_ptr *cpp; /* child ptr pointer */ 2982 union xfs_btree_key *kp; /* pointer to btree key */ 2983 union xfs_btree_ptr *pp; /* pointer to block addr */ 2984 union xfs_btree_ptr nptr; /* new block addr */ 2985 int level; /* btree level */ 2986 int error; /* error return code */ 2987 int i; /* loop counter */ 2988 2989 XFS_BTREE_STATS_INC(cur, newroot); 2990 2991 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 2992 2993 level = cur->bc_nlevels - 1; 2994 2995 block = xfs_btree_get_iroot(cur); 2996 pp = xfs_btree_ptr_addr(cur, 1, block); 2997 2998 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2999 error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); 3000 if (error) 3001 goto error0; 3002 if (*stat == 0) 3003 return 0; 3004 3005 XFS_BTREE_STATS_INC(cur, alloc); 3006 3007 /* Copy the root into a real block. */ 3008 error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp); 3009 if (error) 3010 goto error0; 3011 3012 /* 3013 * we can't just memcpy() the root in for CRC enabled btree blocks. 3014 * In that case have to also ensure the blkno remains correct 3015 */ 3016 memcpy(cblock, block, xfs_btree_block_len(cur)); 3017 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 3018 __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp)); 3019 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 3020 cblock->bb_u.l.bb_blkno = bno; 3021 else 3022 cblock->bb_u.s.bb_blkno = bno; 3023 } 3024 3025 be16_add_cpu(&block->bb_level, 1); 3026 xfs_btree_set_numrecs(block, 1); 3027 cur->bc_nlevels++; 3028 ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); 3029 cur->bc_levels[level + 1].ptr = 1; 3030 3031 kp = xfs_btree_key_addr(cur, 1, block); 3032 ckp = xfs_btree_key_addr(cur, 1, cblock); 3033 xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); 3034 3035 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3036 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { 3037 error = xfs_btree_debug_check_ptr(cur, pp, i, level); 3038 if (error) 3039 goto error0; 3040 } 3041 3042 xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); 3043 3044 error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level); 3045 if (error) 3046 goto error0; 3047 3048 xfs_btree_copy_ptrs(cur, pp, &nptr, 1); 3049 3050 xfs_iroot_realloc(cur->bc_ino.ip, 3051 1 - xfs_btree_get_numrecs(cblock), 3052 cur->bc_ino.whichfork); 3053 3054 xfs_btree_setbuf(cur, level, cbp); 3055 3056 /* 3057 * Do all this logging at the end so that 3058 * the root is at the right level. 3059 */ 3060 xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); 3061 xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 3062 xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 3063 3064 *logflags |= 3065 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork); 3066 *stat = 1; 3067 return 0; 3068 error0: 3069 return error; 3070 } 3071 3072 /* 3073 * Allocate a new root block, fill it in. 3074 */ 3075 STATIC int /* error */ 3076 xfs_btree_new_root( 3077 struct xfs_btree_cur *cur, /* btree cursor */ 3078 int *stat) /* success/failure */ 3079 { 3080 struct xfs_btree_block *block; /* one half of the old root block */ 3081 struct xfs_buf *bp; /* buffer containing block */ 3082 int error; /* error return value */ 3083 struct xfs_buf *lbp; /* left buffer pointer */ 3084 struct xfs_btree_block *left; /* left btree block */ 3085 struct xfs_buf *nbp; /* new (root) buffer */ 3086 struct xfs_btree_block *new; /* new (root) btree block */ 3087 int nptr; /* new value for key index, 1 or 2 */ 3088 struct xfs_buf *rbp; /* right buffer pointer */ 3089 struct xfs_btree_block *right; /* right btree block */ 3090 union xfs_btree_ptr rptr; 3091 union xfs_btree_ptr lptr; 3092 3093 XFS_BTREE_STATS_INC(cur, newroot); 3094 3095 /* initialise our start point from the cursor */ 3096 cur->bc_ops->init_ptr_from_cur(cur, &rptr); 3097 3098 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 3099 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); 3100 if (error) 3101 goto error0; 3102 if (*stat == 0) 3103 goto out0; 3104 XFS_BTREE_STATS_INC(cur, alloc); 3105 3106 /* Set up the new block. */ 3107 error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp); 3108 if (error) 3109 goto error0; 3110 3111 /* Set the root in the holding structure increasing the level by 1. */ 3112 cur->bc_ops->set_root(cur, &lptr, 1); 3113 3114 /* 3115 * At the previous root level there are now two blocks: the old root, 3116 * and the new block generated when it was split. We don't know which 3117 * one the cursor is pointing at, so we set up variables "left" and 3118 * "right" for each case. 3119 */ 3120 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); 3121 3122 #ifdef DEBUG 3123 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); 3124 if (error) 3125 goto error0; 3126 #endif 3127 3128 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3129 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3130 /* Our block is left, pick up the right block. */ 3131 lbp = bp; 3132 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 3133 left = block; 3134 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 3135 if (error) 3136 goto error0; 3137 bp = rbp; 3138 nptr = 1; 3139 } else { 3140 /* Our block is right, pick up the left block. */ 3141 rbp = bp; 3142 xfs_btree_buf_to_ptr(cur, rbp, &rptr); 3143 right = block; 3144 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 3145 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 3146 if (error) 3147 goto error0; 3148 bp = lbp; 3149 nptr = 2; 3150 } 3151 3152 /* Fill in the new block's btree header and log it. */ 3153 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); 3154 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 3155 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && 3156 !xfs_btree_ptr_is_null(cur, &rptr)); 3157 3158 /* Fill in the key data in the new root. */ 3159 if (xfs_btree_get_level(left) > 0) { 3160 /* 3161 * Get the keys for the left block's keys and put them directly 3162 * in the parent block. Do the same for the right block. 3163 */ 3164 xfs_btree_get_node_keys(cur, left, 3165 xfs_btree_key_addr(cur, 1, new)); 3166 xfs_btree_get_node_keys(cur, right, 3167 xfs_btree_key_addr(cur, 2, new)); 3168 } else { 3169 /* 3170 * Get the keys for the left block's records and put them 3171 * directly in the parent block. Do the same for the right 3172 * block. 3173 */ 3174 xfs_btree_get_leaf_keys(cur, left, 3175 xfs_btree_key_addr(cur, 1, new)); 3176 xfs_btree_get_leaf_keys(cur, right, 3177 xfs_btree_key_addr(cur, 2, new)); 3178 } 3179 xfs_btree_log_keys(cur, nbp, 1, 2); 3180 3181 /* Fill in the pointer data in the new root. */ 3182 xfs_btree_copy_ptrs(cur, 3183 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); 3184 xfs_btree_copy_ptrs(cur, 3185 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); 3186 xfs_btree_log_ptrs(cur, nbp, 1, 2); 3187 3188 /* Fix up the cursor. */ 3189 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); 3190 cur->bc_levels[cur->bc_nlevels].ptr = nptr; 3191 cur->bc_nlevels++; 3192 ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); 3193 *stat = 1; 3194 return 0; 3195 error0: 3196 return error; 3197 out0: 3198 *stat = 0; 3199 return 0; 3200 } 3201 3202 STATIC int 3203 xfs_btree_make_block_unfull( 3204 struct xfs_btree_cur *cur, /* btree cursor */ 3205 int level, /* btree level */ 3206 int numrecs,/* # of recs in block */ 3207 int *oindex,/* old tree index */ 3208 int *index, /* new tree index */ 3209 union xfs_btree_ptr *nptr, /* new btree ptr */ 3210 struct xfs_btree_cur **ncur, /* new btree cursor */ 3211 union xfs_btree_key *key, /* key of new block */ 3212 int *stat) 3213 { 3214 int error = 0; 3215 3216 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3217 level == cur->bc_nlevels - 1) { 3218 struct xfs_inode *ip = cur->bc_ino.ip; 3219 3220 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { 3221 /* A root block that can be made bigger. */ 3222 xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork); 3223 *stat = 1; 3224 } else { 3225 /* A root block that needs replacing */ 3226 int logflags = 0; 3227 3228 error = xfs_btree_new_iroot(cur, &logflags, stat); 3229 if (error || *stat == 0) 3230 return error; 3231 3232 xfs_trans_log_inode(cur->bc_tp, ip, logflags); 3233 } 3234 3235 return 0; 3236 } 3237 3238 /* First, try shifting an entry to the right neighbor. */ 3239 error = xfs_btree_rshift(cur, level, stat); 3240 if (error || *stat) 3241 return error; 3242 3243 /* Next, try shifting an entry to the left neighbor. */ 3244 error = xfs_btree_lshift(cur, level, stat); 3245 if (error) 3246 return error; 3247 3248 if (*stat) { 3249 *oindex = *index = cur->bc_levels[level].ptr; 3250 return 0; 3251 } 3252 3253 /* 3254 * Next, try splitting the current block in half. 3255 * 3256 * If this works we have to re-set our variables because we 3257 * could be in a different block now. 3258 */ 3259 error = xfs_btree_split(cur, level, nptr, key, ncur, stat); 3260 if (error || *stat == 0) 3261 return error; 3262 3263 3264 *index = cur->bc_levels[level].ptr; 3265 return 0; 3266 } 3267 3268 /* 3269 * Insert one record/level. Return information to the caller 3270 * allowing the next level up to proceed if necessary. 3271 */ 3272 STATIC int 3273 xfs_btree_insrec( 3274 struct xfs_btree_cur *cur, /* btree cursor */ 3275 int level, /* level to insert record at */ 3276 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ 3277 union xfs_btree_rec *rec, /* record to insert */ 3278 union xfs_btree_key *key, /* i/o: block key for ptrp */ 3279 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ 3280 int *stat) /* success/failure */ 3281 { 3282 struct xfs_btree_block *block; /* btree block */ 3283 struct xfs_buf *bp; /* buffer for block */ 3284 union xfs_btree_ptr nptr; /* new block ptr */ 3285 struct xfs_btree_cur *ncur = NULL; /* new btree cursor */ 3286 union xfs_btree_key nkey; /* new block key */ 3287 union xfs_btree_key *lkey; 3288 int optr; /* old key/record index */ 3289 int ptr; /* key/record index */ 3290 int numrecs;/* number of records */ 3291 int error; /* error return value */ 3292 int i; 3293 xfs_daddr_t old_bn; 3294 3295 ncur = NULL; 3296 lkey = &nkey; 3297 3298 /* 3299 * If we have an external root pointer, and we've made it to the 3300 * root level, allocate a new root block and we're done. 3301 */ 3302 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3303 (level >= cur->bc_nlevels)) { 3304 error = xfs_btree_new_root(cur, stat); 3305 xfs_btree_set_ptr_null(cur, ptrp); 3306 3307 return error; 3308 } 3309 3310 /* If we're off the left edge, return failure. */ 3311 ptr = cur->bc_levels[level].ptr; 3312 if (ptr == 0) { 3313 *stat = 0; 3314 return 0; 3315 } 3316 3317 optr = ptr; 3318 3319 XFS_BTREE_STATS_INC(cur, insrec); 3320 3321 /* Get pointers to the btree buffer and block. */ 3322 block = xfs_btree_get_block(cur, level, &bp); 3323 old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL; 3324 numrecs = xfs_btree_get_numrecs(block); 3325 3326 #ifdef DEBUG 3327 error = xfs_btree_check_block(cur, block, level, bp); 3328 if (error) 3329 goto error0; 3330 3331 /* Check that the new entry is being inserted in the right place. */ 3332 if (ptr <= numrecs) { 3333 if (level == 0) { 3334 ASSERT(cur->bc_ops->recs_inorder(cur, rec, 3335 xfs_btree_rec_addr(cur, ptr, block))); 3336 } else { 3337 ASSERT(cur->bc_ops->keys_inorder(cur, key, 3338 xfs_btree_key_addr(cur, ptr, block))); 3339 } 3340 } 3341 #endif 3342 3343 /* 3344 * If the block is full, we can't insert the new entry until we 3345 * make the block un-full. 3346 */ 3347 xfs_btree_set_ptr_null(cur, &nptr); 3348 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { 3349 error = xfs_btree_make_block_unfull(cur, level, numrecs, 3350 &optr, &ptr, &nptr, &ncur, lkey, stat); 3351 if (error || *stat == 0) 3352 goto error0; 3353 } 3354 3355 /* 3356 * The current block may have changed if the block was 3357 * previously full and we have just made space in it. 3358 */ 3359 block = xfs_btree_get_block(cur, level, &bp); 3360 numrecs = xfs_btree_get_numrecs(block); 3361 3362 #ifdef DEBUG 3363 error = xfs_btree_check_block(cur, block, level, bp); 3364 if (error) 3365 goto error0; 3366 #endif 3367 3368 /* 3369 * At this point we know there's room for our new entry in the block 3370 * we're pointing at. 3371 */ 3372 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); 3373 3374 if (level > 0) { 3375 /* It's a nonleaf. make a hole in the keys and ptrs */ 3376 union xfs_btree_key *kp; 3377 union xfs_btree_ptr *pp; 3378 3379 kp = xfs_btree_key_addr(cur, ptr, block); 3380 pp = xfs_btree_ptr_addr(cur, ptr, block); 3381 3382 for (i = numrecs - ptr; i >= 0; i--) { 3383 error = xfs_btree_debug_check_ptr(cur, pp, i, level); 3384 if (error) 3385 goto error0; 3386 } 3387 3388 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); 3389 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); 3390 3391 error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level); 3392 if (error) 3393 goto error0; 3394 3395 /* Now put the new data in, bump numrecs and log it. */ 3396 xfs_btree_copy_keys(cur, kp, key, 1); 3397 xfs_btree_copy_ptrs(cur, pp, ptrp, 1); 3398 numrecs++; 3399 xfs_btree_set_numrecs(block, numrecs); 3400 xfs_btree_log_ptrs(cur, bp, ptr, numrecs); 3401 xfs_btree_log_keys(cur, bp, ptr, numrecs); 3402 #ifdef DEBUG 3403 if (ptr < numrecs) { 3404 ASSERT(cur->bc_ops->keys_inorder(cur, kp, 3405 xfs_btree_key_addr(cur, ptr + 1, block))); 3406 } 3407 #endif 3408 } else { 3409 /* It's a leaf. make a hole in the records */ 3410 union xfs_btree_rec *rp; 3411 3412 rp = xfs_btree_rec_addr(cur, ptr, block); 3413 3414 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); 3415 3416 /* Now put the new data in, bump numrecs and log it. */ 3417 xfs_btree_copy_recs(cur, rp, rec, 1); 3418 xfs_btree_set_numrecs(block, ++numrecs); 3419 xfs_btree_log_recs(cur, bp, ptr, numrecs); 3420 #ifdef DEBUG 3421 if (ptr < numrecs) { 3422 ASSERT(cur->bc_ops->recs_inorder(cur, rp, 3423 xfs_btree_rec_addr(cur, ptr + 1, block))); 3424 } 3425 #endif 3426 } 3427 3428 /* Log the new number of records in the btree header. */ 3429 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3430 3431 /* 3432 * If we just inserted into a new tree block, we have to 3433 * recalculate nkey here because nkey is out of date. 3434 * 3435 * Otherwise we're just updating an existing block (having shoved 3436 * some records into the new tree block), so use the regular key 3437 * update mechanism. 3438 */ 3439 if (bp && xfs_buf_daddr(bp) != old_bn) { 3440 xfs_btree_get_keys(cur, block, lkey); 3441 } else if (xfs_btree_needs_key_update(cur, optr)) { 3442 error = xfs_btree_update_keys(cur, level); 3443 if (error) 3444 goto error0; 3445 } 3446 3447 /* 3448 * If we are tracking the last record in the tree and 3449 * we are at the far right edge of the tree, update it. 3450 */ 3451 if (xfs_btree_is_lastrec(cur, block, level)) { 3452 cur->bc_ops->update_lastrec(cur, block, rec, 3453 ptr, LASTREC_INSREC); 3454 } 3455 3456 /* 3457 * Return the new block number, if any. 3458 * If there is one, give back a record value and a cursor too. 3459 */ 3460 *ptrp = nptr; 3461 if (!xfs_btree_ptr_is_null(cur, &nptr)) { 3462 xfs_btree_copy_keys(cur, key, lkey, 1); 3463 *curp = ncur; 3464 } 3465 3466 *stat = 1; 3467 return 0; 3468 3469 error0: 3470 if (ncur) 3471 xfs_btree_del_cursor(ncur, error); 3472 return error; 3473 } 3474 3475 /* 3476 * Insert the record at the point referenced by cur. 3477 * 3478 * A multi-level split of the tree on insert will invalidate the original 3479 * cursor. All callers of this function should assume that the cursor is 3480 * no longer valid and revalidate it. 3481 */ 3482 int 3483 xfs_btree_insert( 3484 struct xfs_btree_cur *cur, 3485 int *stat) 3486 { 3487 int error; /* error return value */ 3488 int i; /* result value, 0 for failure */ 3489 int level; /* current level number in btree */ 3490 union xfs_btree_ptr nptr; /* new block number (split result) */ 3491 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3492 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3493 union xfs_btree_key bkey; /* key of block to insert */ 3494 union xfs_btree_key *key; 3495 union xfs_btree_rec rec; /* record to insert */ 3496 3497 level = 0; 3498 ncur = NULL; 3499 pcur = cur; 3500 key = &bkey; 3501 3502 xfs_btree_set_ptr_null(cur, &nptr); 3503 3504 /* Make a key out of the record data to be inserted, and save it. */ 3505 cur->bc_ops->init_rec_from_cur(cur, &rec); 3506 cur->bc_ops->init_key_from_rec(key, &rec); 3507 3508 /* 3509 * Loop going up the tree, starting at the leaf level. 3510 * Stop when we don't get a split block, that must mean that 3511 * the insert is finished with this level. 3512 */ 3513 do { 3514 /* 3515 * Insert nrec/nptr into this level of the tree. 3516 * Note if we fail, nptr will be null. 3517 */ 3518 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key, 3519 &ncur, &i); 3520 if (error) { 3521 if (pcur != cur) 3522 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 3523 goto error0; 3524 } 3525 3526 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3527 error = -EFSCORRUPTED; 3528 goto error0; 3529 } 3530 level++; 3531 3532 /* 3533 * See if the cursor we just used is trash. 3534 * Can't trash the caller's cursor, but otherwise we should 3535 * if ncur is a new cursor or we're about to be done. 3536 */ 3537 if (pcur != cur && 3538 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { 3539 /* Save the state from the cursor before we trash it */ 3540 if (cur->bc_ops->update_cursor) 3541 cur->bc_ops->update_cursor(pcur, cur); 3542 cur->bc_nlevels = pcur->bc_nlevels; 3543 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 3544 } 3545 /* If we got a new cursor, switch to it. */ 3546 if (ncur) { 3547 pcur = ncur; 3548 ncur = NULL; 3549 } 3550 } while (!xfs_btree_ptr_is_null(cur, &nptr)); 3551 3552 *stat = i; 3553 return 0; 3554 error0: 3555 return error; 3556 } 3557 3558 /* 3559 * Try to merge a non-leaf block back into the inode root. 3560 * 3561 * Note: the killroot names comes from the fact that we're effectively 3562 * killing the old root block. But because we can't just delete the 3563 * inode we have to copy the single block it was pointing to into the 3564 * inode. 3565 */ 3566 STATIC int 3567 xfs_btree_kill_iroot( 3568 struct xfs_btree_cur *cur) 3569 { 3570 int whichfork = cur->bc_ino.whichfork; 3571 struct xfs_inode *ip = cur->bc_ino.ip; 3572 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 3573 struct xfs_btree_block *block; 3574 struct xfs_btree_block *cblock; 3575 union xfs_btree_key *kp; 3576 union xfs_btree_key *ckp; 3577 union xfs_btree_ptr *pp; 3578 union xfs_btree_ptr *cpp; 3579 struct xfs_buf *cbp; 3580 int level; 3581 int index; 3582 int numrecs; 3583 int error; 3584 #ifdef DEBUG 3585 union xfs_btree_ptr ptr; 3586 #endif 3587 int i; 3588 3589 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 3590 ASSERT(cur->bc_nlevels > 1); 3591 3592 /* 3593 * Don't deal with the root block needs to be a leaf case. 3594 * We're just going to turn the thing back into extents anyway. 3595 */ 3596 level = cur->bc_nlevels - 1; 3597 if (level == 1) 3598 goto out0; 3599 3600 /* 3601 * Give up if the root has multiple children. 3602 */ 3603 block = xfs_btree_get_iroot(cur); 3604 if (xfs_btree_get_numrecs(block) != 1) 3605 goto out0; 3606 3607 cblock = xfs_btree_get_block(cur, level - 1, &cbp); 3608 numrecs = xfs_btree_get_numrecs(cblock); 3609 3610 /* 3611 * Only do this if the next level will fit. 3612 * Then the data must be copied up to the inode, 3613 * instead of freeing the root you free the next level. 3614 */ 3615 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) 3616 goto out0; 3617 3618 XFS_BTREE_STATS_INC(cur, killroot); 3619 3620 #ifdef DEBUG 3621 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 3622 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3623 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 3624 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3625 #endif 3626 3627 index = numrecs - cur->bc_ops->get_maxrecs(cur, level); 3628 if (index) { 3629 xfs_iroot_realloc(cur->bc_ino.ip, index, 3630 cur->bc_ino.whichfork); 3631 block = ifp->if_broot; 3632 } 3633 3634 be16_add_cpu(&block->bb_numrecs, index); 3635 ASSERT(block->bb_numrecs == cblock->bb_numrecs); 3636 3637 kp = xfs_btree_key_addr(cur, 1, block); 3638 ckp = xfs_btree_key_addr(cur, 1, cblock); 3639 xfs_btree_copy_keys(cur, kp, ckp, numrecs); 3640 3641 pp = xfs_btree_ptr_addr(cur, 1, block); 3642 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3643 3644 for (i = 0; i < numrecs; i++) { 3645 error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1); 3646 if (error) 3647 return error; 3648 } 3649 3650 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); 3651 3652 error = xfs_btree_free_block(cur, cbp); 3653 if (error) 3654 return error; 3655 3656 cur->bc_levels[level - 1].bp = NULL; 3657 be16_add_cpu(&block->bb_level, -1); 3658 xfs_trans_log_inode(cur->bc_tp, ip, 3659 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork)); 3660 cur->bc_nlevels--; 3661 out0: 3662 return 0; 3663 } 3664 3665 /* 3666 * Kill the current root node, and replace it with it's only child node. 3667 */ 3668 STATIC int 3669 xfs_btree_kill_root( 3670 struct xfs_btree_cur *cur, 3671 struct xfs_buf *bp, 3672 int level, 3673 union xfs_btree_ptr *newroot) 3674 { 3675 int error; 3676 3677 XFS_BTREE_STATS_INC(cur, killroot); 3678 3679 /* 3680 * Update the root pointer, decreasing the level by 1 and then 3681 * free the old root. 3682 */ 3683 cur->bc_ops->set_root(cur, newroot, -1); 3684 3685 error = xfs_btree_free_block(cur, bp); 3686 if (error) 3687 return error; 3688 3689 cur->bc_levels[level].bp = NULL; 3690 cur->bc_levels[level].ra = 0; 3691 cur->bc_nlevels--; 3692 3693 return 0; 3694 } 3695 3696 STATIC int 3697 xfs_btree_dec_cursor( 3698 struct xfs_btree_cur *cur, 3699 int level, 3700 int *stat) 3701 { 3702 int error; 3703 int i; 3704 3705 if (level > 0) { 3706 error = xfs_btree_decrement(cur, level, &i); 3707 if (error) 3708 return error; 3709 } 3710 3711 *stat = 1; 3712 return 0; 3713 } 3714 3715 /* 3716 * Single level of the btree record deletion routine. 3717 * Delete record pointed to by cur/level. 3718 * Remove the record from its block then rebalance the tree. 3719 * Return 0 for error, 1 for done, 2 to go on to the next level. 3720 */ 3721 STATIC int /* error */ 3722 xfs_btree_delrec( 3723 struct xfs_btree_cur *cur, /* btree cursor */ 3724 int level, /* level removing record from */ 3725 int *stat) /* fail/done/go-on */ 3726 { 3727 struct xfs_btree_block *block; /* btree block */ 3728 union xfs_btree_ptr cptr; /* current block ptr */ 3729 struct xfs_buf *bp; /* buffer for block */ 3730 int error; /* error return value */ 3731 int i; /* loop counter */ 3732 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3733 struct xfs_buf *lbp; /* left buffer pointer */ 3734 struct xfs_btree_block *left; /* left btree block */ 3735 int lrecs = 0; /* left record count */ 3736 int ptr; /* key/record index */ 3737 union xfs_btree_ptr rptr; /* right sibling block ptr */ 3738 struct xfs_buf *rbp; /* right buffer pointer */ 3739 struct xfs_btree_block *right; /* right btree block */ 3740 struct xfs_btree_block *rrblock; /* right-right btree block */ 3741 struct xfs_buf *rrbp; /* right-right buffer pointer */ 3742 int rrecs = 0; /* right record count */ 3743 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 3744 int numrecs; /* temporary numrec count */ 3745 3746 tcur = NULL; 3747 3748 /* Get the index of the entry being deleted, check for nothing there. */ 3749 ptr = cur->bc_levels[level].ptr; 3750 if (ptr == 0) { 3751 *stat = 0; 3752 return 0; 3753 } 3754 3755 /* Get the buffer & block containing the record or key/ptr. */ 3756 block = xfs_btree_get_block(cur, level, &bp); 3757 numrecs = xfs_btree_get_numrecs(block); 3758 3759 #ifdef DEBUG 3760 error = xfs_btree_check_block(cur, block, level, bp); 3761 if (error) 3762 goto error0; 3763 #endif 3764 3765 /* Fail if we're off the end of the block. */ 3766 if (ptr > numrecs) { 3767 *stat = 0; 3768 return 0; 3769 } 3770 3771 XFS_BTREE_STATS_INC(cur, delrec); 3772 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); 3773 3774 /* Excise the entries being deleted. */ 3775 if (level > 0) { 3776 /* It's a nonleaf. operate on keys and ptrs */ 3777 union xfs_btree_key *lkp; 3778 union xfs_btree_ptr *lpp; 3779 3780 lkp = xfs_btree_key_addr(cur, ptr + 1, block); 3781 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); 3782 3783 for (i = 0; i < numrecs - ptr; i++) { 3784 error = xfs_btree_debug_check_ptr(cur, lpp, i, level); 3785 if (error) 3786 goto error0; 3787 } 3788 3789 if (ptr < numrecs) { 3790 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); 3791 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); 3792 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3793 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3794 } 3795 } else { 3796 /* It's a leaf. operate on records */ 3797 if (ptr < numrecs) { 3798 xfs_btree_shift_recs(cur, 3799 xfs_btree_rec_addr(cur, ptr + 1, block), 3800 -1, numrecs - ptr); 3801 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3802 } 3803 } 3804 3805 /* 3806 * Decrement and log the number of entries in the block. 3807 */ 3808 xfs_btree_set_numrecs(block, --numrecs); 3809 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3810 3811 /* 3812 * If we are tracking the last record in the tree and 3813 * we are at the far right edge of the tree, update it. 3814 */ 3815 if (xfs_btree_is_lastrec(cur, block, level)) { 3816 cur->bc_ops->update_lastrec(cur, block, NULL, 3817 ptr, LASTREC_DELREC); 3818 } 3819 3820 /* 3821 * We're at the root level. First, shrink the root block in-memory. 3822 * Try to get rid of the next level down. If we can't then there's 3823 * nothing left to do. 3824 */ 3825 if (level == cur->bc_nlevels - 1) { 3826 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3827 xfs_iroot_realloc(cur->bc_ino.ip, -1, 3828 cur->bc_ino.whichfork); 3829 3830 error = xfs_btree_kill_iroot(cur); 3831 if (error) 3832 goto error0; 3833 3834 error = xfs_btree_dec_cursor(cur, level, stat); 3835 if (error) 3836 goto error0; 3837 *stat = 1; 3838 return 0; 3839 } 3840 3841 /* 3842 * If this is the root level, and there's only one entry left, 3843 * and it's NOT the leaf level, then we can get rid of this 3844 * level. 3845 */ 3846 if (numrecs == 1 && level > 0) { 3847 union xfs_btree_ptr *pp; 3848 /* 3849 * pp is still set to the first pointer in the block. 3850 * Make it the new root of the btree. 3851 */ 3852 pp = xfs_btree_ptr_addr(cur, 1, block); 3853 error = xfs_btree_kill_root(cur, bp, level, pp); 3854 if (error) 3855 goto error0; 3856 } else if (level > 0) { 3857 error = xfs_btree_dec_cursor(cur, level, stat); 3858 if (error) 3859 goto error0; 3860 } 3861 *stat = 1; 3862 return 0; 3863 } 3864 3865 /* 3866 * If we deleted the leftmost entry in the block, update the 3867 * key values above us in the tree. 3868 */ 3869 if (xfs_btree_needs_key_update(cur, ptr)) { 3870 error = xfs_btree_update_keys(cur, level); 3871 if (error) 3872 goto error0; 3873 } 3874 3875 /* 3876 * If the number of records remaining in the block is at least 3877 * the minimum, we're done. 3878 */ 3879 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { 3880 error = xfs_btree_dec_cursor(cur, level, stat); 3881 if (error) 3882 goto error0; 3883 return 0; 3884 } 3885 3886 /* 3887 * Otherwise, we have to move some records around to keep the 3888 * tree balanced. Look at the left and right sibling blocks to 3889 * see if we can re-balance by moving only one record. 3890 */ 3891 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3892 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); 3893 3894 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3895 /* 3896 * One child of root, need to get a chance to copy its contents 3897 * into the root and delete it. Can't go up to next level, 3898 * there's nothing to delete there. 3899 */ 3900 if (xfs_btree_ptr_is_null(cur, &rptr) && 3901 xfs_btree_ptr_is_null(cur, &lptr) && 3902 level == cur->bc_nlevels - 2) { 3903 error = xfs_btree_kill_iroot(cur); 3904 if (!error) 3905 error = xfs_btree_dec_cursor(cur, level, stat); 3906 if (error) 3907 goto error0; 3908 return 0; 3909 } 3910 } 3911 3912 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || 3913 !xfs_btree_ptr_is_null(cur, &lptr)); 3914 3915 /* 3916 * Duplicate the cursor so our btree manipulations here won't 3917 * disrupt the next level up. 3918 */ 3919 error = xfs_btree_dup_cursor(cur, &tcur); 3920 if (error) 3921 goto error0; 3922 3923 /* 3924 * If there's a right sibling, see if it's ok to shift an entry 3925 * out of it. 3926 */ 3927 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3928 /* 3929 * Move the temp cursor to the last entry in the next block. 3930 * Actually any entry but the first would suffice. 3931 */ 3932 i = xfs_btree_lastrec(tcur, level); 3933 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3934 error = -EFSCORRUPTED; 3935 goto error0; 3936 } 3937 3938 error = xfs_btree_increment(tcur, level, &i); 3939 if (error) 3940 goto error0; 3941 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3942 error = -EFSCORRUPTED; 3943 goto error0; 3944 } 3945 3946 i = xfs_btree_lastrec(tcur, level); 3947 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3948 error = -EFSCORRUPTED; 3949 goto error0; 3950 } 3951 3952 /* Grab a pointer to the block. */ 3953 right = xfs_btree_get_block(tcur, level, &rbp); 3954 #ifdef DEBUG 3955 error = xfs_btree_check_block(tcur, right, level, rbp); 3956 if (error) 3957 goto error0; 3958 #endif 3959 /* Grab the current block number, for future use. */ 3960 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); 3961 3962 /* 3963 * If right block is full enough so that removing one entry 3964 * won't make it too empty, and left-shifting an entry out 3965 * of right to us works, we're done. 3966 */ 3967 if (xfs_btree_get_numrecs(right) - 1 >= 3968 cur->bc_ops->get_minrecs(tcur, level)) { 3969 error = xfs_btree_lshift(tcur, level, &i); 3970 if (error) 3971 goto error0; 3972 if (i) { 3973 ASSERT(xfs_btree_get_numrecs(block) >= 3974 cur->bc_ops->get_minrecs(tcur, level)); 3975 3976 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3977 tcur = NULL; 3978 3979 error = xfs_btree_dec_cursor(cur, level, stat); 3980 if (error) 3981 goto error0; 3982 return 0; 3983 } 3984 } 3985 3986 /* 3987 * Otherwise, grab the number of records in right for 3988 * future reference, and fix up the temp cursor to point 3989 * to our block again (last record). 3990 */ 3991 rrecs = xfs_btree_get_numrecs(right); 3992 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3993 i = xfs_btree_firstrec(tcur, level); 3994 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3995 error = -EFSCORRUPTED; 3996 goto error0; 3997 } 3998 3999 error = xfs_btree_decrement(tcur, level, &i); 4000 if (error) 4001 goto error0; 4002 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 4003 error = -EFSCORRUPTED; 4004 goto error0; 4005 } 4006 } 4007 } 4008 4009 /* 4010 * If there's a left sibling, see if it's ok to shift an entry 4011 * out of it. 4012 */ 4013 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 4014 /* 4015 * Move the temp cursor to the first entry in the 4016 * previous block. 4017 */ 4018 i = xfs_btree_firstrec(tcur, level); 4019 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 4020 error = -EFSCORRUPTED; 4021 goto error0; 4022 } 4023 4024 error = xfs_btree_decrement(tcur, level, &i); 4025 if (error) 4026 goto error0; 4027 i = xfs_btree_firstrec(tcur, level); 4028 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 4029 error = -EFSCORRUPTED; 4030 goto error0; 4031 } 4032 4033 /* Grab a pointer to the block. */ 4034 left = xfs_btree_get_block(tcur, level, &lbp); 4035 #ifdef DEBUG 4036 error = xfs_btree_check_block(cur, left, level, lbp); 4037 if (error) 4038 goto error0; 4039 #endif 4040 /* Grab the current block number, for future use. */ 4041 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); 4042 4043 /* 4044 * If left block is full enough so that removing one entry 4045 * won't make it too empty, and right-shifting an entry out 4046 * of left to us works, we're done. 4047 */ 4048 if (xfs_btree_get_numrecs(left) - 1 >= 4049 cur->bc_ops->get_minrecs(tcur, level)) { 4050 error = xfs_btree_rshift(tcur, level, &i); 4051 if (error) 4052 goto error0; 4053 if (i) { 4054 ASSERT(xfs_btree_get_numrecs(block) >= 4055 cur->bc_ops->get_minrecs(tcur, level)); 4056 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 4057 tcur = NULL; 4058 if (level == 0) 4059 cur->bc_levels[0].ptr++; 4060 4061 *stat = 1; 4062 return 0; 4063 } 4064 } 4065 4066 /* 4067 * Otherwise, grab the number of records in right for 4068 * future reference. 4069 */ 4070 lrecs = xfs_btree_get_numrecs(left); 4071 } 4072 4073 /* Delete the temp cursor, we're done with it. */ 4074 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 4075 tcur = NULL; 4076 4077 /* If here, we need to do a join to keep the tree balanced. */ 4078 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); 4079 4080 if (!xfs_btree_ptr_is_null(cur, &lptr) && 4081 lrecs + xfs_btree_get_numrecs(block) <= 4082 cur->bc_ops->get_maxrecs(cur, level)) { 4083 /* 4084 * Set "right" to be the starting block, 4085 * "left" to be the left neighbor. 4086 */ 4087 rptr = cptr; 4088 right = block; 4089 rbp = bp; 4090 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 4091 if (error) 4092 goto error0; 4093 4094 /* 4095 * If that won't work, see if we can join with the right neighbor block. 4096 */ 4097 } else if (!xfs_btree_ptr_is_null(cur, &rptr) && 4098 rrecs + xfs_btree_get_numrecs(block) <= 4099 cur->bc_ops->get_maxrecs(cur, level)) { 4100 /* 4101 * Set "left" to be the starting block, 4102 * "right" to be the right neighbor. 4103 */ 4104 lptr = cptr; 4105 left = block; 4106 lbp = bp; 4107 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 4108 if (error) 4109 goto error0; 4110 4111 /* 4112 * Otherwise, we can't fix the imbalance. 4113 * Just return. This is probably a logic error, but it's not fatal. 4114 */ 4115 } else { 4116 error = xfs_btree_dec_cursor(cur, level, stat); 4117 if (error) 4118 goto error0; 4119 return 0; 4120 } 4121 4122 rrecs = xfs_btree_get_numrecs(right); 4123 lrecs = xfs_btree_get_numrecs(left); 4124 4125 /* 4126 * We're now going to join "left" and "right" by moving all the stuff 4127 * in "right" to "left" and deleting "right". 4128 */ 4129 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 4130 if (level > 0) { 4131 /* It's a non-leaf. Move keys and pointers. */ 4132 union xfs_btree_key *lkp; /* left btree key */ 4133 union xfs_btree_ptr *lpp; /* left address pointer */ 4134 union xfs_btree_key *rkp; /* right btree key */ 4135 union xfs_btree_ptr *rpp; /* right address pointer */ 4136 4137 lkp = xfs_btree_key_addr(cur, lrecs + 1, left); 4138 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); 4139 rkp = xfs_btree_key_addr(cur, 1, right); 4140 rpp = xfs_btree_ptr_addr(cur, 1, right); 4141 4142 for (i = 1; i < rrecs; i++) { 4143 error = xfs_btree_debug_check_ptr(cur, rpp, i, level); 4144 if (error) 4145 goto error0; 4146 } 4147 4148 xfs_btree_copy_keys(cur, lkp, rkp, rrecs); 4149 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); 4150 4151 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); 4152 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); 4153 } else { 4154 /* It's a leaf. Move records. */ 4155 union xfs_btree_rec *lrp; /* left record pointer */ 4156 union xfs_btree_rec *rrp; /* right record pointer */ 4157 4158 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); 4159 rrp = xfs_btree_rec_addr(cur, 1, right); 4160 4161 xfs_btree_copy_recs(cur, lrp, rrp, rrecs); 4162 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); 4163 } 4164 4165 XFS_BTREE_STATS_INC(cur, join); 4166 4167 /* 4168 * Fix up the number of records and right block pointer in the 4169 * surviving block, and log it. 4170 */ 4171 xfs_btree_set_numrecs(left, lrecs + rrecs); 4172 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB); 4173 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4174 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 4175 4176 /* If there is a right sibling, point it to the remaining block. */ 4177 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4178 if (!xfs_btree_ptr_is_null(cur, &cptr)) { 4179 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); 4180 if (error) 4181 goto error0; 4182 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); 4183 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 4184 } 4185 4186 /* Free the deleted block. */ 4187 error = xfs_btree_free_block(cur, rbp); 4188 if (error) 4189 goto error0; 4190 4191 /* 4192 * If we joined with the left neighbor, set the buffer in the 4193 * cursor to the left block, and fix up the index. 4194 */ 4195 if (bp != lbp) { 4196 cur->bc_levels[level].bp = lbp; 4197 cur->bc_levels[level].ptr += lrecs; 4198 cur->bc_levels[level].ra = 0; 4199 } 4200 /* 4201 * If we joined with the right neighbor and there's a level above 4202 * us, increment the cursor at that level. 4203 */ 4204 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || 4205 (level + 1 < cur->bc_nlevels)) { 4206 error = xfs_btree_increment(cur, level + 1, &i); 4207 if (error) 4208 goto error0; 4209 } 4210 4211 /* 4212 * Readjust the ptr at this level if it's not a leaf, since it's 4213 * still pointing at the deletion point, which makes the cursor 4214 * inconsistent. If this makes the ptr 0, the caller fixes it up. 4215 * We can't use decrement because it would change the next level up. 4216 */ 4217 if (level > 0) 4218 cur->bc_levels[level].ptr--; 4219 4220 /* 4221 * We combined blocks, so we have to update the parent keys if the 4222 * btree supports overlapped intervals. However, 4223 * bc_levels[level + 1].ptr points to the old block so that the caller 4224 * knows which record to delete. Therefore, the caller must be savvy 4225 * enough to call updkeys for us if we return stat == 2. The other 4226 * exit points from this function don't require deletions further up 4227 * the tree, so they can call updkeys directly. 4228 */ 4229 4230 /* Return value means the next level up has something to do. */ 4231 *stat = 2; 4232 return 0; 4233 4234 error0: 4235 if (tcur) 4236 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 4237 return error; 4238 } 4239 4240 /* 4241 * Delete the record pointed to by cur. 4242 * The cursor refers to the place where the record was (could be inserted) 4243 * when the operation returns. 4244 */ 4245 int /* error */ 4246 xfs_btree_delete( 4247 struct xfs_btree_cur *cur, 4248 int *stat) /* success/failure */ 4249 { 4250 int error; /* error return value */ 4251 int level; 4252 int i; 4253 bool joined = false; 4254 4255 /* 4256 * Go up the tree, starting at leaf level. 4257 * 4258 * If 2 is returned then a join was done; go to the next level. 4259 * Otherwise we are done. 4260 */ 4261 for (level = 0, i = 2; i == 2; level++) { 4262 error = xfs_btree_delrec(cur, level, &i); 4263 if (error) 4264 goto error0; 4265 if (i == 2) 4266 joined = true; 4267 } 4268 4269 /* 4270 * If we combined blocks as part of deleting the record, delrec won't 4271 * have updated the parent high keys so we have to do that here. 4272 */ 4273 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) { 4274 error = xfs_btree_updkeys_force(cur, 0); 4275 if (error) 4276 goto error0; 4277 } 4278 4279 if (i == 0) { 4280 for (level = 1; level < cur->bc_nlevels; level++) { 4281 if (cur->bc_levels[level].ptr == 0) { 4282 error = xfs_btree_decrement(cur, level, &i); 4283 if (error) 4284 goto error0; 4285 break; 4286 } 4287 } 4288 } 4289 4290 *stat = i; 4291 return 0; 4292 error0: 4293 return error; 4294 } 4295 4296 /* 4297 * Get the data from the pointed-to record. 4298 */ 4299 int /* error */ 4300 xfs_btree_get_rec( 4301 struct xfs_btree_cur *cur, /* btree cursor */ 4302 union xfs_btree_rec **recp, /* output: btree record */ 4303 int *stat) /* output: success/failure */ 4304 { 4305 struct xfs_btree_block *block; /* btree block */ 4306 struct xfs_buf *bp; /* buffer pointer */ 4307 int ptr; /* record number */ 4308 #ifdef DEBUG 4309 int error; /* error return value */ 4310 #endif 4311 4312 ptr = cur->bc_levels[0].ptr; 4313 block = xfs_btree_get_block(cur, 0, &bp); 4314 4315 #ifdef DEBUG 4316 error = xfs_btree_check_block(cur, block, 0, bp); 4317 if (error) 4318 return error; 4319 #endif 4320 4321 /* 4322 * Off the right end or left end, return failure. 4323 */ 4324 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { 4325 *stat = 0; 4326 return 0; 4327 } 4328 4329 /* 4330 * Point to the record and extract its data. 4331 */ 4332 *recp = xfs_btree_rec_addr(cur, ptr, block); 4333 *stat = 1; 4334 return 0; 4335 } 4336 4337 /* Visit a block in a btree. */ 4338 STATIC int 4339 xfs_btree_visit_block( 4340 struct xfs_btree_cur *cur, 4341 int level, 4342 xfs_btree_visit_blocks_fn fn, 4343 void *data) 4344 { 4345 struct xfs_btree_block *block; 4346 struct xfs_buf *bp; 4347 union xfs_btree_ptr rptr; 4348 int error; 4349 4350 /* do right sibling readahead */ 4351 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 4352 block = xfs_btree_get_block(cur, level, &bp); 4353 4354 /* process the block */ 4355 error = fn(cur, level, data); 4356 if (error) 4357 return error; 4358 4359 /* now read rh sibling block for next iteration */ 4360 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 4361 if (xfs_btree_ptr_is_null(cur, &rptr)) 4362 return -ENOENT; 4363 4364 /* 4365 * We only visit blocks once in this walk, so we have to avoid the 4366 * internal xfs_btree_lookup_get_block() optimisation where it will 4367 * return the same block without checking if the right sibling points 4368 * back to us and creates a cyclic reference in the btree. 4369 */ 4370 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 4371 if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp, 4372 xfs_buf_daddr(bp))) 4373 return -EFSCORRUPTED; 4374 } else { 4375 if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp, 4376 xfs_buf_daddr(bp))) 4377 return -EFSCORRUPTED; 4378 } 4379 return xfs_btree_lookup_get_block(cur, level, &rptr, &block); 4380 } 4381 4382 4383 /* Visit every block in a btree. */ 4384 int 4385 xfs_btree_visit_blocks( 4386 struct xfs_btree_cur *cur, 4387 xfs_btree_visit_blocks_fn fn, 4388 unsigned int flags, 4389 void *data) 4390 { 4391 union xfs_btree_ptr lptr; 4392 int level; 4393 struct xfs_btree_block *block = NULL; 4394 int error = 0; 4395 4396 cur->bc_ops->init_ptr_from_cur(cur, &lptr); 4397 4398 /* for each level */ 4399 for (level = cur->bc_nlevels - 1; level >= 0; level--) { 4400 /* grab the left hand block */ 4401 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); 4402 if (error) 4403 return error; 4404 4405 /* readahead the left most block for the next level down */ 4406 if (level > 0) { 4407 union xfs_btree_ptr *ptr; 4408 4409 ptr = xfs_btree_ptr_addr(cur, 1, block); 4410 xfs_btree_readahead_ptr(cur, ptr, 1); 4411 4412 /* save for the next iteration of the loop */ 4413 xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); 4414 4415 if (!(flags & XFS_BTREE_VISIT_LEAVES)) 4416 continue; 4417 } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) { 4418 continue; 4419 } 4420 4421 /* for each buffer in the level */ 4422 do { 4423 error = xfs_btree_visit_block(cur, level, fn, data); 4424 } while (!error); 4425 4426 if (error != -ENOENT) 4427 return error; 4428 } 4429 4430 return 0; 4431 } 4432 4433 /* 4434 * Change the owner of a btree. 4435 * 4436 * The mechanism we use here is ordered buffer logging. Because we don't know 4437 * how many buffers were are going to need to modify, we don't really want to 4438 * have to make transaction reservations for the worst case of every buffer in a 4439 * full size btree as that may be more space that we can fit in the log.... 4440 * 4441 * We do the btree walk in the most optimal manner possible - we have sibling 4442 * pointers so we can just walk all the blocks on each level from left to right 4443 * in a single pass, and then move to the next level and do the same. We can 4444 * also do readahead on the sibling pointers to get IO moving more quickly, 4445 * though for slow disks this is unlikely to make much difference to performance 4446 * as the amount of CPU work we have to do before moving to the next block is 4447 * relatively small. 4448 * 4449 * For each btree block that we load, modify the owner appropriately, set the 4450 * buffer as an ordered buffer and log it appropriately. We need to ensure that 4451 * we mark the region we change dirty so that if the buffer is relogged in 4452 * a subsequent transaction the changes we make here as an ordered buffer are 4453 * correctly relogged in that transaction. If we are in recovery context, then 4454 * just queue the modified buffer as delayed write buffer so the transaction 4455 * recovery completion writes the changes to disk. 4456 */ 4457 struct xfs_btree_block_change_owner_info { 4458 uint64_t new_owner; 4459 struct list_head *buffer_list; 4460 }; 4461 4462 static int 4463 xfs_btree_block_change_owner( 4464 struct xfs_btree_cur *cur, 4465 int level, 4466 void *data) 4467 { 4468 struct xfs_btree_block_change_owner_info *bbcoi = data; 4469 struct xfs_btree_block *block; 4470 struct xfs_buf *bp; 4471 4472 /* modify the owner */ 4473 block = xfs_btree_get_block(cur, level, &bp); 4474 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 4475 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner)) 4476 return 0; 4477 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); 4478 } else { 4479 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner)) 4480 return 0; 4481 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); 4482 } 4483 4484 /* 4485 * If the block is a root block hosted in an inode, we might not have a 4486 * buffer pointer here and we shouldn't attempt to log the change as the 4487 * information is already held in the inode and discarded when the root 4488 * block is formatted into the on-disk inode fork. We still change it, 4489 * though, so everything is consistent in memory. 4490 */ 4491 if (!bp) { 4492 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 4493 ASSERT(level == cur->bc_nlevels - 1); 4494 return 0; 4495 } 4496 4497 if (cur->bc_tp) { 4498 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) { 4499 xfs_btree_log_block(cur, bp, XFS_BB_OWNER); 4500 return -EAGAIN; 4501 } 4502 } else { 4503 xfs_buf_delwri_queue(bp, bbcoi->buffer_list); 4504 } 4505 4506 return 0; 4507 } 4508 4509 int 4510 xfs_btree_change_owner( 4511 struct xfs_btree_cur *cur, 4512 uint64_t new_owner, 4513 struct list_head *buffer_list) 4514 { 4515 struct xfs_btree_block_change_owner_info bbcoi; 4516 4517 bbcoi.new_owner = new_owner; 4518 bbcoi.buffer_list = buffer_list; 4519 4520 return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner, 4521 XFS_BTREE_VISIT_ALL, &bbcoi); 4522 } 4523 4524 /* Verify the v5 fields of a long-format btree block. */ 4525 xfs_failaddr_t 4526 xfs_btree_lblock_v5hdr_verify( 4527 struct xfs_buf *bp, 4528 uint64_t owner) 4529 { 4530 struct xfs_mount *mp = bp->b_mount; 4531 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4532 4533 if (!xfs_has_crc(mp)) 4534 return __this_address; 4535 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4536 return __this_address; 4537 if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp))) 4538 return __this_address; 4539 if (owner != XFS_RMAP_OWN_UNKNOWN && 4540 be64_to_cpu(block->bb_u.l.bb_owner) != owner) 4541 return __this_address; 4542 return NULL; 4543 } 4544 4545 /* Verify a long-format btree block. */ 4546 xfs_failaddr_t 4547 xfs_btree_lblock_verify( 4548 struct xfs_buf *bp, 4549 unsigned int max_recs) 4550 { 4551 struct xfs_mount *mp = bp->b_mount; 4552 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4553 xfs_fsblock_t fsb; 4554 xfs_failaddr_t fa; 4555 4556 /* numrecs verification */ 4557 if (be16_to_cpu(block->bb_numrecs) > max_recs) 4558 return __this_address; 4559 4560 /* sibling pointer verification */ 4561 fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); 4562 fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb, 4563 block->bb_u.l.bb_leftsib); 4564 if (!fa) 4565 fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb, 4566 block->bb_u.l.bb_rightsib); 4567 return fa; 4568 } 4569 4570 /** 4571 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format 4572 * btree block 4573 * 4574 * @bp: buffer containing the btree block 4575 */ 4576 xfs_failaddr_t 4577 xfs_btree_sblock_v5hdr_verify( 4578 struct xfs_buf *bp) 4579 { 4580 struct xfs_mount *mp = bp->b_mount; 4581 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4582 struct xfs_perag *pag = bp->b_pag; 4583 4584 if (!xfs_has_crc(mp)) 4585 return __this_address; 4586 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4587 return __this_address; 4588 if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp))) 4589 return __this_address; 4590 if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) 4591 return __this_address; 4592 return NULL; 4593 } 4594 4595 /** 4596 * xfs_btree_sblock_verify() -- verify a short-format btree block 4597 * 4598 * @bp: buffer containing the btree block 4599 * @max_recs: maximum records allowed in this btree node 4600 */ 4601 xfs_failaddr_t 4602 xfs_btree_sblock_verify( 4603 struct xfs_buf *bp, 4604 unsigned int max_recs) 4605 { 4606 struct xfs_mount *mp = bp->b_mount; 4607 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4608 xfs_agblock_t agbno; 4609 xfs_failaddr_t fa; 4610 4611 /* numrecs verification */ 4612 if (be16_to_cpu(block->bb_numrecs) > max_recs) 4613 return __this_address; 4614 4615 /* sibling pointer verification */ 4616 agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); 4617 fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno, 4618 block->bb_u.s.bb_leftsib); 4619 if (!fa) 4620 fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno, 4621 block->bb_u.s.bb_rightsib); 4622 return fa; 4623 } 4624 4625 /* 4626 * For the given limits on leaf and keyptr records per block, calculate the 4627 * height of the tree needed to index the number of leaf records. 4628 */ 4629 unsigned int 4630 xfs_btree_compute_maxlevels( 4631 const unsigned int *limits, 4632 unsigned long long records) 4633 { 4634 unsigned long long level_blocks = howmany_64(records, limits[0]); 4635 unsigned int height = 1; 4636 4637 while (level_blocks > 1) { 4638 level_blocks = howmany_64(level_blocks, limits[1]); 4639 height++; 4640 } 4641 4642 return height; 4643 } 4644 4645 /* 4646 * For the given limits on leaf and keyptr records per block, calculate the 4647 * number of blocks needed to index the given number of leaf records. 4648 */ 4649 unsigned long long 4650 xfs_btree_calc_size( 4651 const unsigned int *limits, 4652 unsigned long long records) 4653 { 4654 unsigned long long level_blocks = howmany_64(records, limits[0]); 4655 unsigned long long blocks = level_blocks; 4656 4657 while (level_blocks > 1) { 4658 level_blocks = howmany_64(level_blocks, limits[1]); 4659 blocks += level_blocks; 4660 } 4661 4662 return blocks; 4663 } 4664 4665 /* 4666 * Given a number of available blocks for the btree to consume with records and 4667 * pointers, calculate the height of the tree needed to index all the records 4668 * that space can hold based on the number of pointers each interior node 4669 * holds. 4670 * 4671 * We start by assuming a single level tree consumes a single block, then track 4672 * the number of blocks each node level consumes until we no longer have space 4673 * to store the next node level. At this point, we are indexing all the leaf 4674 * blocks in the space, and there's no more free space to split the tree any 4675 * further. That's our maximum btree height. 4676 */ 4677 unsigned int 4678 xfs_btree_space_to_height( 4679 const unsigned int *limits, 4680 unsigned long long leaf_blocks) 4681 { 4682 /* 4683 * The root btree block can have fewer than minrecs pointers in it 4684 * because the tree might not be big enough to require that amount of 4685 * fanout. Hence it has a minimum size of 2 pointers, not limits[1]. 4686 */ 4687 unsigned long long node_blocks = 2; 4688 unsigned long long blocks_left = leaf_blocks - 1; 4689 unsigned int height = 1; 4690 4691 if (leaf_blocks < 1) 4692 return 0; 4693 4694 while (node_blocks < blocks_left) { 4695 blocks_left -= node_blocks; 4696 node_blocks *= limits[1]; 4697 height++; 4698 } 4699 4700 return height; 4701 } 4702 4703 /* 4704 * Query a regular btree for all records overlapping a given interval. 4705 * Start with a LE lookup of the key of low_rec and return all records 4706 * until we find a record with a key greater than the key of high_rec. 4707 */ 4708 STATIC int 4709 xfs_btree_simple_query_range( 4710 struct xfs_btree_cur *cur, 4711 const union xfs_btree_key *low_key, 4712 const union xfs_btree_key *high_key, 4713 xfs_btree_query_range_fn fn, 4714 void *priv) 4715 { 4716 union xfs_btree_rec *recp; 4717 union xfs_btree_key rec_key; 4718 int stat; 4719 bool firstrec = true; 4720 int error; 4721 4722 ASSERT(cur->bc_ops->init_high_key_from_rec); 4723 ASSERT(cur->bc_ops->diff_two_keys); 4724 4725 /* 4726 * Find the leftmost record. The btree cursor must be set 4727 * to the low record used to generate low_key. 4728 */ 4729 stat = 0; 4730 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat); 4731 if (error) 4732 goto out; 4733 4734 /* Nothing? See if there's anything to the right. */ 4735 if (!stat) { 4736 error = xfs_btree_increment(cur, 0, &stat); 4737 if (error) 4738 goto out; 4739 } 4740 4741 while (stat) { 4742 /* Find the record. */ 4743 error = xfs_btree_get_rec(cur, &recp, &stat); 4744 if (error || !stat) 4745 break; 4746 4747 /* Skip if low_key > high_key(rec). */ 4748 if (firstrec) { 4749 cur->bc_ops->init_high_key_from_rec(&rec_key, recp); 4750 firstrec = false; 4751 if (xfs_btree_keycmp_gt(cur, low_key, &rec_key)) 4752 goto advloop; 4753 } 4754 4755 /* Stop if low_key(rec) > high_key. */ 4756 cur->bc_ops->init_key_from_rec(&rec_key, recp); 4757 if (xfs_btree_keycmp_gt(cur, &rec_key, high_key)) 4758 break; 4759 4760 /* Callback */ 4761 error = fn(cur, recp, priv); 4762 if (error) 4763 break; 4764 4765 advloop: 4766 /* Move on to the next record. */ 4767 error = xfs_btree_increment(cur, 0, &stat); 4768 if (error) 4769 break; 4770 } 4771 4772 out: 4773 return error; 4774 } 4775 4776 /* 4777 * Query an overlapped interval btree for all records overlapping a given 4778 * interval. This function roughly follows the algorithm given in 4779 * "Interval Trees" of _Introduction to Algorithms_, which is section 4780 * 14.3 in the 2nd and 3rd editions. 4781 * 4782 * First, generate keys for the low and high records passed in. 4783 * 4784 * For any leaf node, generate the high and low keys for the record. 4785 * If the record keys overlap with the query low/high keys, pass the 4786 * record to the function iterator. 4787 * 4788 * For any internal node, compare the low and high keys of each 4789 * pointer against the query low/high keys. If there's an overlap, 4790 * follow the pointer. 4791 * 4792 * As an optimization, we stop scanning a block when we find a low key 4793 * that is greater than the query's high key. 4794 */ 4795 STATIC int 4796 xfs_btree_overlapped_query_range( 4797 struct xfs_btree_cur *cur, 4798 const union xfs_btree_key *low_key, 4799 const union xfs_btree_key *high_key, 4800 xfs_btree_query_range_fn fn, 4801 void *priv) 4802 { 4803 union xfs_btree_ptr ptr; 4804 union xfs_btree_ptr *pp; 4805 union xfs_btree_key rec_key; 4806 union xfs_btree_key rec_hkey; 4807 union xfs_btree_key *lkp; 4808 union xfs_btree_key *hkp; 4809 union xfs_btree_rec *recp; 4810 struct xfs_btree_block *block; 4811 int level; 4812 struct xfs_buf *bp; 4813 int i; 4814 int error; 4815 4816 /* Load the root of the btree. */ 4817 level = cur->bc_nlevels - 1; 4818 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 4819 error = xfs_btree_lookup_get_block(cur, level, &ptr, &block); 4820 if (error) 4821 return error; 4822 xfs_btree_get_block(cur, level, &bp); 4823 trace_xfs_btree_overlapped_query_range(cur, level, bp); 4824 #ifdef DEBUG 4825 error = xfs_btree_check_block(cur, block, level, bp); 4826 if (error) 4827 goto out; 4828 #endif 4829 cur->bc_levels[level].ptr = 1; 4830 4831 while (level < cur->bc_nlevels) { 4832 block = xfs_btree_get_block(cur, level, &bp); 4833 4834 /* End of node, pop back towards the root. */ 4835 if (cur->bc_levels[level].ptr > 4836 be16_to_cpu(block->bb_numrecs)) { 4837 pop_up: 4838 if (level < cur->bc_nlevels - 1) 4839 cur->bc_levels[level + 1].ptr++; 4840 level++; 4841 continue; 4842 } 4843 4844 if (level == 0) { 4845 /* Handle a leaf node. */ 4846 recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, 4847 block); 4848 4849 cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); 4850 cur->bc_ops->init_key_from_rec(&rec_key, recp); 4851 4852 /* 4853 * If (query's high key < record's low key), then there 4854 * are no more interesting records in this block. Pop 4855 * up to the leaf level to find more record blocks. 4856 * 4857 * If (record's high key >= query's low key) and 4858 * (query's high key >= record's low key), then 4859 * this record overlaps the query range; callback. 4860 */ 4861 if (xfs_btree_keycmp_lt(cur, high_key, &rec_key)) 4862 goto pop_up; 4863 if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) { 4864 error = fn(cur, recp, priv); 4865 if (error) 4866 break; 4867 } 4868 cur->bc_levels[level].ptr++; 4869 continue; 4870 } 4871 4872 /* Handle an internal node. */ 4873 lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block); 4874 hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, 4875 block); 4876 pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block); 4877 4878 /* 4879 * If (query's high key < pointer's low key), then there are no 4880 * more interesting keys in this block. Pop up one leaf level 4881 * to continue looking for records. 4882 * 4883 * If (pointer's high key >= query's low key) and 4884 * (query's high key >= pointer's low key), then 4885 * this record overlaps the query range; follow pointer. 4886 */ 4887 if (xfs_btree_keycmp_lt(cur, high_key, lkp)) 4888 goto pop_up; 4889 if (xfs_btree_keycmp_ge(cur, hkp, low_key)) { 4890 level--; 4891 error = xfs_btree_lookup_get_block(cur, level, pp, 4892 &block); 4893 if (error) 4894 goto out; 4895 xfs_btree_get_block(cur, level, &bp); 4896 trace_xfs_btree_overlapped_query_range(cur, level, bp); 4897 #ifdef DEBUG 4898 error = xfs_btree_check_block(cur, block, level, bp); 4899 if (error) 4900 goto out; 4901 #endif 4902 cur->bc_levels[level].ptr = 1; 4903 continue; 4904 } 4905 cur->bc_levels[level].ptr++; 4906 } 4907 4908 out: 4909 /* 4910 * If we don't end this function with the cursor pointing at a record 4911 * block, a subsequent non-error cursor deletion will not release 4912 * node-level buffers, causing a buffer leak. This is quite possible 4913 * with a zero-results range query, so release the buffers if we 4914 * failed to return any results. 4915 */ 4916 if (cur->bc_levels[0].bp == NULL) { 4917 for (i = 0; i < cur->bc_nlevels; i++) { 4918 if (cur->bc_levels[i].bp) { 4919 xfs_trans_brelse(cur->bc_tp, 4920 cur->bc_levels[i].bp); 4921 cur->bc_levels[i].bp = NULL; 4922 cur->bc_levels[i].ptr = 0; 4923 cur->bc_levels[i].ra = 0; 4924 } 4925 } 4926 } 4927 4928 return error; 4929 } 4930 4931 static inline void 4932 xfs_btree_key_from_irec( 4933 struct xfs_btree_cur *cur, 4934 union xfs_btree_key *key, 4935 const union xfs_btree_irec *irec) 4936 { 4937 union xfs_btree_rec rec; 4938 4939 cur->bc_rec = *irec; 4940 cur->bc_ops->init_rec_from_cur(cur, &rec); 4941 cur->bc_ops->init_key_from_rec(key, &rec); 4942 } 4943 4944 /* 4945 * Query a btree for all records overlapping a given interval of keys. The 4946 * supplied function will be called with each record found; return one of the 4947 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error 4948 * code. This function returns -ECANCELED, zero, or a negative error code. 4949 */ 4950 int 4951 xfs_btree_query_range( 4952 struct xfs_btree_cur *cur, 4953 const union xfs_btree_irec *low_rec, 4954 const union xfs_btree_irec *high_rec, 4955 xfs_btree_query_range_fn fn, 4956 void *priv) 4957 { 4958 union xfs_btree_key low_key; 4959 union xfs_btree_key high_key; 4960 4961 /* Find the keys of both ends of the interval. */ 4962 xfs_btree_key_from_irec(cur, &high_key, high_rec); 4963 xfs_btree_key_from_irec(cur, &low_key, low_rec); 4964 4965 /* Enforce low key <= high key. */ 4966 if (!xfs_btree_keycmp_le(cur, &low_key, &high_key)) 4967 return -EINVAL; 4968 4969 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 4970 return xfs_btree_simple_query_range(cur, &low_key, 4971 &high_key, fn, priv); 4972 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, 4973 fn, priv); 4974 } 4975 4976 /* Query a btree for all records. */ 4977 int 4978 xfs_btree_query_all( 4979 struct xfs_btree_cur *cur, 4980 xfs_btree_query_range_fn fn, 4981 void *priv) 4982 { 4983 union xfs_btree_key low_key; 4984 union xfs_btree_key high_key; 4985 4986 memset(&cur->bc_rec, 0, sizeof(cur->bc_rec)); 4987 memset(&low_key, 0, sizeof(low_key)); 4988 memset(&high_key, 0xFF, sizeof(high_key)); 4989 4990 return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv); 4991 } 4992 4993 static int 4994 xfs_btree_count_blocks_helper( 4995 struct xfs_btree_cur *cur, 4996 int level, 4997 void *data) 4998 { 4999 xfs_extlen_t *blocks = data; 5000 (*blocks)++; 5001 5002 return 0; 5003 } 5004 5005 /* Count the blocks in a btree and return the result in *blocks. */ 5006 int 5007 xfs_btree_count_blocks( 5008 struct xfs_btree_cur *cur, 5009 xfs_extlen_t *blocks) 5010 { 5011 *blocks = 0; 5012 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper, 5013 XFS_BTREE_VISIT_ALL, blocks); 5014 } 5015 5016 /* Compare two btree pointers. */ 5017 int64_t 5018 xfs_btree_diff_two_ptrs( 5019 struct xfs_btree_cur *cur, 5020 const union xfs_btree_ptr *a, 5021 const union xfs_btree_ptr *b) 5022 { 5023 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 5024 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); 5025 return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); 5026 } 5027 5028 struct xfs_btree_has_records { 5029 /* Keys for the start and end of the range we want to know about. */ 5030 union xfs_btree_key start_key; 5031 union xfs_btree_key end_key; 5032 5033 /* Mask for key comparisons, if desired. */ 5034 const union xfs_btree_key *key_mask; 5035 5036 /* Highest record key we've seen so far. */ 5037 union xfs_btree_key high_key; 5038 5039 enum xbtree_recpacking outcome; 5040 }; 5041 5042 STATIC int 5043 xfs_btree_has_records_helper( 5044 struct xfs_btree_cur *cur, 5045 const union xfs_btree_rec *rec, 5046 void *priv) 5047 { 5048 union xfs_btree_key rec_key; 5049 union xfs_btree_key rec_high_key; 5050 struct xfs_btree_has_records *info = priv; 5051 enum xbtree_key_contig key_contig; 5052 5053 cur->bc_ops->init_key_from_rec(&rec_key, rec); 5054 5055 if (info->outcome == XBTREE_RECPACKING_EMPTY) { 5056 info->outcome = XBTREE_RECPACKING_SPARSE; 5057 5058 /* 5059 * If the first record we find does not overlap the start key, 5060 * then there is a hole at the start of the search range. 5061 * Classify this as sparse and stop immediately. 5062 */ 5063 if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key, 5064 info->key_mask)) 5065 return -ECANCELED; 5066 } else { 5067 /* 5068 * If a subsequent record does not overlap with the any record 5069 * we've seen so far, there is a hole in the middle of the 5070 * search range. Classify this as sparse and stop. 5071 * If the keys overlap and this btree does not allow overlap, 5072 * signal corruption. 5073 */ 5074 key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key, 5075 &rec_key, info->key_mask); 5076 if (key_contig == XBTREE_KEY_OVERLAP && 5077 !(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 5078 return -EFSCORRUPTED; 5079 if (key_contig == XBTREE_KEY_GAP) 5080 return -ECANCELED; 5081 } 5082 5083 /* 5084 * If high_key(rec) is larger than any other high key we've seen, 5085 * remember it for later. 5086 */ 5087 cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec); 5088 if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key, 5089 info->key_mask)) 5090 info->high_key = rec_high_key; /* struct copy */ 5091 5092 return 0; 5093 } 5094 5095 /* 5096 * Scan part of the keyspace of a btree and tell us if that keyspace does not 5097 * map to any records; is fully mapped to records; or is partially mapped to 5098 * records. This is the btree record equivalent to determining if a file is 5099 * sparse. 5100 * 5101 * For most btree types, the record scan should use all available btree key 5102 * fields to compare the keys encountered. These callers should pass NULL for 5103 * @mask. However, some callers (e.g. scanning physical space in the rmapbt) 5104 * want to ignore some part of the btree record keyspace when performing the 5105 * comparison. These callers should pass in a union xfs_btree_key object with 5106 * the fields that *should* be a part of the comparison set to any nonzero 5107 * value, and the rest zeroed. 5108 */ 5109 int 5110 xfs_btree_has_records( 5111 struct xfs_btree_cur *cur, 5112 const union xfs_btree_irec *low, 5113 const union xfs_btree_irec *high, 5114 const union xfs_btree_key *mask, 5115 enum xbtree_recpacking *outcome) 5116 { 5117 struct xfs_btree_has_records info = { 5118 .outcome = XBTREE_RECPACKING_EMPTY, 5119 .key_mask = mask, 5120 }; 5121 int error; 5122 5123 /* Not all btrees support this operation. */ 5124 if (!cur->bc_ops->keys_contiguous) { 5125 ASSERT(0); 5126 return -EOPNOTSUPP; 5127 } 5128 5129 xfs_btree_key_from_irec(cur, &info.start_key, low); 5130 xfs_btree_key_from_irec(cur, &info.end_key, high); 5131 5132 error = xfs_btree_query_range(cur, low, high, 5133 xfs_btree_has_records_helper, &info); 5134 if (error == -ECANCELED) 5135 goto out; 5136 if (error) 5137 return error; 5138 5139 if (info.outcome == XBTREE_RECPACKING_EMPTY) 5140 goto out; 5141 5142 /* 5143 * If the largest high_key(rec) we saw during the walk is greater than 5144 * the end of the search range, classify this as full. Otherwise, 5145 * there is a hole at the end of the search range. 5146 */ 5147 if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key, 5148 mask)) 5149 info.outcome = XBTREE_RECPACKING_FULL; 5150 5151 out: 5152 *outcome = info.outcome; 5153 return 0; 5154 } 5155 5156 /* Are there more records in this btree? */ 5157 bool 5158 xfs_btree_has_more_records( 5159 struct xfs_btree_cur *cur) 5160 { 5161 struct xfs_btree_block *block; 5162 struct xfs_buf *bp; 5163 5164 block = xfs_btree_get_block(cur, 0, &bp); 5165 5166 /* There are still records in this block. */ 5167 if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block)) 5168 return true; 5169 5170 /* There are more record blocks. */ 5171 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 5172 return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK); 5173 else 5174 return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK); 5175 } 5176 5177 /* Set up all the btree cursor caches. */ 5178 int __init 5179 xfs_btree_init_cur_caches(void) 5180 { 5181 int error; 5182 5183 error = xfs_allocbt_init_cur_cache(); 5184 if (error) 5185 return error; 5186 error = xfs_inobt_init_cur_cache(); 5187 if (error) 5188 goto err; 5189 error = xfs_bmbt_init_cur_cache(); 5190 if (error) 5191 goto err; 5192 error = xfs_rmapbt_init_cur_cache(); 5193 if (error) 5194 goto err; 5195 error = xfs_refcountbt_init_cur_cache(); 5196 if (error) 5197 goto err; 5198 5199 return 0; 5200 err: 5201 xfs_btree_destroy_cur_caches(); 5202 return error; 5203 } 5204 5205 /* Destroy all the btree cursor caches, if they've been allocated. */ 5206 void 5207 xfs_btree_destroy_cur_caches(void) 5208 { 5209 xfs_allocbt_destroy_cur_cache(); 5210 xfs_inobt_destroy_cur_cache(); 5211 xfs_bmbt_destroy_cur_cache(); 5212 xfs_rmapbt_destroy_cur_cache(); 5213 xfs_refcountbt_destroy_cur_cache(); 5214 } 5215 5216 /* Move the btree cursor before the first record. */ 5217 int 5218 xfs_btree_goto_left_edge( 5219 struct xfs_btree_cur *cur) 5220 { 5221 int stat = 0; 5222 int error; 5223 5224 memset(&cur->bc_rec, 0, sizeof(cur->bc_rec)); 5225 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat); 5226 if (error) 5227 return error; 5228 if (!stat) 5229 return 0; 5230 5231 error = xfs_btree_decrement(cur, 0, &stat); 5232 if (error) 5233 return error; 5234 if (stat != 0) { 5235 ASSERT(0); 5236 return -EFSCORRUPTED; 5237 } 5238 5239 return 0; 5240 } 5241