1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_mount.h" 14 #include "xfs_defer.h" 15 #include "xfs_inode.h" 16 #include "xfs_trans.h" 17 #include "xfs_inode_item.h" 18 #include "xfs_buf_item.h" 19 #include "xfs_btree.h" 20 #include "xfs_errortag.h" 21 #include "xfs_error.h" 22 #include "xfs_trace.h" 23 #include "xfs_cksum.h" 24 #include "xfs_alloc.h" 25 #include "xfs_log.h" 26 27 /* 28 * Cursor allocation zone. 29 */ 30 kmem_zone_t *xfs_btree_cur_zone; 31 32 /* 33 * Btree magic numbers. 34 */ 35 static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 36 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 37 XFS_FIBT_MAGIC, 0 }, 38 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC, 39 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, 40 XFS_REFC_CRC_MAGIC } 41 }; 42 43 uint32_t 44 xfs_btree_magic( 45 int crc, 46 xfs_btnum_t btnum) 47 { 48 uint32_t magic = xfs_magics[crc][btnum]; 49 50 /* Ensure we asked for crc for crc-only magics. */ 51 ASSERT(magic != 0); 52 return magic; 53 } 54 55 /* 56 * Check a long btree block header. Return the address of the failing check, 57 * or NULL if everything is ok. 58 */ 59 xfs_failaddr_t 60 __xfs_btree_check_lblock( 61 struct xfs_btree_cur *cur, 62 struct xfs_btree_block *block, 63 int level, 64 struct xfs_buf *bp) 65 { 66 struct xfs_mount *mp = cur->bc_mp; 67 xfs_btnum_t btnum = cur->bc_btnum; 68 int crc = xfs_sb_version_hascrc(&mp->m_sb); 69 70 if (crc) { 71 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 72 return __this_address; 73 if (block->bb_u.l.bb_blkno != 74 cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL)) 75 return __this_address; 76 if (block->bb_u.l.bb_pad != cpu_to_be32(0)) 77 return __this_address; 78 } 79 80 if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) 81 return __this_address; 82 if (be16_to_cpu(block->bb_level) != level) 83 return __this_address; 84 if (be16_to_cpu(block->bb_numrecs) > 85 cur->bc_ops->get_maxrecs(cur, level)) 86 return __this_address; 87 if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && 88 !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib), 89 level + 1)) 90 return __this_address; 91 if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && 92 !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib), 93 level + 1)) 94 return __this_address; 95 96 return NULL; 97 } 98 99 /* Check a long btree block header. */ 100 static int 101 xfs_btree_check_lblock( 102 struct xfs_btree_cur *cur, 103 struct xfs_btree_block *block, 104 int level, 105 struct xfs_buf *bp) 106 { 107 struct xfs_mount *mp = cur->bc_mp; 108 xfs_failaddr_t fa; 109 110 fa = __xfs_btree_check_lblock(cur, block, level, bp); 111 if (unlikely(XFS_TEST_ERROR(fa != NULL, mp, 112 XFS_ERRTAG_BTREE_CHECK_LBLOCK))) { 113 if (bp) 114 trace_xfs_btree_corrupt(bp, _RET_IP_); 115 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 116 return -EFSCORRUPTED; 117 } 118 return 0; 119 } 120 121 /* 122 * Check a short btree block header. Return the address of the failing check, 123 * or NULL if everything is ok. 124 */ 125 xfs_failaddr_t 126 __xfs_btree_check_sblock( 127 struct xfs_btree_cur *cur, 128 struct xfs_btree_block *block, 129 int level, 130 struct xfs_buf *bp) 131 { 132 struct xfs_mount *mp = cur->bc_mp; 133 xfs_btnum_t btnum = cur->bc_btnum; 134 int crc = xfs_sb_version_hascrc(&mp->m_sb); 135 136 if (crc) { 137 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 138 return __this_address; 139 if (block->bb_u.s.bb_blkno != 140 cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL)) 141 return __this_address; 142 } 143 144 if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) 145 return __this_address; 146 if (be16_to_cpu(block->bb_level) != level) 147 return __this_address; 148 if (be16_to_cpu(block->bb_numrecs) > 149 cur->bc_ops->get_maxrecs(cur, level)) 150 return __this_address; 151 if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && 152 !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib), 153 level + 1)) 154 return __this_address; 155 if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && 156 !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib), 157 level + 1)) 158 return __this_address; 159 160 return NULL; 161 } 162 163 /* Check a short btree block header. */ 164 STATIC int 165 xfs_btree_check_sblock( 166 struct xfs_btree_cur *cur, 167 struct xfs_btree_block *block, 168 int level, 169 struct xfs_buf *bp) 170 { 171 struct xfs_mount *mp = cur->bc_mp; 172 xfs_failaddr_t fa; 173 174 fa = __xfs_btree_check_sblock(cur, block, level, bp); 175 if (unlikely(XFS_TEST_ERROR(fa != NULL, mp, 176 XFS_ERRTAG_BTREE_CHECK_SBLOCK))) { 177 if (bp) 178 trace_xfs_btree_corrupt(bp, _RET_IP_); 179 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 180 return -EFSCORRUPTED; 181 } 182 return 0; 183 } 184 185 /* 186 * Debug routine: check that block header is ok. 187 */ 188 int 189 xfs_btree_check_block( 190 struct xfs_btree_cur *cur, /* btree cursor */ 191 struct xfs_btree_block *block, /* generic btree block pointer */ 192 int level, /* level of the btree block */ 193 struct xfs_buf *bp) /* buffer containing block, if any */ 194 { 195 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 196 return xfs_btree_check_lblock(cur, block, level, bp); 197 else 198 return xfs_btree_check_sblock(cur, block, level, bp); 199 } 200 201 /* Check that this long pointer is valid and points within the fs. */ 202 bool 203 xfs_btree_check_lptr( 204 struct xfs_btree_cur *cur, 205 xfs_fsblock_t fsbno, 206 int level) 207 { 208 if (level <= 0) 209 return false; 210 return xfs_verify_fsbno(cur->bc_mp, fsbno); 211 } 212 213 /* Check that this short pointer is valid and points within the AG. */ 214 bool 215 xfs_btree_check_sptr( 216 struct xfs_btree_cur *cur, 217 xfs_agblock_t agbno, 218 int level) 219 { 220 if (level <= 0) 221 return false; 222 return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno); 223 } 224 225 /* 226 * Check that a given (indexed) btree pointer at a certain level of a 227 * btree is valid and doesn't point past where it should. 228 */ 229 static int 230 xfs_btree_check_ptr( 231 struct xfs_btree_cur *cur, 232 union xfs_btree_ptr *ptr, 233 int index, 234 int level) 235 { 236 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 237 if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]), 238 level)) 239 return 0; 240 xfs_err(cur->bc_mp, 241 "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.", 242 cur->bc_private.b.ip->i_ino, 243 cur->bc_private.b.whichfork, cur->bc_btnum, 244 level, index); 245 } else { 246 if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]), 247 level)) 248 return 0; 249 xfs_err(cur->bc_mp, 250 "AG %u: Corrupt btree %d pointer at level %d index %d.", 251 cur->bc_private.a.agno, cur->bc_btnum, 252 level, index); 253 } 254 255 return -EFSCORRUPTED; 256 } 257 258 #ifdef DEBUG 259 # define xfs_btree_debug_check_ptr xfs_btree_check_ptr 260 #else 261 # define xfs_btree_debug_check_ptr(...) (0) 262 #endif 263 264 /* 265 * Calculate CRC on the whole btree block and stuff it into the 266 * long-form btree header. 267 * 268 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 269 * it into the buffer so recovery knows what the last modification was that made 270 * it to disk. 271 */ 272 void 273 xfs_btree_lblock_calc_crc( 274 struct xfs_buf *bp) 275 { 276 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 277 struct xfs_buf_log_item *bip = bp->b_log_item; 278 279 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 280 return; 281 if (bip) 282 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 283 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 284 } 285 286 bool 287 xfs_btree_lblock_verify_crc( 288 struct xfs_buf *bp) 289 { 290 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 291 struct xfs_mount *mp = bp->b_target->bt_mount; 292 293 if (xfs_sb_version_hascrc(&mp->m_sb)) { 294 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) 295 return false; 296 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 297 } 298 299 return true; 300 } 301 302 /* 303 * Calculate CRC on the whole btree block and stuff it into the 304 * short-form btree header. 305 * 306 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 307 * it into the buffer so recovery knows what the last modification was that made 308 * it to disk. 309 */ 310 void 311 xfs_btree_sblock_calc_crc( 312 struct xfs_buf *bp) 313 { 314 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 315 struct xfs_buf_log_item *bip = bp->b_log_item; 316 317 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 318 return; 319 if (bip) 320 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 321 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 322 } 323 324 bool 325 xfs_btree_sblock_verify_crc( 326 struct xfs_buf *bp) 327 { 328 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 329 struct xfs_mount *mp = bp->b_target->bt_mount; 330 331 if (xfs_sb_version_hascrc(&mp->m_sb)) { 332 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 333 return false; 334 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 335 } 336 337 return true; 338 } 339 340 static int 341 xfs_btree_free_block( 342 struct xfs_btree_cur *cur, 343 struct xfs_buf *bp) 344 { 345 int error; 346 347 error = cur->bc_ops->free_block(cur, bp); 348 if (!error) { 349 xfs_trans_binval(cur->bc_tp, bp); 350 XFS_BTREE_STATS_INC(cur, free); 351 } 352 return error; 353 } 354 355 /* 356 * Delete the btree cursor. 357 */ 358 void 359 xfs_btree_del_cursor( 360 xfs_btree_cur_t *cur, /* btree cursor */ 361 int error) /* del because of error */ 362 { 363 int i; /* btree level */ 364 365 /* 366 * Clear the buffer pointers, and release the buffers. 367 * If we're doing this in the face of an error, we 368 * need to make sure to inspect all of the entries 369 * in the bc_bufs array for buffers to be unlocked. 370 * This is because some of the btree code works from 371 * level n down to 0, and if we get an error along 372 * the way we won't have initialized all the entries 373 * down to 0. 374 */ 375 for (i = 0; i < cur->bc_nlevels; i++) { 376 if (cur->bc_bufs[i]) 377 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); 378 else if (!error) 379 break; 380 } 381 /* 382 * Can't free a bmap cursor without having dealt with the 383 * allocated indirect blocks' accounting. 384 */ 385 ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || 386 cur->bc_private.b.allocated == 0); 387 /* 388 * Free the cursor. 389 */ 390 kmem_zone_free(xfs_btree_cur_zone, cur); 391 } 392 393 /* 394 * Duplicate the btree cursor. 395 * Allocate a new one, copy the record, re-get the buffers. 396 */ 397 int /* error */ 398 xfs_btree_dup_cursor( 399 xfs_btree_cur_t *cur, /* input cursor */ 400 xfs_btree_cur_t **ncur) /* output cursor */ 401 { 402 xfs_buf_t *bp; /* btree block's buffer pointer */ 403 int error; /* error return value */ 404 int i; /* level number of btree block */ 405 xfs_mount_t *mp; /* mount structure for filesystem */ 406 xfs_btree_cur_t *new; /* new cursor value */ 407 xfs_trans_t *tp; /* transaction pointer, can be NULL */ 408 409 tp = cur->bc_tp; 410 mp = cur->bc_mp; 411 412 /* 413 * Allocate a new cursor like the old one. 414 */ 415 new = cur->bc_ops->dup_cursor(cur); 416 417 /* 418 * Copy the record currently in the cursor. 419 */ 420 new->bc_rec = cur->bc_rec; 421 422 /* 423 * For each level current, re-get the buffer and copy the ptr value. 424 */ 425 for (i = 0; i < new->bc_nlevels; i++) { 426 new->bc_ptrs[i] = cur->bc_ptrs[i]; 427 new->bc_ra[i] = cur->bc_ra[i]; 428 bp = cur->bc_bufs[i]; 429 if (bp) { 430 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 431 XFS_BUF_ADDR(bp), mp->m_bsize, 432 0, &bp, 433 cur->bc_ops->buf_ops); 434 if (error) { 435 xfs_btree_del_cursor(new, error); 436 *ncur = NULL; 437 return error; 438 } 439 } 440 new->bc_bufs[i] = bp; 441 } 442 *ncur = new; 443 return 0; 444 } 445 446 /* 447 * XFS btree block layout and addressing: 448 * 449 * There are two types of blocks in the btree: leaf and non-leaf blocks. 450 * 451 * The leaf record start with a header then followed by records containing 452 * the values. A non-leaf block also starts with the same header, and 453 * then first contains lookup keys followed by an equal number of pointers 454 * to the btree blocks at the previous level. 455 * 456 * +--------+-------+-------+-------+-------+-------+-------+ 457 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | 458 * +--------+-------+-------+-------+-------+-------+-------+ 459 * 460 * +--------+-------+-------+-------+-------+-------+-------+ 461 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | 462 * +--------+-------+-------+-------+-------+-------+-------+ 463 * 464 * The header is called struct xfs_btree_block for reasons better left unknown 465 * and comes in different versions for short (32bit) and long (64bit) block 466 * pointers. The record and key structures are defined by the btree instances 467 * and opaque to the btree core. The block pointers are simple disk endian 468 * integers, available in a short (32bit) and long (64bit) variant. 469 * 470 * The helpers below calculate the offset of a given record, key or pointer 471 * into a btree block (xfs_btree_*_offset) or return a pointer to the given 472 * record, key or pointer (xfs_btree_*_addr). Note that all addressing 473 * inside the btree block is done using indices starting at one, not zero! 474 * 475 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing 476 * overlapping intervals. In such a tree, records are still sorted lowest to 477 * highest and indexed by the smallest key value that refers to the record. 478 * However, nodes are different: each pointer has two associated keys -- one 479 * indexing the lowest key available in the block(s) below (the same behavior 480 * as the key in a regular btree) and another indexing the highest key 481 * available in the block(s) below. Because records are /not/ sorted by the 482 * highest key, all leaf block updates require us to compute the highest key 483 * that matches any record in the leaf and to recursively update the high keys 484 * in the nodes going further up in the tree, if necessary. Nodes look like 485 * this: 486 * 487 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ 488 * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... | 489 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ 490 * 491 * To perform an interval query on an overlapped tree, perform the usual 492 * depth-first search and use the low and high keys to decide if we can skip 493 * that particular node. If a leaf node is reached, return the records that 494 * intersect the interval. Note that an interval query may return numerous 495 * entries. For a non-overlapped tree, simply search for the record associated 496 * with the lowest key and iterate forward until a non-matching record is 497 * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by 498 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in 499 * more detail. 500 * 501 * Why do we care about overlapping intervals? Let's say you have a bunch of 502 * reverse mapping records on a reflink filesystem: 503 * 504 * 1: +- file A startblock B offset C length D -----------+ 505 * 2: +- file E startblock F offset G length H --------------+ 506 * 3: +- file I startblock F offset J length K --+ 507 * 4: +- file L... --+ 508 * 509 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally, 510 * we'd simply increment the length of record 1. But how do we find the record 511 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return 512 * record 3 because the keys are ordered first by startblock. An interval 513 * query would return records 1 and 2 because they both overlap (B+D-1), and 514 * from that we can pick out record 1 as the appropriate left neighbor. 515 * 516 * In the non-overlapped case you can do a LE lookup and decrement the cursor 517 * because a record's interval must end before the next record. 518 */ 519 520 /* 521 * Return size of the btree block header for this btree instance. 522 */ 523 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) 524 { 525 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 526 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 527 return XFS_BTREE_LBLOCK_CRC_LEN; 528 return XFS_BTREE_LBLOCK_LEN; 529 } 530 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 531 return XFS_BTREE_SBLOCK_CRC_LEN; 532 return XFS_BTREE_SBLOCK_LEN; 533 } 534 535 /* 536 * Return size of btree block pointers for this btree instance. 537 */ 538 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) 539 { 540 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 541 sizeof(__be64) : sizeof(__be32); 542 } 543 544 /* 545 * Calculate offset of the n-th record in a btree block. 546 */ 547 STATIC size_t 548 xfs_btree_rec_offset( 549 struct xfs_btree_cur *cur, 550 int n) 551 { 552 return xfs_btree_block_len(cur) + 553 (n - 1) * cur->bc_ops->rec_len; 554 } 555 556 /* 557 * Calculate offset of the n-th key in a btree block. 558 */ 559 STATIC size_t 560 xfs_btree_key_offset( 561 struct xfs_btree_cur *cur, 562 int n) 563 { 564 return xfs_btree_block_len(cur) + 565 (n - 1) * cur->bc_ops->key_len; 566 } 567 568 /* 569 * Calculate offset of the n-th high key in a btree block. 570 */ 571 STATIC size_t 572 xfs_btree_high_key_offset( 573 struct xfs_btree_cur *cur, 574 int n) 575 { 576 return xfs_btree_block_len(cur) + 577 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2); 578 } 579 580 /* 581 * Calculate offset of the n-th block pointer in a btree block. 582 */ 583 STATIC size_t 584 xfs_btree_ptr_offset( 585 struct xfs_btree_cur *cur, 586 int n, 587 int level) 588 { 589 return xfs_btree_block_len(cur) + 590 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + 591 (n - 1) * xfs_btree_ptr_len(cur); 592 } 593 594 /* 595 * Return a pointer to the n-th record in the btree block. 596 */ 597 union xfs_btree_rec * 598 xfs_btree_rec_addr( 599 struct xfs_btree_cur *cur, 600 int n, 601 struct xfs_btree_block *block) 602 { 603 return (union xfs_btree_rec *) 604 ((char *)block + xfs_btree_rec_offset(cur, n)); 605 } 606 607 /* 608 * Return a pointer to the n-th key in the btree block. 609 */ 610 union xfs_btree_key * 611 xfs_btree_key_addr( 612 struct xfs_btree_cur *cur, 613 int n, 614 struct xfs_btree_block *block) 615 { 616 return (union xfs_btree_key *) 617 ((char *)block + xfs_btree_key_offset(cur, n)); 618 } 619 620 /* 621 * Return a pointer to the n-th high key in the btree block. 622 */ 623 union xfs_btree_key * 624 xfs_btree_high_key_addr( 625 struct xfs_btree_cur *cur, 626 int n, 627 struct xfs_btree_block *block) 628 { 629 return (union xfs_btree_key *) 630 ((char *)block + xfs_btree_high_key_offset(cur, n)); 631 } 632 633 /* 634 * Return a pointer to the n-th block pointer in the btree block. 635 */ 636 union xfs_btree_ptr * 637 xfs_btree_ptr_addr( 638 struct xfs_btree_cur *cur, 639 int n, 640 struct xfs_btree_block *block) 641 { 642 int level = xfs_btree_get_level(block); 643 644 ASSERT(block->bb_level != 0); 645 646 return (union xfs_btree_ptr *) 647 ((char *)block + xfs_btree_ptr_offset(cur, n, level)); 648 } 649 650 /* 651 * Get the root block which is stored in the inode. 652 * 653 * For now this btree implementation assumes the btree root is always 654 * stored in the if_broot field of an inode fork. 655 */ 656 STATIC struct xfs_btree_block * 657 xfs_btree_get_iroot( 658 struct xfs_btree_cur *cur) 659 { 660 struct xfs_ifork *ifp; 661 662 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); 663 return (struct xfs_btree_block *)ifp->if_broot; 664 } 665 666 /* 667 * Retrieve the block pointer from the cursor at the given level. 668 * This may be an inode btree root or from a buffer. 669 */ 670 struct xfs_btree_block * /* generic btree block pointer */ 671 xfs_btree_get_block( 672 struct xfs_btree_cur *cur, /* btree cursor */ 673 int level, /* level in btree */ 674 struct xfs_buf **bpp) /* buffer containing the block */ 675 { 676 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 677 (level == cur->bc_nlevels - 1)) { 678 *bpp = NULL; 679 return xfs_btree_get_iroot(cur); 680 } 681 682 *bpp = cur->bc_bufs[level]; 683 return XFS_BUF_TO_BLOCK(*bpp); 684 } 685 686 /* 687 * Get a buffer for the block, return it with no data read. 688 * Long-form addressing. 689 */ 690 xfs_buf_t * /* buffer for fsbno */ 691 xfs_btree_get_bufl( 692 xfs_mount_t *mp, /* file system mount point */ 693 xfs_trans_t *tp, /* transaction pointer */ 694 xfs_fsblock_t fsbno, /* file system block number */ 695 uint lock) /* lock flags for get_buf */ 696 { 697 xfs_daddr_t d; /* real disk block address */ 698 699 ASSERT(fsbno != NULLFSBLOCK); 700 d = XFS_FSB_TO_DADDR(mp, fsbno); 701 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 702 } 703 704 /* 705 * Get a buffer for the block, return it with no data read. 706 * Short-form addressing. 707 */ 708 xfs_buf_t * /* buffer for agno/agbno */ 709 xfs_btree_get_bufs( 710 xfs_mount_t *mp, /* file system mount point */ 711 xfs_trans_t *tp, /* transaction pointer */ 712 xfs_agnumber_t agno, /* allocation group number */ 713 xfs_agblock_t agbno, /* allocation group block number */ 714 uint lock) /* lock flags for get_buf */ 715 { 716 xfs_daddr_t d; /* real disk block address */ 717 718 ASSERT(agno != NULLAGNUMBER); 719 ASSERT(agbno != NULLAGBLOCK); 720 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 721 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 722 } 723 724 /* 725 * Check for the cursor referring to the last block at the given level. 726 */ 727 int /* 1=is last block, 0=not last block */ 728 xfs_btree_islastblock( 729 xfs_btree_cur_t *cur, /* btree cursor */ 730 int level) /* level to check */ 731 { 732 struct xfs_btree_block *block; /* generic btree block pointer */ 733 xfs_buf_t *bp; /* buffer containing block */ 734 735 block = xfs_btree_get_block(cur, level, &bp); 736 xfs_btree_check_block(cur, block, level, bp); 737 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 738 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); 739 else 740 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); 741 } 742 743 /* 744 * Change the cursor to point to the first record at the given level. 745 * Other levels are unaffected. 746 */ 747 STATIC int /* success=1, failure=0 */ 748 xfs_btree_firstrec( 749 xfs_btree_cur_t *cur, /* btree cursor */ 750 int level) /* level to change */ 751 { 752 struct xfs_btree_block *block; /* generic btree block pointer */ 753 xfs_buf_t *bp; /* buffer containing block */ 754 755 /* 756 * Get the block pointer for this level. 757 */ 758 block = xfs_btree_get_block(cur, level, &bp); 759 if (xfs_btree_check_block(cur, block, level, bp)) 760 return 0; 761 /* 762 * It's empty, there is no such record. 763 */ 764 if (!block->bb_numrecs) 765 return 0; 766 /* 767 * Set the ptr value to 1, that's the first record/key. 768 */ 769 cur->bc_ptrs[level] = 1; 770 return 1; 771 } 772 773 /* 774 * Change the cursor to point to the last record in the current block 775 * at the given level. Other levels are unaffected. 776 */ 777 STATIC int /* success=1, failure=0 */ 778 xfs_btree_lastrec( 779 xfs_btree_cur_t *cur, /* btree cursor */ 780 int level) /* level to change */ 781 { 782 struct xfs_btree_block *block; /* generic btree block pointer */ 783 xfs_buf_t *bp; /* buffer containing block */ 784 785 /* 786 * Get the block pointer for this level. 787 */ 788 block = xfs_btree_get_block(cur, level, &bp); 789 if (xfs_btree_check_block(cur, block, level, bp)) 790 return 0; 791 /* 792 * It's empty, there is no such record. 793 */ 794 if (!block->bb_numrecs) 795 return 0; 796 /* 797 * Set the ptr value to numrecs, that's the last record/key. 798 */ 799 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); 800 return 1; 801 } 802 803 /* 804 * Compute first and last byte offsets for the fields given. 805 * Interprets the offsets table, which contains struct field offsets. 806 */ 807 void 808 xfs_btree_offsets( 809 int64_t fields, /* bitmask of fields */ 810 const short *offsets, /* table of field offsets */ 811 int nbits, /* number of bits to inspect */ 812 int *first, /* output: first byte offset */ 813 int *last) /* output: last byte offset */ 814 { 815 int i; /* current bit number */ 816 int64_t imask; /* mask for current bit number */ 817 818 ASSERT(fields != 0); 819 /* 820 * Find the lowest bit, so the first byte offset. 821 */ 822 for (i = 0, imask = 1LL; ; i++, imask <<= 1) { 823 if (imask & fields) { 824 *first = offsets[i]; 825 break; 826 } 827 } 828 /* 829 * Find the highest bit, so the last byte offset. 830 */ 831 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) { 832 if (imask & fields) { 833 *last = offsets[i + 1] - 1; 834 break; 835 } 836 } 837 } 838 839 /* 840 * Get a buffer for the block, return it read in. 841 * Long-form addressing. 842 */ 843 int 844 xfs_btree_read_bufl( 845 struct xfs_mount *mp, /* file system mount point */ 846 struct xfs_trans *tp, /* transaction pointer */ 847 xfs_fsblock_t fsbno, /* file system block number */ 848 uint lock, /* lock flags for read_buf */ 849 struct xfs_buf **bpp, /* buffer for fsbno */ 850 int refval, /* ref count value for buffer */ 851 const struct xfs_buf_ops *ops) 852 { 853 struct xfs_buf *bp; /* return value */ 854 xfs_daddr_t d; /* real disk block address */ 855 int error; 856 857 if (!xfs_verify_fsbno(mp, fsbno)) 858 return -EFSCORRUPTED; 859 d = XFS_FSB_TO_DADDR(mp, fsbno); 860 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 861 mp->m_bsize, lock, &bp, ops); 862 if (error) 863 return error; 864 if (bp) 865 xfs_buf_set_ref(bp, refval); 866 *bpp = bp; 867 return 0; 868 } 869 870 /* 871 * Read-ahead the block, don't wait for it, don't return a buffer. 872 * Long-form addressing. 873 */ 874 /* ARGSUSED */ 875 void 876 xfs_btree_reada_bufl( 877 struct xfs_mount *mp, /* file system mount point */ 878 xfs_fsblock_t fsbno, /* file system block number */ 879 xfs_extlen_t count, /* count of filesystem blocks */ 880 const struct xfs_buf_ops *ops) 881 { 882 xfs_daddr_t d; 883 884 ASSERT(fsbno != NULLFSBLOCK); 885 d = XFS_FSB_TO_DADDR(mp, fsbno); 886 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 887 } 888 889 /* 890 * Read-ahead the block, don't wait for it, don't return a buffer. 891 * Short-form addressing. 892 */ 893 /* ARGSUSED */ 894 void 895 xfs_btree_reada_bufs( 896 struct xfs_mount *mp, /* file system mount point */ 897 xfs_agnumber_t agno, /* allocation group number */ 898 xfs_agblock_t agbno, /* allocation group block number */ 899 xfs_extlen_t count, /* count of filesystem blocks */ 900 const struct xfs_buf_ops *ops) 901 { 902 xfs_daddr_t d; 903 904 ASSERT(agno != NULLAGNUMBER); 905 ASSERT(agbno != NULLAGBLOCK); 906 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 907 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 908 } 909 910 STATIC int 911 xfs_btree_readahead_lblock( 912 struct xfs_btree_cur *cur, 913 int lr, 914 struct xfs_btree_block *block) 915 { 916 int rval = 0; 917 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); 918 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); 919 920 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { 921 xfs_btree_reada_bufl(cur->bc_mp, left, 1, 922 cur->bc_ops->buf_ops); 923 rval++; 924 } 925 926 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { 927 xfs_btree_reada_bufl(cur->bc_mp, right, 1, 928 cur->bc_ops->buf_ops); 929 rval++; 930 } 931 932 return rval; 933 } 934 935 STATIC int 936 xfs_btree_readahead_sblock( 937 struct xfs_btree_cur *cur, 938 int lr, 939 struct xfs_btree_block *block) 940 { 941 int rval = 0; 942 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); 943 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); 944 945 946 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { 947 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, 948 left, 1, cur->bc_ops->buf_ops); 949 rval++; 950 } 951 952 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { 953 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, 954 right, 1, cur->bc_ops->buf_ops); 955 rval++; 956 } 957 958 return rval; 959 } 960 961 /* 962 * Read-ahead btree blocks, at the given level. 963 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 964 */ 965 STATIC int 966 xfs_btree_readahead( 967 struct xfs_btree_cur *cur, /* btree cursor */ 968 int lev, /* level in btree */ 969 int lr) /* left/right bits */ 970 { 971 struct xfs_btree_block *block; 972 973 /* 974 * No readahead needed if we are at the root level and the 975 * btree root is stored in the inode. 976 */ 977 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 978 (lev == cur->bc_nlevels - 1)) 979 return 0; 980 981 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) 982 return 0; 983 984 cur->bc_ra[lev] |= lr; 985 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); 986 987 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 988 return xfs_btree_readahead_lblock(cur, lr, block); 989 return xfs_btree_readahead_sblock(cur, lr, block); 990 } 991 992 STATIC int 993 xfs_btree_ptr_to_daddr( 994 struct xfs_btree_cur *cur, 995 union xfs_btree_ptr *ptr, 996 xfs_daddr_t *daddr) 997 { 998 xfs_fsblock_t fsbno; 999 xfs_agblock_t agbno; 1000 int error; 1001 1002 error = xfs_btree_check_ptr(cur, ptr, 0, 1); 1003 if (error) 1004 return error; 1005 1006 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1007 fsbno = be64_to_cpu(ptr->l); 1008 *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno); 1009 } else { 1010 agbno = be32_to_cpu(ptr->s); 1011 *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, 1012 agbno); 1013 } 1014 1015 return 0; 1016 } 1017 1018 /* 1019 * Readahead @count btree blocks at the given @ptr location. 1020 * 1021 * We don't need to care about long or short form btrees here as we have a 1022 * method of converting the ptr directly to a daddr available to us. 1023 */ 1024 STATIC void 1025 xfs_btree_readahead_ptr( 1026 struct xfs_btree_cur *cur, 1027 union xfs_btree_ptr *ptr, 1028 xfs_extlen_t count) 1029 { 1030 xfs_daddr_t daddr; 1031 1032 if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr)) 1033 return; 1034 xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr, 1035 cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); 1036 } 1037 1038 /* 1039 * Set the buffer for level "lev" in the cursor to bp, releasing 1040 * any previous buffer. 1041 */ 1042 STATIC void 1043 xfs_btree_setbuf( 1044 xfs_btree_cur_t *cur, /* btree cursor */ 1045 int lev, /* level in btree */ 1046 xfs_buf_t *bp) /* new buffer to set */ 1047 { 1048 struct xfs_btree_block *b; /* btree block */ 1049 1050 if (cur->bc_bufs[lev]) 1051 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); 1052 cur->bc_bufs[lev] = bp; 1053 cur->bc_ra[lev] = 0; 1054 1055 b = XFS_BUF_TO_BLOCK(bp); 1056 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1057 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) 1058 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 1059 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) 1060 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 1061 } else { 1062 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) 1063 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 1064 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) 1065 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 1066 } 1067 } 1068 1069 bool 1070 xfs_btree_ptr_is_null( 1071 struct xfs_btree_cur *cur, 1072 union xfs_btree_ptr *ptr) 1073 { 1074 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1075 return ptr->l == cpu_to_be64(NULLFSBLOCK); 1076 else 1077 return ptr->s == cpu_to_be32(NULLAGBLOCK); 1078 } 1079 1080 STATIC void 1081 xfs_btree_set_ptr_null( 1082 struct xfs_btree_cur *cur, 1083 union xfs_btree_ptr *ptr) 1084 { 1085 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1086 ptr->l = cpu_to_be64(NULLFSBLOCK); 1087 else 1088 ptr->s = cpu_to_be32(NULLAGBLOCK); 1089 } 1090 1091 /* 1092 * Get/set/init sibling pointers 1093 */ 1094 void 1095 xfs_btree_get_sibling( 1096 struct xfs_btree_cur *cur, 1097 struct xfs_btree_block *block, 1098 union xfs_btree_ptr *ptr, 1099 int lr) 1100 { 1101 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); 1102 1103 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1104 if (lr == XFS_BB_RIGHTSIB) 1105 ptr->l = block->bb_u.l.bb_rightsib; 1106 else 1107 ptr->l = block->bb_u.l.bb_leftsib; 1108 } else { 1109 if (lr == XFS_BB_RIGHTSIB) 1110 ptr->s = block->bb_u.s.bb_rightsib; 1111 else 1112 ptr->s = block->bb_u.s.bb_leftsib; 1113 } 1114 } 1115 1116 STATIC void 1117 xfs_btree_set_sibling( 1118 struct xfs_btree_cur *cur, 1119 struct xfs_btree_block *block, 1120 union xfs_btree_ptr *ptr, 1121 int lr) 1122 { 1123 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); 1124 1125 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1126 if (lr == XFS_BB_RIGHTSIB) 1127 block->bb_u.l.bb_rightsib = ptr->l; 1128 else 1129 block->bb_u.l.bb_leftsib = ptr->l; 1130 } else { 1131 if (lr == XFS_BB_RIGHTSIB) 1132 block->bb_u.s.bb_rightsib = ptr->s; 1133 else 1134 block->bb_u.s.bb_leftsib = ptr->s; 1135 } 1136 } 1137 1138 void 1139 xfs_btree_init_block_int( 1140 struct xfs_mount *mp, 1141 struct xfs_btree_block *buf, 1142 xfs_daddr_t blkno, 1143 xfs_btnum_t btnum, 1144 __u16 level, 1145 __u16 numrecs, 1146 __u64 owner, 1147 unsigned int flags) 1148 { 1149 int crc = xfs_sb_version_hascrc(&mp->m_sb); 1150 __u32 magic = xfs_btree_magic(crc, btnum); 1151 1152 buf->bb_magic = cpu_to_be32(magic); 1153 buf->bb_level = cpu_to_be16(level); 1154 buf->bb_numrecs = cpu_to_be16(numrecs); 1155 1156 if (flags & XFS_BTREE_LONG_PTRS) { 1157 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); 1158 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); 1159 if (crc) { 1160 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); 1161 buf->bb_u.l.bb_owner = cpu_to_be64(owner); 1162 uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); 1163 buf->bb_u.l.bb_pad = 0; 1164 buf->bb_u.l.bb_lsn = 0; 1165 } 1166 } else { 1167 /* owner is a 32 bit value on short blocks */ 1168 __u32 __owner = (__u32)owner; 1169 1170 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); 1171 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); 1172 if (crc) { 1173 buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); 1174 buf->bb_u.s.bb_owner = cpu_to_be32(__owner); 1175 uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); 1176 buf->bb_u.s.bb_lsn = 0; 1177 } 1178 } 1179 } 1180 1181 void 1182 xfs_btree_init_block( 1183 struct xfs_mount *mp, 1184 struct xfs_buf *bp, 1185 xfs_btnum_t btnum, 1186 __u16 level, 1187 __u16 numrecs, 1188 __u64 owner, 1189 unsigned int flags) 1190 { 1191 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, 1192 btnum, level, numrecs, owner, flags); 1193 } 1194 1195 STATIC void 1196 xfs_btree_init_block_cur( 1197 struct xfs_btree_cur *cur, 1198 struct xfs_buf *bp, 1199 int level, 1200 int numrecs) 1201 { 1202 __u64 owner; 1203 1204 /* 1205 * we can pull the owner from the cursor right now as the different 1206 * owners align directly with the pointer size of the btree. This may 1207 * change in future, but is safe for current users of the generic btree 1208 * code. 1209 */ 1210 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1211 owner = cur->bc_private.b.ip->i_ino; 1212 else 1213 owner = cur->bc_private.a.agno; 1214 1215 xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, 1216 cur->bc_btnum, level, numrecs, 1217 owner, cur->bc_flags); 1218 } 1219 1220 /* 1221 * Return true if ptr is the last record in the btree and 1222 * we need to track updates to this record. The decision 1223 * will be further refined in the update_lastrec method. 1224 */ 1225 STATIC int 1226 xfs_btree_is_lastrec( 1227 struct xfs_btree_cur *cur, 1228 struct xfs_btree_block *block, 1229 int level) 1230 { 1231 union xfs_btree_ptr ptr; 1232 1233 if (level > 0) 1234 return 0; 1235 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) 1236 return 0; 1237 1238 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1239 if (!xfs_btree_ptr_is_null(cur, &ptr)) 1240 return 0; 1241 return 1; 1242 } 1243 1244 STATIC void 1245 xfs_btree_buf_to_ptr( 1246 struct xfs_btree_cur *cur, 1247 struct xfs_buf *bp, 1248 union xfs_btree_ptr *ptr) 1249 { 1250 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1251 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, 1252 XFS_BUF_ADDR(bp))); 1253 else { 1254 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, 1255 XFS_BUF_ADDR(bp))); 1256 } 1257 } 1258 1259 STATIC void 1260 xfs_btree_set_refs( 1261 struct xfs_btree_cur *cur, 1262 struct xfs_buf *bp) 1263 { 1264 switch (cur->bc_btnum) { 1265 case XFS_BTNUM_BNO: 1266 case XFS_BTNUM_CNT: 1267 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); 1268 break; 1269 case XFS_BTNUM_INO: 1270 case XFS_BTNUM_FINO: 1271 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); 1272 break; 1273 case XFS_BTNUM_BMAP: 1274 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); 1275 break; 1276 case XFS_BTNUM_RMAP: 1277 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF); 1278 break; 1279 case XFS_BTNUM_REFC: 1280 xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF); 1281 break; 1282 default: 1283 ASSERT(0); 1284 } 1285 } 1286 1287 STATIC int 1288 xfs_btree_get_buf_block( 1289 struct xfs_btree_cur *cur, 1290 union xfs_btree_ptr *ptr, 1291 int flags, 1292 struct xfs_btree_block **block, 1293 struct xfs_buf **bpp) 1294 { 1295 struct xfs_mount *mp = cur->bc_mp; 1296 xfs_daddr_t d; 1297 int error; 1298 1299 /* need to sort out how callers deal with failures first */ 1300 ASSERT(!(flags & XBF_TRYLOCK)); 1301 1302 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1303 if (error) 1304 return error; 1305 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 1306 mp->m_bsize, flags); 1307 1308 if (!*bpp) 1309 return -ENOMEM; 1310 1311 (*bpp)->b_ops = cur->bc_ops->buf_ops; 1312 *block = XFS_BUF_TO_BLOCK(*bpp); 1313 return 0; 1314 } 1315 1316 /* 1317 * Read in the buffer at the given ptr and return the buffer and 1318 * the block pointer within the buffer. 1319 */ 1320 STATIC int 1321 xfs_btree_read_buf_block( 1322 struct xfs_btree_cur *cur, 1323 union xfs_btree_ptr *ptr, 1324 int flags, 1325 struct xfs_btree_block **block, 1326 struct xfs_buf **bpp) 1327 { 1328 struct xfs_mount *mp = cur->bc_mp; 1329 xfs_daddr_t d; 1330 int error; 1331 1332 /* need to sort out how callers deal with failures first */ 1333 ASSERT(!(flags & XBF_TRYLOCK)); 1334 1335 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1336 if (error) 1337 return error; 1338 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, 1339 mp->m_bsize, flags, bpp, 1340 cur->bc_ops->buf_ops); 1341 if (error) 1342 return error; 1343 1344 xfs_btree_set_refs(cur, *bpp); 1345 *block = XFS_BUF_TO_BLOCK(*bpp); 1346 return 0; 1347 } 1348 1349 /* 1350 * Copy keys from one btree block to another. 1351 */ 1352 STATIC void 1353 xfs_btree_copy_keys( 1354 struct xfs_btree_cur *cur, 1355 union xfs_btree_key *dst_key, 1356 union xfs_btree_key *src_key, 1357 int numkeys) 1358 { 1359 ASSERT(numkeys >= 0); 1360 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); 1361 } 1362 1363 /* 1364 * Copy records from one btree block to another. 1365 */ 1366 STATIC void 1367 xfs_btree_copy_recs( 1368 struct xfs_btree_cur *cur, 1369 union xfs_btree_rec *dst_rec, 1370 union xfs_btree_rec *src_rec, 1371 int numrecs) 1372 { 1373 ASSERT(numrecs >= 0); 1374 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); 1375 } 1376 1377 /* 1378 * Copy block pointers from one btree block to another. 1379 */ 1380 STATIC void 1381 xfs_btree_copy_ptrs( 1382 struct xfs_btree_cur *cur, 1383 union xfs_btree_ptr *dst_ptr, 1384 union xfs_btree_ptr *src_ptr, 1385 int numptrs) 1386 { 1387 ASSERT(numptrs >= 0); 1388 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); 1389 } 1390 1391 /* 1392 * Shift keys one index left/right inside a single btree block. 1393 */ 1394 STATIC void 1395 xfs_btree_shift_keys( 1396 struct xfs_btree_cur *cur, 1397 union xfs_btree_key *key, 1398 int dir, 1399 int numkeys) 1400 { 1401 char *dst_key; 1402 1403 ASSERT(numkeys >= 0); 1404 ASSERT(dir == 1 || dir == -1); 1405 1406 dst_key = (char *)key + (dir * cur->bc_ops->key_len); 1407 memmove(dst_key, key, numkeys * cur->bc_ops->key_len); 1408 } 1409 1410 /* 1411 * Shift records one index left/right inside a single btree block. 1412 */ 1413 STATIC void 1414 xfs_btree_shift_recs( 1415 struct xfs_btree_cur *cur, 1416 union xfs_btree_rec *rec, 1417 int dir, 1418 int numrecs) 1419 { 1420 char *dst_rec; 1421 1422 ASSERT(numrecs >= 0); 1423 ASSERT(dir == 1 || dir == -1); 1424 1425 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); 1426 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); 1427 } 1428 1429 /* 1430 * Shift block pointers one index left/right inside a single btree block. 1431 */ 1432 STATIC void 1433 xfs_btree_shift_ptrs( 1434 struct xfs_btree_cur *cur, 1435 union xfs_btree_ptr *ptr, 1436 int dir, 1437 int numptrs) 1438 { 1439 char *dst_ptr; 1440 1441 ASSERT(numptrs >= 0); 1442 ASSERT(dir == 1 || dir == -1); 1443 1444 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); 1445 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); 1446 } 1447 1448 /* 1449 * Log key values from the btree block. 1450 */ 1451 STATIC void 1452 xfs_btree_log_keys( 1453 struct xfs_btree_cur *cur, 1454 struct xfs_buf *bp, 1455 int first, 1456 int last) 1457 { 1458 1459 if (bp) { 1460 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1461 xfs_trans_log_buf(cur->bc_tp, bp, 1462 xfs_btree_key_offset(cur, first), 1463 xfs_btree_key_offset(cur, last + 1) - 1); 1464 } else { 1465 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1466 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1467 } 1468 } 1469 1470 /* 1471 * Log record values from the btree block. 1472 */ 1473 void 1474 xfs_btree_log_recs( 1475 struct xfs_btree_cur *cur, 1476 struct xfs_buf *bp, 1477 int first, 1478 int last) 1479 { 1480 1481 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1482 xfs_trans_log_buf(cur->bc_tp, bp, 1483 xfs_btree_rec_offset(cur, first), 1484 xfs_btree_rec_offset(cur, last + 1) - 1); 1485 1486 } 1487 1488 /* 1489 * Log block pointer fields from a btree block (nonleaf). 1490 */ 1491 STATIC void 1492 xfs_btree_log_ptrs( 1493 struct xfs_btree_cur *cur, /* btree cursor */ 1494 struct xfs_buf *bp, /* buffer containing btree block */ 1495 int first, /* index of first pointer to log */ 1496 int last) /* index of last pointer to log */ 1497 { 1498 1499 if (bp) { 1500 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 1501 int level = xfs_btree_get_level(block); 1502 1503 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1504 xfs_trans_log_buf(cur->bc_tp, bp, 1505 xfs_btree_ptr_offset(cur, first, level), 1506 xfs_btree_ptr_offset(cur, last + 1, level) - 1); 1507 } else { 1508 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1509 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1510 } 1511 1512 } 1513 1514 /* 1515 * Log fields from a btree block header. 1516 */ 1517 void 1518 xfs_btree_log_block( 1519 struct xfs_btree_cur *cur, /* btree cursor */ 1520 struct xfs_buf *bp, /* buffer containing btree block */ 1521 int fields) /* mask of fields: XFS_BB_... */ 1522 { 1523 int first; /* first byte offset logged */ 1524 int last; /* last byte offset logged */ 1525 static const short soffsets[] = { /* table of offsets (short) */ 1526 offsetof(struct xfs_btree_block, bb_magic), 1527 offsetof(struct xfs_btree_block, bb_level), 1528 offsetof(struct xfs_btree_block, bb_numrecs), 1529 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), 1530 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), 1531 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), 1532 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), 1533 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), 1534 offsetof(struct xfs_btree_block, bb_u.s.bb_owner), 1535 offsetof(struct xfs_btree_block, bb_u.s.bb_crc), 1536 XFS_BTREE_SBLOCK_CRC_LEN 1537 }; 1538 static const short loffsets[] = { /* table of offsets (long) */ 1539 offsetof(struct xfs_btree_block, bb_magic), 1540 offsetof(struct xfs_btree_block, bb_level), 1541 offsetof(struct xfs_btree_block, bb_numrecs), 1542 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), 1543 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), 1544 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), 1545 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), 1546 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), 1547 offsetof(struct xfs_btree_block, bb_u.l.bb_owner), 1548 offsetof(struct xfs_btree_block, bb_u.l.bb_crc), 1549 offsetof(struct xfs_btree_block, bb_u.l.bb_pad), 1550 XFS_BTREE_LBLOCK_CRC_LEN 1551 }; 1552 1553 if (bp) { 1554 int nbits; 1555 1556 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 1557 /* 1558 * We don't log the CRC when updating a btree 1559 * block but instead recreate it during log 1560 * recovery. As the log buffers have checksums 1561 * of their own this is safe and avoids logging a crc 1562 * update in a lot of places. 1563 */ 1564 if (fields == XFS_BB_ALL_BITS) 1565 fields = XFS_BB_ALL_BITS_CRC; 1566 nbits = XFS_BB_NUM_BITS_CRC; 1567 } else { 1568 nbits = XFS_BB_NUM_BITS; 1569 } 1570 xfs_btree_offsets(fields, 1571 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 1572 loffsets : soffsets, 1573 nbits, &first, &last); 1574 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1575 xfs_trans_log_buf(cur->bc_tp, bp, first, last); 1576 } else { 1577 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1578 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1579 } 1580 } 1581 1582 /* 1583 * Increment cursor by one record at the level. 1584 * For nonzero levels the leaf-ward information is untouched. 1585 */ 1586 int /* error */ 1587 xfs_btree_increment( 1588 struct xfs_btree_cur *cur, 1589 int level, 1590 int *stat) /* success/failure */ 1591 { 1592 struct xfs_btree_block *block; 1593 union xfs_btree_ptr ptr; 1594 struct xfs_buf *bp; 1595 int error; /* error return value */ 1596 int lev; 1597 1598 ASSERT(level < cur->bc_nlevels); 1599 1600 /* Read-ahead to the right at this level. */ 1601 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 1602 1603 /* Get a pointer to the btree block. */ 1604 block = xfs_btree_get_block(cur, level, &bp); 1605 1606 #ifdef DEBUG 1607 error = xfs_btree_check_block(cur, block, level, bp); 1608 if (error) 1609 goto error0; 1610 #endif 1611 1612 /* We're done if we remain in the block after the increment. */ 1613 if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) 1614 goto out1; 1615 1616 /* Fail if we just went off the right edge of the tree. */ 1617 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1618 if (xfs_btree_ptr_is_null(cur, &ptr)) 1619 goto out0; 1620 1621 XFS_BTREE_STATS_INC(cur, increment); 1622 1623 /* 1624 * March up the tree incrementing pointers. 1625 * Stop when we don't go off the right edge of a block. 1626 */ 1627 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1628 block = xfs_btree_get_block(cur, lev, &bp); 1629 1630 #ifdef DEBUG 1631 error = xfs_btree_check_block(cur, block, lev, bp); 1632 if (error) 1633 goto error0; 1634 #endif 1635 1636 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) 1637 break; 1638 1639 /* Read-ahead the right block for the next loop. */ 1640 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); 1641 } 1642 1643 /* 1644 * If we went off the root then we are either seriously 1645 * confused or have the tree root in an inode. 1646 */ 1647 if (lev == cur->bc_nlevels) { 1648 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1649 goto out0; 1650 ASSERT(0); 1651 error = -EFSCORRUPTED; 1652 goto error0; 1653 } 1654 ASSERT(lev < cur->bc_nlevels); 1655 1656 /* 1657 * Now walk back down the tree, fixing up the cursor's buffer 1658 * pointers and key numbers. 1659 */ 1660 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1661 union xfs_btree_ptr *ptrp; 1662 1663 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1664 --lev; 1665 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1666 if (error) 1667 goto error0; 1668 1669 xfs_btree_setbuf(cur, lev, bp); 1670 cur->bc_ptrs[lev] = 1; 1671 } 1672 out1: 1673 *stat = 1; 1674 return 0; 1675 1676 out0: 1677 *stat = 0; 1678 return 0; 1679 1680 error0: 1681 return error; 1682 } 1683 1684 /* 1685 * Decrement cursor by one record at the level. 1686 * For nonzero levels the leaf-ward information is untouched. 1687 */ 1688 int /* error */ 1689 xfs_btree_decrement( 1690 struct xfs_btree_cur *cur, 1691 int level, 1692 int *stat) /* success/failure */ 1693 { 1694 struct xfs_btree_block *block; 1695 xfs_buf_t *bp; 1696 int error; /* error return value */ 1697 int lev; 1698 union xfs_btree_ptr ptr; 1699 1700 ASSERT(level < cur->bc_nlevels); 1701 1702 /* Read-ahead to the left at this level. */ 1703 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); 1704 1705 /* We're done if we remain in the block after the decrement. */ 1706 if (--cur->bc_ptrs[level] > 0) 1707 goto out1; 1708 1709 /* Get a pointer to the btree block. */ 1710 block = xfs_btree_get_block(cur, level, &bp); 1711 1712 #ifdef DEBUG 1713 error = xfs_btree_check_block(cur, block, level, bp); 1714 if (error) 1715 goto error0; 1716 #endif 1717 1718 /* Fail if we just went off the left edge of the tree. */ 1719 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 1720 if (xfs_btree_ptr_is_null(cur, &ptr)) 1721 goto out0; 1722 1723 XFS_BTREE_STATS_INC(cur, decrement); 1724 1725 /* 1726 * March up the tree decrementing pointers. 1727 * Stop when we don't go off the left edge of a block. 1728 */ 1729 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1730 if (--cur->bc_ptrs[lev] > 0) 1731 break; 1732 /* Read-ahead the left block for the next loop. */ 1733 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); 1734 } 1735 1736 /* 1737 * If we went off the root then we are seriously confused. 1738 * or the root of the tree is in an inode. 1739 */ 1740 if (lev == cur->bc_nlevels) { 1741 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1742 goto out0; 1743 ASSERT(0); 1744 error = -EFSCORRUPTED; 1745 goto error0; 1746 } 1747 ASSERT(lev < cur->bc_nlevels); 1748 1749 /* 1750 * Now walk back down the tree, fixing up the cursor's buffer 1751 * pointers and key numbers. 1752 */ 1753 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1754 union xfs_btree_ptr *ptrp; 1755 1756 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1757 --lev; 1758 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1759 if (error) 1760 goto error0; 1761 xfs_btree_setbuf(cur, lev, bp); 1762 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); 1763 } 1764 out1: 1765 *stat = 1; 1766 return 0; 1767 1768 out0: 1769 *stat = 0; 1770 return 0; 1771 1772 error0: 1773 return error; 1774 } 1775 1776 int 1777 xfs_btree_lookup_get_block( 1778 struct xfs_btree_cur *cur, /* btree cursor */ 1779 int level, /* level in the btree */ 1780 union xfs_btree_ptr *pp, /* ptr to btree block */ 1781 struct xfs_btree_block **blkp) /* return btree block */ 1782 { 1783 struct xfs_buf *bp; /* buffer pointer for btree block */ 1784 xfs_daddr_t daddr; 1785 int error = 0; 1786 1787 /* special case the root block if in an inode */ 1788 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 1789 (level == cur->bc_nlevels - 1)) { 1790 *blkp = xfs_btree_get_iroot(cur); 1791 return 0; 1792 } 1793 1794 /* 1795 * If the old buffer at this level for the disk address we are 1796 * looking for re-use it. 1797 * 1798 * Otherwise throw it away and get a new one. 1799 */ 1800 bp = cur->bc_bufs[level]; 1801 error = xfs_btree_ptr_to_daddr(cur, pp, &daddr); 1802 if (error) 1803 return error; 1804 if (bp && XFS_BUF_ADDR(bp) == daddr) { 1805 *blkp = XFS_BUF_TO_BLOCK(bp); 1806 return 0; 1807 } 1808 1809 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); 1810 if (error) 1811 return error; 1812 1813 /* Check the inode owner since the verifiers don't. */ 1814 if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && 1815 !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) && 1816 (cur->bc_flags & XFS_BTREE_LONG_PTRS) && 1817 be64_to_cpu((*blkp)->bb_u.l.bb_owner) != 1818 cur->bc_private.b.ip->i_ino) 1819 goto out_bad; 1820 1821 /* Did we get the level we were looking for? */ 1822 if (be16_to_cpu((*blkp)->bb_level) != level) 1823 goto out_bad; 1824 1825 /* Check that internal nodes have at least one record. */ 1826 if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0) 1827 goto out_bad; 1828 1829 xfs_btree_setbuf(cur, level, bp); 1830 return 0; 1831 1832 out_bad: 1833 *blkp = NULL; 1834 xfs_trans_brelse(cur->bc_tp, bp); 1835 return -EFSCORRUPTED; 1836 } 1837 1838 /* 1839 * Get current search key. For level 0 we don't actually have a key 1840 * structure so we make one up from the record. For all other levels 1841 * we just return the right key. 1842 */ 1843 STATIC union xfs_btree_key * 1844 xfs_lookup_get_search_key( 1845 struct xfs_btree_cur *cur, 1846 int level, 1847 int keyno, 1848 struct xfs_btree_block *block, 1849 union xfs_btree_key *kp) 1850 { 1851 if (level == 0) { 1852 cur->bc_ops->init_key_from_rec(kp, 1853 xfs_btree_rec_addr(cur, keyno, block)); 1854 return kp; 1855 } 1856 1857 return xfs_btree_key_addr(cur, keyno, block); 1858 } 1859 1860 /* 1861 * Lookup the record. The cursor is made to point to it, based on dir. 1862 * stat is set to 0 if can't find any such record, 1 for success. 1863 */ 1864 int /* error */ 1865 xfs_btree_lookup( 1866 struct xfs_btree_cur *cur, /* btree cursor */ 1867 xfs_lookup_t dir, /* <=, ==, or >= */ 1868 int *stat) /* success/failure */ 1869 { 1870 struct xfs_btree_block *block; /* current btree block */ 1871 int64_t diff; /* difference for the current key */ 1872 int error; /* error return value */ 1873 int keyno; /* current key number */ 1874 int level; /* level in the btree */ 1875 union xfs_btree_ptr *pp; /* ptr to btree block */ 1876 union xfs_btree_ptr ptr; /* ptr to btree block */ 1877 1878 XFS_BTREE_STATS_INC(cur, lookup); 1879 1880 /* No such thing as a zero-level tree. */ 1881 if (cur->bc_nlevels == 0) 1882 return -EFSCORRUPTED; 1883 1884 block = NULL; 1885 keyno = 0; 1886 1887 /* initialise start pointer from cursor */ 1888 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 1889 pp = &ptr; 1890 1891 /* 1892 * Iterate over each level in the btree, starting at the root. 1893 * For each level above the leaves, find the key we need, based 1894 * on the lookup record, then follow the corresponding block 1895 * pointer down to the next level. 1896 */ 1897 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { 1898 /* Get the block we need to do the lookup on. */ 1899 error = xfs_btree_lookup_get_block(cur, level, pp, &block); 1900 if (error) 1901 goto error0; 1902 1903 if (diff == 0) { 1904 /* 1905 * If we already had a key match at a higher level, we 1906 * know we need to use the first entry in this block. 1907 */ 1908 keyno = 1; 1909 } else { 1910 /* Otherwise search this block. Do a binary search. */ 1911 1912 int high; /* high entry number */ 1913 int low; /* low entry number */ 1914 1915 /* Set low and high entry numbers, 1-based. */ 1916 low = 1; 1917 high = xfs_btree_get_numrecs(block); 1918 if (!high) { 1919 /* Block is empty, must be an empty leaf. */ 1920 if (level != 0 || cur->bc_nlevels != 1) { 1921 XFS_CORRUPTION_ERROR(__func__, 1922 XFS_ERRLEVEL_LOW, 1923 cur->bc_mp, block, 1924 sizeof(*block)); 1925 return -EFSCORRUPTED; 1926 } 1927 1928 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; 1929 *stat = 0; 1930 return 0; 1931 } 1932 1933 /* Binary search the block. */ 1934 while (low <= high) { 1935 union xfs_btree_key key; 1936 union xfs_btree_key *kp; 1937 1938 XFS_BTREE_STATS_INC(cur, compare); 1939 1940 /* keyno is average of low and high. */ 1941 keyno = (low + high) >> 1; 1942 1943 /* Get current search key */ 1944 kp = xfs_lookup_get_search_key(cur, level, 1945 keyno, block, &key); 1946 1947 /* 1948 * Compute difference to get next direction: 1949 * - less than, move right 1950 * - greater than, move left 1951 * - equal, we're done 1952 */ 1953 diff = cur->bc_ops->key_diff(cur, kp); 1954 if (diff < 0) 1955 low = keyno + 1; 1956 else if (diff > 0) 1957 high = keyno - 1; 1958 else 1959 break; 1960 } 1961 } 1962 1963 /* 1964 * If there are more levels, set up for the next level 1965 * by getting the block number and filling in the cursor. 1966 */ 1967 if (level > 0) { 1968 /* 1969 * If we moved left, need the previous key number, 1970 * unless there isn't one. 1971 */ 1972 if (diff > 0 && --keyno < 1) 1973 keyno = 1; 1974 pp = xfs_btree_ptr_addr(cur, keyno, block); 1975 1976 error = xfs_btree_debug_check_ptr(cur, pp, 0, level); 1977 if (error) 1978 goto error0; 1979 1980 cur->bc_ptrs[level] = keyno; 1981 } 1982 } 1983 1984 /* Done with the search. See if we need to adjust the results. */ 1985 if (dir != XFS_LOOKUP_LE && diff < 0) { 1986 keyno++; 1987 /* 1988 * If ge search and we went off the end of the block, but it's 1989 * not the last block, we're in the wrong block. 1990 */ 1991 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1992 if (dir == XFS_LOOKUP_GE && 1993 keyno > xfs_btree_get_numrecs(block) && 1994 !xfs_btree_ptr_is_null(cur, &ptr)) { 1995 int i; 1996 1997 cur->bc_ptrs[0] = keyno; 1998 error = xfs_btree_increment(cur, 0, &i); 1999 if (error) 2000 goto error0; 2001 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); 2002 *stat = 1; 2003 return 0; 2004 } 2005 } else if (dir == XFS_LOOKUP_LE && diff > 0) 2006 keyno--; 2007 cur->bc_ptrs[0] = keyno; 2008 2009 /* Return if we succeeded or not. */ 2010 if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) 2011 *stat = 0; 2012 else if (dir != XFS_LOOKUP_EQ || diff == 0) 2013 *stat = 1; 2014 else 2015 *stat = 0; 2016 return 0; 2017 2018 error0: 2019 return error; 2020 } 2021 2022 /* Find the high key storage area from a regular key. */ 2023 union xfs_btree_key * 2024 xfs_btree_high_key_from_key( 2025 struct xfs_btree_cur *cur, 2026 union xfs_btree_key *key) 2027 { 2028 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 2029 return (union xfs_btree_key *)((char *)key + 2030 (cur->bc_ops->key_len / 2)); 2031 } 2032 2033 /* Determine the low (and high if overlapped) keys of a leaf block */ 2034 STATIC void 2035 xfs_btree_get_leaf_keys( 2036 struct xfs_btree_cur *cur, 2037 struct xfs_btree_block *block, 2038 union xfs_btree_key *key) 2039 { 2040 union xfs_btree_key max_hkey; 2041 union xfs_btree_key hkey; 2042 union xfs_btree_rec *rec; 2043 union xfs_btree_key *high; 2044 int n; 2045 2046 rec = xfs_btree_rec_addr(cur, 1, block); 2047 cur->bc_ops->init_key_from_rec(key, rec); 2048 2049 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2050 2051 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec); 2052 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 2053 rec = xfs_btree_rec_addr(cur, n, block); 2054 cur->bc_ops->init_high_key_from_rec(&hkey, rec); 2055 if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey) 2056 > 0) 2057 max_hkey = hkey; 2058 } 2059 2060 high = xfs_btree_high_key_from_key(cur, key); 2061 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2); 2062 } 2063 } 2064 2065 /* Determine the low (and high if overlapped) keys of a node block */ 2066 STATIC void 2067 xfs_btree_get_node_keys( 2068 struct xfs_btree_cur *cur, 2069 struct xfs_btree_block *block, 2070 union xfs_btree_key *key) 2071 { 2072 union xfs_btree_key *hkey; 2073 union xfs_btree_key *max_hkey; 2074 union xfs_btree_key *high; 2075 int n; 2076 2077 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2078 memcpy(key, xfs_btree_key_addr(cur, 1, block), 2079 cur->bc_ops->key_len / 2); 2080 2081 max_hkey = xfs_btree_high_key_addr(cur, 1, block); 2082 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 2083 hkey = xfs_btree_high_key_addr(cur, n, block); 2084 if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0) 2085 max_hkey = hkey; 2086 } 2087 2088 high = xfs_btree_high_key_from_key(cur, key); 2089 memcpy(high, max_hkey, cur->bc_ops->key_len / 2); 2090 } else { 2091 memcpy(key, xfs_btree_key_addr(cur, 1, block), 2092 cur->bc_ops->key_len); 2093 } 2094 } 2095 2096 /* Derive the keys for any btree block. */ 2097 void 2098 xfs_btree_get_keys( 2099 struct xfs_btree_cur *cur, 2100 struct xfs_btree_block *block, 2101 union xfs_btree_key *key) 2102 { 2103 if (be16_to_cpu(block->bb_level) == 0) 2104 xfs_btree_get_leaf_keys(cur, block, key); 2105 else 2106 xfs_btree_get_node_keys(cur, block, key); 2107 } 2108 2109 /* 2110 * Decide if we need to update the parent keys of a btree block. For 2111 * a standard btree this is only necessary if we're updating the first 2112 * record/key. For an overlapping btree, we must always update the 2113 * keys because the highest key can be in any of the records or keys 2114 * in the block. 2115 */ 2116 static inline bool 2117 xfs_btree_needs_key_update( 2118 struct xfs_btree_cur *cur, 2119 int ptr) 2120 { 2121 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1; 2122 } 2123 2124 /* 2125 * Update the low and high parent keys of the given level, progressing 2126 * towards the root. If force_all is false, stop if the keys for a given 2127 * level do not need updating. 2128 */ 2129 STATIC int 2130 __xfs_btree_updkeys( 2131 struct xfs_btree_cur *cur, 2132 int level, 2133 struct xfs_btree_block *block, 2134 struct xfs_buf *bp0, 2135 bool force_all) 2136 { 2137 union xfs_btree_key key; /* keys from current level */ 2138 union xfs_btree_key *lkey; /* keys from the next level up */ 2139 union xfs_btree_key *hkey; 2140 union xfs_btree_key *nlkey; /* keys from the next level up */ 2141 union xfs_btree_key *nhkey; 2142 struct xfs_buf *bp; 2143 int ptr; 2144 2145 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 2146 2147 /* Exit if there aren't any parent levels to update. */ 2148 if (level + 1 >= cur->bc_nlevels) 2149 return 0; 2150 2151 trace_xfs_btree_updkeys(cur, level, bp0); 2152 2153 lkey = &key; 2154 hkey = xfs_btree_high_key_from_key(cur, lkey); 2155 xfs_btree_get_keys(cur, block, lkey); 2156 for (level++; level < cur->bc_nlevels; level++) { 2157 #ifdef DEBUG 2158 int error; 2159 #endif 2160 block = xfs_btree_get_block(cur, level, &bp); 2161 trace_xfs_btree_updkeys(cur, level, bp); 2162 #ifdef DEBUG 2163 error = xfs_btree_check_block(cur, block, level, bp); 2164 if (error) 2165 return error; 2166 #endif 2167 ptr = cur->bc_ptrs[level]; 2168 nlkey = xfs_btree_key_addr(cur, ptr, block); 2169 nhkey = xfs_btree_high_key_addr(cur, ptr, block); 2170 if (!force_all && 2171 !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 || 2172 cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0)) 2173 break; 2174 xfs_btree_copy_keys(cur, nlkey, lkey, 1); 2175 xfs_btree_log_keys(cur, bp, ptr, ptr); 2176 if (level + 1 >= cur->bc_nlevels) 2177 break; 2178 xfs_btree_get_node_keys(cur, block, lkey); 2179 } 2180 2181 return 0; 2182 } 2183 2184 /* Update all the keys from some level in cursor back to the root. */ 2185 STATIC int 2186 xfs_btree_updkeys_force( 2187 struct xfs_btree_cur *cur, 2188 int level) 2189 { 2190 struct xfs_buf *bp; 2191 struct xfs_btree_block *block; 2192 2193 block = xfs_btree_get_block(cur, level, &bp); 2194 return __xfs_btree_updkeys(cur, level, block, bp, true); 2195 } 2196 2197 /* 2198 * Update the parent keys of the given level, progressing towards the root. 2199 */ 2200 STATIC int 2201 xfs_btree_update_keys( 2202 struct xfs_btree_cur *cur, 2203 int level) 2204 { 2205 struct xfs_btree_block *block; 2206 struct xfs_buf *bp; 2207 union xfs_btree_key *kp; 2208 union xfs_btree_key key; 2209 int ptr; 2210 2211 ASSERT(level >= 0); 2212 2213 block = xfs_btree_get_block(cur, level, &bp); 2214 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) 2215 return __xfs_btree_updkeys(cur, level, block, bp, false); 2216 2217 /* 2218 * Go up the tree from this level toward the root. 2219 * At each level, update the key value to the value input. 2220 * Stop when we reach a level where the cursor isn't pointing 2221 * at the first entry in the block. 2222 */ 2223 xfs_btree_get_keys(cur, block, &key); 2224 for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { 2225 #ifdef DEBUG 2226 int error; 2227 #endif 2228 block = xfs_btree_get_block(cur, level, &bp); 2229 #ifdef DEBUG 2230 error = xfs_btree_check_block(cur, block, level, bp); 2231 if (error) 2232 return error; 2233 #endif 2234 ptr = cur->bc_ptrs[level]; 2235 kp = xfs_btree_key_addr(cur, ptr, block); 2236 xfs_btree_copy_keys(cur, kp, &key, 1); 2237 xfs_btree_log_keys(cur, bp, ptr, ptr); 2238 } 2239 2240 return 0; 2241 } 2242 2243 /* 2244 * Update the record referred to by cur to the value in the 2245 * given record. This either works (return 0) or gets an 2246 * EFSCORRUPTED error. 2247 */ 2248 int 2249 xfs_btree_update( 2250 struct xfs_btree_cur *cur, 2251 union xfs_btree_rec *rec) 2252 { 2253 struct xfs_btree_block *block; 2254 struct xfs_buf *bp; 2255 int error; 2256 int ptr; 2257 union xfs_btree_rec *rp; 2258 2259 /* Pick up the current block. */ 2260 block = xfs_btree_get_block(cur, 0, &bp); 2261 2262 #ifdef DEBUG 2263 error = xfs_btree_check_block(cur, block, 0, bp); 2264 if (error) 2265 goto error0; 2266 #endif 2267 /* Get the address of the rec to be updated. */ 2268 ptr = cur->bc_ptrs[0]; 2269 rp = xfs_btree_rec_addr(cur, ptr, block); 2270 2271 /* Fill in the new contents and log them. */ 2272 xfs_btree_copy_recs(cur, rp, rec, 1); 2273 xfs_btree_log_recs(cur, bp, ptr, ptr); 2274 2275 /* 2276 * If we are tracking the last record in the tree and 2277 * we are at the far right edge of the tree, update it. 2278 */ 2279 if (xfs_btree_is_lastrec(cur, block, 0)) { 2280 cur->bc_ops->update_lastrec(cur, block, rec, 2281 ptr, LASTREC_UPDATE); 2282 } 2283 2284 /* Pass new key value up to our parent. */ 2285 if (xfs_btree_needs_key_update(cur, ptr)) { 2286 error = xfs_btree_update_keys(cur, 0); 2287 if (error) 2288 goto error0; 2289 } 2290 2291 return 0; 2292 2293 error0: 2294 return error; 2295 } 2296 2297 /* 2298 * Move 1 record left from cur/level if possible. 2299 * Update cur to reflect the new path. 2300 */ 2301 STATIC int /* error */ 2302 xfs_btree_lshift( 2303 struct xfs_btree_cur *cur, 2304 int level, 2305 int *stat) /* success/failure */ 2306 { 2307 struct xfs_buf *lbp; /* left buffer pointer */ 2308 struct xfs_btree_block *left; /* left btree block */ 2309 int lrecs; /* left record count */ 2310 struct xfs_buf *rbp; /* right buffer pointer */ 2311 struct xfs_btree_block *right; /* right btree block */ 2312 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2313 int rrecs; /* right record count */ 2314 union xfs_btree_ptr lptr; /* left btree pointer */ 2315 union xfs_btree_key *rkp = NULL; /* right btree key */ 2316 union xfs_btree_ptr *rpp = NULL; /* right address pointer */ 2317 union xfs_btree_rec *rrp = NULL; /* right record pointer */ 2318 int error; /* error return value */ 2319 int i; 2320 2321 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2322 level == cur->bc_nlevels - 1) 2323 goto out0; 2324 2325 /* Set up variables for this block as "right". */ 2326 right = xfs_btree_get_block(cur, level, &rbp); 2327 2328 #ifdef DEBUG 2329 error = xfs_btree_check_block(cur, right, level, rbp); 2330 if (error) 2331 goto error0; 2332 #endif 2333 2334 /* If we've got no left sibling then we can't shift an entry left. */ 2335 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2336 if (xfs_btree_ptr_is_null(cur, &lptr)) 2337 goto out0; 2338 2339 /* 2340 * If the cursor entry is the one that would be moved, don't 2341 * do it... it's too complicated. 2342 */ 2343 if (cur->bc_ptrs[level] <= 1) 2344 goto out0; 2345 2346 /* Set up the left neighbor as "left". */ 2347 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 2348 if (error) 2349 goto error0; 2350 2351 /* If it's full, it can't take another entry. */ 2352 lrecs = xfs_btree_get_numrecs(left); 2353 if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) 2354 goto out0; 2355 2356 rrecs = xfs_btree_get_numrecs(right); 2357 2358 /* 2359 * We add one entry to the left side and remove one for the right side. 2360 * Account for it here, the changes will be updated on disk and logged 2361 * later. 2362 */ 2363 lrecs++; 2364 rrecs--; 2365 2366 XFS_BTREE_STATS_INC(cur, lshift); 2367 XFS_BTREE_STATS_ADD(cur, moves, 1); 2368 2369 /* 2370 * If non-leaf, copy a key and a ptr to the left block. 2371 * Log the changes to the left block. 2372 */ 2373 if (level > 0) { 2374 /* It's a non-leaf. Move keys and pointers. */ 2375 union xfs_btree_key *lkp; /* left btree key */ 2376 union xfs_btree_ptr *lpp; /* left address pointer */ 2377 2378 lkp = xfs_btree_key_addr(cur, lrecs, left); 2379 rkp = xfs_btree_key_addr(cur, 1, right); 2380 2381 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2382 rpp = xfs_btree_ptr_addr(cur, 1, right); 2383 2384 error = xfs_btree_debug_check_ptr(cur, rpp, 0, level); 2385 if (error) 2386 goto error0; 2387 2388 xfs_btree_copy_keys(cur, lkp, rkp, 1); 2389 xfs_btree_copy_ptrs(cur, lpp, rpp, 1); 2390 2391 xfs_btree_log_keys(cur, lbp, lrecs, lrecs); 2392 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); 2393 2394 ASSERT(cur->bc_ops->keys_inorder(cur, 2395 xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); 2396 } else { 2397 /* It's a leaf. Move records. */ 2398 union xfs_btree_rec *lrp; /* left record pointer */ 2399 2400 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2401 rrp = xfs_btree_rec_addr(cur, 1, right); 2402 2403 xfs_btree_copy_recs(cur, lrp, rrp, 1); 2404 xfs_btree_log_recs(cur, lbp, lrecs, lrecs); 2405 2406 ASSERT(cur->bc_ops->recs_inorder(cur, 2407 xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); 2408 } 2409 2410 xfs_btree_set_numrecs(left, lrecs); 2411 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2412 2413 xfs_btree_set_numrecs(right, rrecs); 2414 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2415 2416 /* 2417 * Slide the contents of right down one entry. 2418 */ 2419 XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); 2420 if (level > 0) { 2421 /* It's a nonleaf. operate on keys and ptrs */ 2422 int i; /* loop index */ 2423 2424 for (i = 0; i < rrecs; i++) { 2425 error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level); 2426 if (error) 2427 goto error0; 2428 } 2429 2430 xfs_btree_shift_keys(cur, 2431 xfs_btree_key_addr(cur, 2, right), 2432 -1, rrecs); 2433 xfs_btree_shift_ptrs(cur, 2434 xfs_btree_ptr_addr(cur, 2, right), 2435 -1, rrecs); 2436 2437 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2438 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2439 } else { 2440 /* It's a leaf. operate on records */ 2441 xfs_btree_shift_recs(cur, 2442 xfs_btree_rec_addr(cur, 2, right), 2443 -1, rrecs); 2444 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2445 } 2446 2447 /* 2448 * Using a temporary cursor, update the parent key values of the 2449 * block on the left. 2450 */ 2451 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2452 error = xfs_btree_dup_cursor(cur, &tcur); 2453 if (error) 2454 goto error0; 2455 i = xfs_btree_firstrec(tcur, level); 2456 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0); 2457 2458 error = xfs_btree_decrement(tcur, level, &i); 2459 if (error) 2460 goto error1; 2461 2462 /* Update the parent high keys of the left block, if needed. */ 2463 error = xfs_btree_update_keys(tcur, level); 2464 if (error) 2465 goto error1; 2466 2467 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2468 } 2469 2470 /* Update the parent keys of the right block. */ 2471 error = xfs_btree_update_keys(cur, level); 2472 if (error) 2473 goto error0; 2474 2475 /* Slide the cursor value left one. */ 2476 cur->bc_ptrs[level]--; 2477 2478 *stat = 1; 2479 return 0; 2480 2481 out0: 2482 *stat = 0; 2483 return 0; 2484 2485 error0: 2486 return error; 2487 2488 error1: 2489 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2490 return error; 2491 } 2492 2493 /* 2494 * Move 1 record right from cur/level if possible. 2495 * Update cur to reflect the new path. 2496 */ 2497 STATIC int /* error */ 2498 xfs_btree_rshift( 2499 struct xfs_btree_cur *cur, 2500 int level, 2501 int *stat) /* success/failure */ 2502 { 2503 struct xfs_buf *lbp; /* left buffer pointer */ 2504 struct xfs_btree_block *left; /* left btree block */ 2505 struct xfs_buf *rbp; /* right buffer pointer */ 2506 struct xfs_btree_block *right; /* right btree block */ 2507 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2508 union xfs_btree_ptr rptr; /* right block pointer */ 2509 union xfs_btree_key *rkp; /* right btree key */ 2510 int rrecs; /* right record count */ 2511 int lrecs; /* left record count */ 2512 int error; /* error return value */ 2513 int i; /* loop counter */ 2514 2515 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2516 (level == cur->bc_nlevels - 1)) 2517 goto out0; 2518 2519 /* Set up variables for this block as "left". */ 2520 left = xfs_btree_get_block(cur, level, &lbp); 2521 2522 #ifdef DEBUG 2523 error = xfs_btree_check_block(cur, left, level, lbp); 2524 if (error) 2525 goto error0; 2526 #endif 2527 2528 /* If we've got no right sibling then we can't shift an entry right. */ 2529 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2530 if (xfs_btree_ptr_is_null(cur, &rptr)) 2531 goto out0; 2532 2533 /* 2534 * If the cursor entry is the one that would be moved, don't 2535 * do it... it's too complicated. 2536 */ 2537 lrecs = xfs_btree_get_numrecs(left); 2538 if (cur->bc_ptrs[level] >= lrecs) 2539 goto out0; 2540 2541 /* Set up the right neighbor as "right". */ 2542 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 2543 if (error) 2544 goto error0; 2545 2546 /* If it's full, it can't take another entry. */ 2547 rrecs = xfs_btree_get_numrecs(right); 2548 if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) 2549 goto out0; 2550 2551 XFS_BTREE_STATS_INC(cur, rshift); 2552 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2553 2554 /* 2555 * Make a hole at the start of the right neighbor block, then 2556 * copy the last left block entry to the hole. 2557 */ 2558 if (level > 0) { 2559 /* It's a nonleaf. make a hole in the keys and ptrs */ 2560 union xfs_btree_key *lkp; 2561 union xfs_btree_ptr *lpp; 2562 union xfs_btree_ptr *rpp; 2563 2564 lkp = xfs_btree_key_addr(cur, lrecs, left); 2565 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2566 rkp = xfs_btree_key_addr(cur, 1, right); 2567 rpp = xfs_btree_ptr_addr(cur, 1, right); 2568 2569 for (i = rrecs - 1; i >= 0; i--) { 2570 error = xfs_btree_debug_check_ptr(cur, rpp, i, level); 2571 if (error) 2572 goto error0; 2573 } 2574 2575 xfs_btree_shift_keys(cur, rkp, 1, rrecs); 2576 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); 2577 2578 error = xfs_btree_debug_check_ptr(cur, lpp, 0, level); 2579 if (error) 2580 goto error0; 2581 2582 /* Now put the new data in, and log it. */ 2583 xfs_btree_copy_keys(cur, rkp, lkp, 1); 2584 xfs_btree_copy_ptrs(cur, rpp, lpp, 1); 2585 2586 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); 2587 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); 2588 2589 ASSERT(cur->bc_ops->keys_inorder(cur, rkp, 2590 xfs_btree_key_addr(cur, 2, right))); 2591 } else { 2592 /* It's a leaf. make a hole in the records */ 2593 union xfs_btree_rec *lrp; 2594 union xfs_btree_rec *rrp; 2595 2596 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2597 rrp = xfs_btree_rec_addr(cur, 1, right); 2598 2599 xfs_btree_shift_recs(cur, rrp, 1, rrecs); 2600 2601 /* Now put the new data in, and log it. */ 2602 xfs_btree_copy_recs(cur, rrp, lrp, 1); 2603 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); 2604 } 2605 2606 /* 2607 * Decrement and log left's numrecs, bump and log right's numrecs. 2608 */ 2609 xfs_btree_set_numrecs(left, --lrecs); 2610 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2611 2612 xfs_btree_set_numrecs(right, ++rrecs); 2613 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2614 2615 /* 2616 * Using a temporary cursor, update the parent key values of the 2617 * block on the right. 2618 */ 2619 error = xfs_btree_dup_cursor(cur, &tcur); 2620 if (error) 2621 goto error0; 2622 i = xfs_btree_lastrec(tcur, level); 2623 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0); 2624 2625 error = xfs_btree_increment(tcur, level, &i); 2626 if (error) 2627 goto error1; 2628 2629 /* Update the parent high keys of the left block, if needed. */ 2630 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2631 error = xfs_btree_update_keys(cur, level); 2632 if (error) 2633 goto error1; 2634 } 2635 2636 /* Update the parent keys of the right block. */ 2637 error = xfs_btree_update_keys(tcur, level); 2638 if (error) 2639 goto error1; 2640 2641 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2642 2643 *stat = 1; 2644 return 0; 2645 2646 out0: 2647 *stat = 0; 2648 return 0; 2649 2650 error0: 2651 return error; 2652 2653 error1: 2654 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2655 return error; 2656 } 2657 2658 /* 2659 * Split cur/level block in half. 2660 * Return new block number and the key to its first 2661 * record (to be inserted into parent). 2662 */ 2663 STATIC int /* error */ 2664 __xfs_btree_split( 2665 struct xfs_btree_cur *cur, 2666 int level, 2667 union xfs_btree_ptr *ptrp, 2668 union xfs_btree_key *key, 2669 struct xfs_btree_cur **curp, 2670 int *stat) /* success/failure */ 2671 { 2672 union xfs_btree_ptr lptr; /* left sibling block ptr */ 2673 struct xfs_buf *lbp; /* left buffer pointer */ 2674 struct xfs_btree_block *left; /* left btree block */ 2675 union xfs_btree_ptr rptr; /* right sibling block ptr */ 2676 struct xfs_buf *rbp; /* right buffer pointer */ 2677 struct xfs_btree_block *right; /* right btree block */ 2678 union xfs_btree_ptr rrptr; /* right-right sibling ptr */ 2679 struct xfs_buf *rrbp; /* right-right buffer pointer */ 2680 struct xfs_btree_block *rrblock; /* right-right btree block */ 2681 int lrecs; 2682 int rrecs; 2683 int src_index; 2684 int error; /* error return value */ 2685 int i; 2686 2687 XFS_BTREE_STATS_INC(cur, split); 2688 2689 /* Set up left block (current one). */ 2690 left = xfs_btree_get_block(cur, level, &lbp); 2691 2692 #ifdef DEBUG 2693 error = xfs_btree_check_block(cur, left, level, lbp); 2694 if (error) 2695 goto error0; 2696 #endif 2697 2698 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 2699 2700 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2701 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); 2702 if (error) 2703 goto error0; 2704 if (*stat == 0) 2705 goto out0; 2706 XFS_BTREE_STATS_INC(cur, alloc); 2707 2708 /* Set up the new block as "right". */ 2709 error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); 2710 if (error) 2711 goto error0; 2712 2713 /* Fill in the btree header for the new right block. */ 2714 xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0); 2715 2716 /* 2717 * Split the entries between the old and the new block evenly. 2718 * Make sure that if there's an odd number of entries now, that 2719 * each new block will have the same number of entries. 2720 */ 2721 lrecs = xfs_btree_get_numrecs(left); 2722 rrecs = lrecs / 2; 2723 if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) 2724 rrecs++; 2725 src_index = (lrecs - rrecs + 1); 2726 2727 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2728 2729 /* Adjust numrecs for the later get_*_keys() calls. */ 2730 lrecs -= rrecs; 2731 xfs_btree_set_numrecs(left, lrecs); 2732 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); 2733 2734 /* 2735 * Copy btree block entries from the left block over to the 2736 * new block, the right. Update the right block and log the 2737 * changes. 2738 */ 2739 if (level > 0) { 2740 /* It's a non-leaf. Move keys and pointers. */ 2741 union xfs_btree_key *lkp; /* left btree key */ 2742 union xfs_btree_ptr *lpp; /* left address pointer */ 2743 union xfs_btree_key *rkp; /* right btree key */ 2744 union xfs_btree_ptr *rpp; /* right address pointer */ 2745 2746 lkp = xfs_btree_key_addr(cur, src_index, left); 2747 lpp = xfs_btree_ptr_addr(cur, src_index, left); 2748 rkp = xfs_btree_key_addr(cur, 1, right); 2749 rpp = xfs_btree_ptr_addr(cur, 1, right); 2750 2751 for (i = src_index; i < rrecs; i++) { 2752 error = xfs_btree_debug_check_ptr(cur, lpp, i, level); 2753 if (error) 2754 goto error0; 2755 } 2756 2757 /* Copy the keys & pointers to the new block. */ 2758 xfs_btree_copy_keys(cur, rkp, lkp, rrecs); 2759 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); 2760 2761 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2762 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2763 2764 /* Stash the keys of the new block for later insertion. */ 2765 xfs_btree_get_node_keys(cur, right, key); 2766 } else { 2767 /* It's a leaf. Move records. */ 2768 union xfs_btree_rec *lrp; /* left record pointer */ 2769 union xfs_btree_rec *rrp; /* right record pointer */ 2770 2771 lrp = xfs_btree_rec_addr(cur, src_index, left); 2772 rrp = xfs_btree_rec_addr(cur, 1, right); 2773 2774 /* Copy records to the new block. */ 2775 xfs_btree_copy_recs(cur, rrp, lrp, rrecs); 2776 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2777 2778 /* Stash the keys of the new block for later insertion. */ 2779 xfs_btree_get_leaf_keys(cur, right, key); 2780 } 2781 2782 /* 2783 * Find the left block number by looking in the buffer. 2784 * Adjust sibling pointers. 2785 */ 2786 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); 2787 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); 2788 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2789 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2790 2791 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); 2792 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 2793 2794 /* 2795 * If there's a block to the new block's right, make that block 2796 * point back to right instead of to left. 2797 */ 2798 if (!xfs_btree_ptr_is_null(cur, &rrptr)) { 2799 error = xfs_btree_read_buf_block(cur, &rrptr, 2800 0, &rrblock, &rrbp); 2801 if (error) 2802 goto error0; 2803 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); 2804 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 2805 } 2806 2807 /* Update the parent high keys of the left block, if needed. */ 2808 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2809 error = xfs_btree_update_keys(cur, level); 2810 if (error) 2811 goto error0; 2812 } 2813 2814 /* 2815 * If the cursor is really in the right block, move it there. 2816 * If it's just pointing past the last entry in left, then we'll 2817 * insert there, so don't change anything in that case. 2818 */ 2819 if (cur->bc_ptrs[level] > lrecs + 1) { 2820 xfs_btree_setbuf(cur, level, rbp); 2821 cur->bc_ptrs[level] -= lrecs; 2822 } 2823 /* 2824 * If there are more levels, we'll need another cursor which refers 2825 * the right block, no matter where this cursor was. 2826 */ 2827 if (level + 1 < cur->bc_nlevels) { 2828 error = xfs_btree_dup_cursor(cur, curp); 2829 if (error) 2830 goto error0; 2831 (*curp)->bc_ptrs[level + 1]++; 2832 } 2833 *ptrp = rptr; 2834 *stat = 1; 2835 return 0; 2836 out0: 2837 *stat = 0; 2838 return 0; 2839 2840 error0: 2841 return error; 2842 } 2843 2844 struct xfs_btree_split_args { 2845 struct xfs_btree_cur *cur; 2846 int level; 2847 union xfs_btree_ptr *ptrp; 2848 union xfs_btree_key *key; 2849 struct xfs_btree_cur **curp; 2850 int *stat; /* success/failure */ 2851 int result; 2852 bool kswapd; /* allocation in kswapd context */ 2853 struct completion *done; 2854 struct work_struct work; 2855 }; 2856 2857 /* 2858 * Stack switching interfaces for allocation 2859 */ 2860 static void 2861 xfs_btree_split_worker( 2862 struct work_struct *work) 2863 { 2864 struct xfs_btree_split_args *args = container_of(work, 2865 struct xfs_btree_split_args, work); 2866 unsigned long pflags; 2867 unsigned long new_pflags = PF_MEMALLOC_NOFS; 2868 2869 /* 2870 * we are in a transaction context here, but may also be doing work 2871 * in kswapd context, and hence we may need to inherit that state 2872 * temporarily to ensure that we don't block waiting for memory reclaim 2873 * in any way. 2874 */ 2875 if (args->kswapd) 2876 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; 2877 2878 current_set_flags_nested(&pflags, new_pflags); 2879 2880 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, 2881 args->key, args->curp, args->stat); 2882 complete(args->done); 2883 2884 current_restore_flags_nested(&pflags, new_pflags); 2885 } 2886 2887 /* 2888 * BMBT split requests often come in with little stack to work on. Push 2889 * them off to a worker thread so there is lots of stack to use. For the other 2890 * btree types, just call directly to avoid the context switch overhead here. 2891 */ 2892 STATIC int /* error */ 2893 xfs_btree_split( 2894 struct xfs_btree_cur *cur, 2895 int level, 2896 union xfs_btree_ptr *ptrp, 2897 union xfs_btree_key *key, 2898 struct xfs_btree_cur **curp, 2899 int *stat) /* success/failure */ 2900 { 2901 struct xfs_btree_split_args args; 2902 DECLARE_COMPLETION_ONSTACK(done); 2903 2904 if (cur->bc_btnum != XFS_BTNUM_BMAP) 2905 return __xfs_btree_split(cur, level, ptrp, key, curp, stat); 2906 2907 args.cur = cur; 2908 args.level = level; 2909 args.ptrp = ptrp; 2910 args.key = key; 2911 args.curp = curp; 2912 args.stat = stat; 2913 args.done = &done; 2914 args.kswapd = current_is_kswapd(); 2915 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); 2916 queue_work(xfs_alloc_wq, &args.work); 2917 wait_for_completion(&done); 2918 destroy_work_on_stack(&args.work); 2919 return args.result; 2920 } 2921 2922 2923 /* 2924 * Copy the old inode root contents into a real block and make the 2925 * broot point to it. 2926 */ 2927 int /* error */ 2928 xfs_btree_new_iroot( 2929 struct xfs_btree_cur *cur, /* btree cursor */ 2930 int *logflags, /* logging flags for inode */ 2931 int *stat) /* return status - 0 fail */ 2932 { 2933 struct xfs_buf *cbp; /* buffer for cblock */ 2934 struct xfs_btree_block *block; /* btree block */ 2935 struct xfs_btree_block *cblock; /* child btree block */ 2936 union xfs_btree_key *ckp; /* child key pointer */ 2937 union xfs_btree_ptr *cpp; /* child ptr pointer */ 2938 union xfs_btree_key *kp; /* pointer to btree key */ 2939 union xfs_btree_ptr *pp; /* pointer to block addr */ 2940 union xfs_btree_ptr nptr; /* new block addr */ 2941 int level; /* btree level */ 2942 int error; /* error return code */ 2943 int i; /* loop counter */ 2944 2945 XFS_BTREE_STATS_INC(cur, newroot); 2946 2947 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 2948 2949 level = cur->bc_nlevels - 1; 2950 2951 block = xfs_btree_get_iroot(cur); 2952 pp = xfs_btree_ptr_addr(cur, 1, block); 2953 2954 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2955 error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); 2956 if (error) 2957 goto error0; 2958 if (*stat == 0) 2959 return 0; 2960 2961 XFS_BTREE_STATS_INC(cur, alloc); 2962 2963 /* Copy the root into a real block. */ 2964 error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); 2965 if (error) 2966 goto error0; 2967 2968 /* 2969 * we can't just memcpy() the root in for CRC enabled btree blocks. 2970 * In that case have to also ensure the blkno remains correct 2971 */ 2972 memcpy(cblock, block, xfs_btree_block_len(cur)); 2973 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 2974 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 2975 cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn); 2976 else 2977 cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn); 2978 } 2979 2980 be16_add_cpu(&block->bb_level, 1); 2981 xfs_btree_set_numrecs(block, 1); 2982 cur->bc_nlevels++; 2983 cur->bc_ptrs[level + 1] = 1; 2984 2985 kp = xfs_btree_key_addr(cur, 1, block); 2986 ckp = xfs_btree_key_addr(cur, 1, cblock); 2987 xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); 2988 2989 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 2990 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { 2991 error = xfs_btree_debug_check_ptr(cur, pp, i, level); 2992 if (error) 2993 goto error0; 2994 } 2995 2996 xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); 2997 2998 error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level); 2999 if (error) 3000 goto error0; 3001 3002 xfs_btree_copy_ptrs(cur, pp, &nptr, 1); 3003 3004 xfs_iroot_realloc(cur->bc_private.b.ip, 3005 1 - xfs_btree_get_numrecs(cblock), 3006 cur->bc_private.b.whichfork); 3007 3008 xfs_btree_setbuf(cur, level, cbp); 3009 3010 /* 3011 * Do all this logging at the end so that 3012 * the root is at the right level. 3013 */ 3014 xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); 3015 xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 3016 xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 3017 3018 *logflags |= 3019 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); 3020 *stat = 1; 3021 return 0; 3022 error0: 3023 return error; 3024 } 3025 3026 /* 3027 * Allocate a new root block, fill it in. 3028 */ 3029 STATIC int /* error */ 3030 xfs_btree_new_root( 3031 struct xfs_btree_cur *cur, /* btree cursor */ 3032 int *stat) /* success/failure */ 3033 { 3034 struct xfs_btree_block *block; /* one half of the old root block */ 3035 struct xfs_buf *bp; /* buffer containing block */ 3036 int error; /* error return value */ 3037 struct xfs_buf *lbp; /* left buffer pointer */ 3038 struct xfs_btree_block *left; /* left btree block */ 3039 struct xfs_buf *nbp; /* new (root) buffer */ 3040 struct xfs_btree_block *new; /* new (root) btree block */ 3041 int nptr; /* new value for key index, 1 or 2 */ 3042 struct xfs_buf *rbp; /* right buffer pointer */ 3043 struct xfs_btree_block *right; /* right btree block */ 3044 union xfs_btree_ptr rptr; 3045 union xfs_btree_ptr lptr; 3046 3047 XFS_BTREE_STATS_INC(cur, newroot); 3048 3049 /* initialise our start point from the cursor */ 3050 cur->bc_ops->init_ptr_from_cur(cur, &rptr); 3051 3052 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 3053 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); 3054 if (error) 3055 goto error0; 3056 if (*stat == 0) 3057 goto out0; 3058 XFS_BTREE_STATS_INC(cur, alloc); 3059 3060 /* Set up the new block. */ 3061 error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); 3062 if (error) 3063 goto error0; 3064 3065 /* Set the root in the holding structure increasing the level by 1. */ 3066 cur->bc_ops->set_root(cur, &lptr, 1); 3067 3068 /* 3069 * At the previous root level there are now two blocks: the old root, 3070 * and the new block generated when it was split. We don't know which 3071 * one the cursor is pointing at, so we set up variables "left" and 3072 * "right" for each case. 3073 */ 3074 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); 3075 3076 #ifdef DEBUG 3077 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); 3078 if (error) 3079 goto error0; 3080 #endif 3081 3082 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3083 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3084 /* Our block is left, pick up the right block. */ 3085 lbp = bp; 3086 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 3087 left = block; 3088 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 3089 if (error) 3090 goto error0; 3091 bp = rbp; 3092 nptr = 1; 3093 } else { 3094 /* Our block is right, pick up the left block. */ 3095 rbp = bp; 3096 xfs_btree_buf_to_ptr(cur, rbp, &rptr); 3097 right = block; 3098 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 3099 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 3100 if (error) 3101 goto error0; 3102 bp = lbp; 3103 nptr = 2; 3104 } 3105 3106 /* Fill in the new block's btree header and log it. */ 3107 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); 3108 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 3109 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && 3110 !xfs_btree_ptr_is_null(cur, &rptr)); 3111 3112 /* Fill in the key data in the new root. */ 3113 if (xfs_btree_get_level(left) > 0) { 3114 /* 3115 * Get the keys for the left block's keys and put them directly 3116 * in the parent block. Do the same for the right block. 3117 */ 3118 xfs_btree_get_node_keys(cur, left, 3119 xfs_btree_key_addr(cur, 1, new)); 3120 xfs_btree_get_node_keys(cur, right, 3121 xfs_btree_key_addr(cur, 2, new)); 3122 } else { 3123 /* 3124 * Get the keys for the left block's records and put them 3125 * directly in the parent block. Do the same for the right 3126 * block. 3127 */ 3128 xfs_btree_get_leaf_keys(cur, left, 3129 xfs_btree_key_addr(cur, 1, new)); 3130 xfs_btree_get_leaf_keys(cur, right, 3131 xfs_btree_key_addr(cur, 2, new)); 3132 } 3133 xfs_btree_log_keys(cur, nbp, 1, 2); 3134 3135 /* Fill in the pointer data in the new root. */ 3136 xfs_btree_copy_ptrs(cur, 3137 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); 3138 xfs_btree_copy_ptrs(cur, 3139 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); 3140 xfs_btree_log_ptrs(cur, nbp, 1, 2); 3141 3142 /* Fix up the cursor. */ 3143 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); 3144 cur->bc_ptrs[cur->bc_nlevels] = nptr; 3145 cur->bc_nlevels++; 3146 *stat = 1; 3147 return 0; 3148 error0: 3149 return error; 3150 out0: 3151 *stat = 0; 3152 return 0; 3153 } 3154 3155 STATIC int 3156 xfs_btree_make_block_unfull( 3157 struct xfs_btree_cur *cur, /* btree cursor */ 3158 int level, /* btree level */ 3159 int numrecs,/* # of recs in block */ 3160 int *oindex,/* old tree index */ 3161 int *index, /* new tree index */ 3162 union xfs_btree_ptr *nptr, /* new btree ptr */ 3163 struct xfs_btree_cur **ncur, /* new btree cursor */ 3164 union xfs_btree_key *key, /* key of new block */ 3165 int *stat) 3166 { 3167 int error = 0; 3168 3169 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3170 level == cur->bc_nlevels - 1) { 3171 struct xfs_inode *ip = cur->bc_private.b.ip; 3172 3173 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { 3174 /* A root block that can be made bigger. */ 3175 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); 3176 *stat = 1; 3177 } else { 3178 /* A root block that needs replacing */ 3179 int logflags = 0; 3180 3181 error = xfs_btree_new_iroot(cur, &logflags, stat); 3182 if (error || *stat == 0) 3183 return error; 3184 3185 xfs_trans_log_inode(cur->bc_tp, ip, logflags); 3186 } 3187 3188 return 0; 3189 } 3190 3191 /* First, try shifting an entry to the right neighbor. */ 3192 error = xfs_btree_rshift(cur, level, stat); 3193 if (error || *stat) 3194 return error; 3195 3196 /* Next, try shifting an entry to the left neighbor. */ 3197 error = xfs_btree_lshift(cur, level, stat); 3198 if (error) 3199 return error; 3200 3201 if (*stat) { 3202 *oindex = *index = cur->bc_ptrs[level]; 3203 return 0; 3204 } 3205 3206 /* 3207 * Next, try splitting the current block in half. 3208 * 3209 * If this works we have to re-set our variables because we 3210 * could be in a different block now. 3211 */ 3212 error = xfs_btree_split(cur, level, nptr, key, ncur, stat); 3213 if (error || *stat == 0) 3214 return error; 3215 3216 3217 *index = cur->bc_ptrs[level]; 3218 return 0; 3219 } 3220 3221 /* 3222 * Insert one record/level. Return information to the caller 3223 * allowing the next level up to proceed if necessary. 3224 */ 3225 STATIC int 3226 xfs_btree_insrec( 3227 struct xfs_btree_cur *cur, /* btree cursor */ 3228 int level, /* level to insert record at */ 3229 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ 3230 union xfs_btree_rec *rec, /* record to insert */ 3231 union xfs_btree_key *key, /* i/o: block key for ptrp */ 3232 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ 3233 int *stat) /* success/failure */ 3234 { 3235 struct xfs_btree_block *block; /* btree block */ 3236 struct xfs_buf *bp; /* buffer for block */ 3237 union xfs_btree_ptr nptr; /* new block ptr */ 3238 struct xfs_btree_cur *ncur; /* new btree cursor */ 3239 union xfs_btree_key nkey; /* new block key */ 3240 union xfs_btree_key *lkey; 3241 int optr; /* old key/record index */ 3242 int ptr; /* key/record index */ 3243 int numrecs;/* number of records */ 3244 int error; /* error return value */ 3245 int i; 3246 xfs_daddr_t old_bn; 3247 3248 ncur = NULL; 3249 lkey = &nkey; 3250 3251 /* 3252 * If we have an external root pointer, and we've made it to the 3253 * root level, allocate a new root block and we're done. 3254 */ 3255 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3256 (level >= cur->bc_nlevels)) { 3257 error = xfs_btree_new_root(cur, stat); 3258 xfs_btree_set_ptr_null(cur, ptrp); 3259 3260 return error; 3261 } 3262 3263 /* If we're off the left edge, return failure. */ 3264 ptr = cur->bc_ptrs[level]; 3265 if (ptr == 0) { 3266 *stat = 0; 3267 return 0; 3268 } 3269 3270 optr = ptr; 3271 3272 XFS_BTREE_STATS_INC(cur, insrec); 3273 3274 /* Get pointers to the btree buffer and block. */ 3275 block = xfs_btree_get_block(cur, level, &bp); 3276 old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL; 3277 numrecs = xfs_btree_get_numrecs(block); 3278 3279 #ifdef DEBUG 3280 error = xfs_btree_check_block(cur, block, level, bp); 3281 if (error) 3282 goto error0; 3283 3284 /* Check that the new entry is being inserted in the right place. */ 3285 if (ptr <= numrecs) { 3286 if (level == 0) { 3287 ASSERT(cur->bc_ops->recs_inorder(cur, rec, 3288 xfs_btree_rec_addr(cur, ptr, block))); 3289 } else { 3290 ASSERT(cur->bc_ops->keys_inorder(cur, key, 3291 xfs_btree_key_addr(cur, ptr, block))); 3292 } 3293 } 3294 #endif 3295 3296 /* 3297 * If the block is full, we can't insert the new entry until we 3298 * make the block un-full. 3299 */ 3300 xfs_btree_set_ptr_null(cur, &nptr); 3301 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { 3302 error = xfs_btree_make_block_unfull(cur, level, numrecs, 3303 &optr, &ptr, &nptr, &ncur, lkey, stat); 3304 if (error || *stat == 0) 3305 goto error0; 3306 } 3307 3308 /* 3309 * The current block may have changed if the block was 3310 * previously full and we have just made space in it. 3311 */ 3312 block = xfs_btree_get_block(cur, level, &bp); 3313 numrecs = xfs_btree_get_numrecs(block); 3314 3315 #ifdef DEBUG 3316 error = xfs_btree_check_block(cur, block, level, bp); 3317 if (error) 3318 return error; 3319 #endif 3320 3321 /* 3322 * At this point we know there's room for our new entry in the block 3323 * we're pointing at. 3324 */ 3325 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); 3326 3327 if (level > 0) { 3328 /* It's a nonleaf. make a hole in the keys and ptrs */ 3329 union xfs_btree_key *kp; 3330 union xfs_btree_ptr *pp; 3331 3332 kp = xfs_btree_key_addr(cur, ptr, block); 3333 pp = xfs_btree_ptr_addr(cur, ptr, block); 3334 3335 for (i = numrecs - ptr; i >= 0; i--) { 3336 error = xfs_btree_debug_check_ptr(cur, pp, i, level); 3337 if (error) 3338 return error; 3339 } 3340 3341 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); 3342 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); 3343 3344 error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level); 3345 if (error) 3346 goto error0; 3347 3348 /* Now put the new data in, bump numrecs and log it. */ 3349 xfs_btree_copy_keys(cur, kp, key, 1); 3350 xfs_btree_copy_ptrs(cur, pp, ptrp, 1); 3351 numrecs++; 3352 xfs_btree_set_numrecs(block, numrecs); 3353 xfs_btree_log_ptrs(cur, bp, ptr, numrecs); 3354 xfs_btree_log_keys(cur, bp, ptr, numrecs); 3355 #ifdef DEBUG 3356 if (ptr < numrecs) { 3357 ASSERT(cur->bc_ops->keys_inorder(cur, kp, 3358 xfs_btree_key_addr(cur, ptr + 1, block))); 3359 } 3360 #endif 3361 } else { 3362 /* It's a leaf. make a hole in the records */ 3363 union xfs_btree_rec *rp; 3364 3365 rp = xfs_btree_rec_addr(cur, ptr, block); 3366 3367 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); 3368 3369 /* Now put the new data in, bump numrecs and log it. */ 3370 xfs_btree_copy_recs(cur, rp, rec, 1); 3371 xfs_btree_set_numrecs(block, ++numrecs); 3372 xfs_btree_log_recs(cur, bp, ptr, numrecs); 3373 #ifdef DEBUG 3374 if (ptr < numrecs) { 3375 ASSERT(cur->bc_ops->recs_inorder(cur, rp, 3376 xfs_btree_rec_addr(cur, ptr + 1, block))); 3377 } 3378 #endif 3379 } 3380 3381 /* Log the new number of records in the btree header. */ 3382 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3383 3384 /* 3385 * If we just inserted into a new tree block, we have to 3386 * recalculate nkey here because nkey is out of date. 3387 * 3388 * Otherwise we're just updating an existing block (having shoved 3389 * some records into the new tree block), so use the regular key 3390 * update mechanism. 3391 */ 3392 if (bp && bp->b_bn != old_bn) { 3393 xfs_btree_get_keys(cur, block, lkey); 3394 } else if (xfs_btree_needs_key_update(cur, optr)) { 3395 error = xfs_btree_update_keys(cur, level); 3396 if (error) 3397 goto error0; 3398 } 3399 3400 /* 3401 * If we are tracking the last record in the tree and 3402 * we are at the far right edge of the tree, update it. 3403 */ 3404 if (xfs_btree_is_lastrec(cur, block, level)) { 3405 cur->bc_ops->update_lastrec(cur, block, rec, 3406 ptr, LASTREC_INSREC); 3407 } 3408 3409 /* 3410 * Return the new block number, if any. 3411 * If there is one, give back a record value and a cursor too. 3412 */ 3413 *ptrp = nptr; 3414 if (!xfs_btree_ptr_is_null(cur, &nptr)) { 3415 xfs_btree_copy_keys(cur, key, lkey, 1); 3416 *curp = ncur; 3417 } 3418 3419 *stat = 1; 3420 return 0; 3421 3422 error0: 3423 return error; 3424 } 3425 3426 /* 3427 * Insert the record at the point referenced by cur. 3428 * 3429 * A multi-level split of the tree on insert will invalidate the original 3430 * cursor. All callers of this function should assume that the cursor is 3431 * no longer valid and revalidate it. 3432 */ 3433 int 3434 xfs_btree_insert( 3435 struct xfs_btree_cur *cur, 3436 int *stat) 3437 { 3438 int error; /* error return value */ 3439 int i; /* result value, 0 for failure */ 3440 int level; /* current level number in btree */ 3441 union xfs_btree_ptr nptr; /* new block number (split result) */ 3442 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3443 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3444 union xfs_btree_key bkey; /* key of block to insert */ 3445 union xfs_btree_key *key; 3446 union xfs_btree_rec rec; /* record to insert */ 3447 3448 level = 0; 3449 ncur = NULL; 3450 pcur = cur; 3451 key = &bkey; 3452 3453 xfs_btree_set_ptr_null(cur, &nptr); 3454 3455 /* Make a key out of the record data to be inserted, and save it. */ 3456 cur->bc_ops->init_rec_from_cur(cur, &rec); 3457 cur->bc_ops->init_key_from_rec(key, &rec); 3458 3459 /* 3460 * Loop going up the tree, starting at the leaf level. 3461 * Stop when we don't get a split block, that must mean that 3462 * the insert is finished with this level. 3463 */ 3464 do { 3465 /* 3466 * Insert nrec/nptr into this level of the tree. 3467 * Note if we fail, nptr will be null. 3468 */ 3469 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key, 3470 &ncur, &i); 3471 if (error) { 3472 if (pcur != cur) 3473 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 3474 goto error0; 3475 } 3476 3477 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3478 level++; 3479 3480 /* 3481 * See if the cursor we just used is trash. 3482 * Can't trash the caller's cursor, but otherwise we should 3483 * if ncur is a new cursor or we're about to be done. 3484 */ 3485 if (pcur != cur && 3486 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { 3487 /* Save the state from the cursor before we trash it */ 3488 if (cur->bc_ops->update_cursor) 3489 cur->bc_ops->update_cursor(pcur, cur); 3490 cur->bc_nlevels = pcur->bc_nlevels; 3491 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 3492 } 3493 /* If we got a new cursor, switch to it. */ 3494 if (ncur) { 3495 pcur = ncur; 3496 ncur = NULL; 3497 } 3498 } while (!xfs_btree_ptr_is_null(cur, &nptr)); 3499 3500 *stat = i; 3501 return 0; 3502 error0: 3503 return error; 3504 } 3505 3506 /* 3507 * Try to merge a non-leaf block back into the inode root. 3508 * 3509 * Note: the killroot names comes from the fact that we're effectively 3510 * killing the old root block. But because we can't just delete the 3511 * inode we have to copy the single block it was pointing to into the 3512 * inode. 3513 */ 3514 STATIC int 3515 xfs_btree_kill_iroot( 3516 struct xfs_btree_cur *cur) 3517 { 3518 int whichfork = cur->bc_private.b.whichfork; 3519 struct xfs_inode *ip = cur->bc_private.b.ip; 3520 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3521 struct xfs_btree_block *block; 3522 struct xfs_btree_block *cblock; 3523 union xfs_btree_key *kp; 3524 union xfs_btree_key *ckp; 3525 union xfs_btree_ptr *pp; 3526 union xfs_btree_ptr *cpp; 3527 struct xfs_buf *cbp; 3528 int level; 3529 int index; 3530 int numrecs; 3531 int error; 3532 #ifdef DEBUG 3533 union xfs_btree_ptr ptr; 3534 #endif 3535 int i; 3536 3537 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 3538 ASSERT(cur->bc_nlevels > 1); 3539 3540 /* 3541 * Don't deal with the root block needs to be a leaf case. 3542 * We're just going to turn the thing back into extents anyway. 3543 */ 3544 level = cur->bc_nlevels - 1; 3545 if (level == 1) 3546 goto out0; 3547 3548 /* 3549 * Give up if the root has multiple children. 3550 */ 3551 block = xfs_btree_get_iroot(cur); 3552 if (xfs_btree_get_numrecs(block) != 1) 3553 goto out0; 3554 3555 cblock = xfs_btree_get_block(cur, level - 1, &cbp); 3556 numrecs = xfs_btree_get_numrecs(cblock); 3557 3558 /* 3559 * Only do this if the next level will fit. 3560 * Then the data must be copied up to the inode, 3561 * instead of freeing the root you free the next level. 3562 */ 3563 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) 3564 goto out0; 3565 3566 XFS_BTREE_STATS_INC(cur, killroot); 3567 3568 #ifdef DEBUG 3569 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 3570 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3571 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 3572 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3573 #endif 3574 3575 index = numrecs - cur->bc_ops->get_maxrecs(cur, level); 3576 if (index) { 3577 xfs_iroot_realloc(cur->bc_private.b.ip, index, 3578 cur->bc_private.b.whichfork); 3579 block = ifp->if_broot; 3580 } 3581 3582 be16_add_cpu(&block->bb_numrecs, index); 3583 ASSERT(block->bb_numrecs == cblock->bb_numrecs); 3584 3585 kp = xfs_btree_key_addr(cur, 1, block); 3586 ckp = xfs_btree_key_addr(cur, 1, cblock); 3587 xfs_btree_copy_keys(cur, kp, ckp, numrecs); 3588 3589 pp = xfs_btree_ptr_addr(cur, 1, block); 3590 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3591 3592 for (i = 0; i < numrecs; i++) { 3593 error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1); 3594 if (error) 3595 return error; 3596 } 3597 3598 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); 3599 3600 error = xfs_btree_free_block(cur, cbp); 3601 if (error) 3602 return error; 3603 3604 cur->bc_bufs[level - 1] = NULL; 3605 be16_add_cpu(&block->bb_level, -1); 3606 xfs_trans_log_inode(cur->bc_tp, ip, 3607 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 3608 cur->bc_nlevels--; 3609 out0: 3610 return 0; 3611 } 3612 3613 /* 3614 * Kill the current root node, and replace it with it's only child node. 3615 */ 3616 STATIC int 3617 xfs_btree_kill_root( 3618 struct xfs_btree_cur *cur, 3619 struct xfs_buf *bp, 3620 int level, 3621 union xfs_btree_ptr *newroot) 3622 { 3623 int error; 3624 3625 XFS_BTREE_STATS_INC(cur, killroot); 3626 3627 /* 3628 * Update the root pointer, decreasing the level by 1 and then 3629 * free the old root. 3630 */ 3631 cur->bc_ops->set_root(cur, newroot, -1); 3632 3633 error = xfs_btree_free_block(cur, bp); 3634 if (error) 3635 return error; 3636 3637 cur->bc_bufs[level] = NULL; 3638 cur->bc_ra[level] = 0; 3639 cur->bc_nlevels--; 3640 3641 return 0; 3642 } 3643 3644 STATIC int 3645 xfs_btree_dec_cursor( 3646 struct xfs_btree_cur *cur, 3647 int level, 3648 int *stat) 3649 { 3650 int error; 3651 int i; 3652 3653 if (level > 0) { 3654 error = xfs_btree_decrement(cur, level, &i); 3655 if (error) 3656 return error; 3657 } 3658 3659 *stat = 1; 3660 return 0; 3661 } 3662 3663 /* 3664 * Single level of the btree record deletion routine. 3665 * Delete record pointed to by cur/level. 3666 * Remove the record from its block then rebalance the tree. 3667 * Return 0 for error, 1 for done, 2 to go on to the next level. 3668 */ 3669 STATIC int /* error */ 3670 xfs_btree_delrec( 3671 struct xfs_btree_cur *cur, /* btree cursor */ 3672 int level, /* level removing record from */ 3673 int *stat) /* fail/done/go-on */ 3674 { 3675 struct xfs_btree_block *block; /* btree block */ 3676 union xfs_btree_ptr cptr; /* current block ptr */ 3677 struct xfs_buf *bp; /* buffer for block */ 3678 int error; /* error return value */ 3679 int i; /* loop counter */ 3680 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3681 struct xfs_buf *lbp; /* left buffer pointer */ 3682 struct xfs_btree_block *left; /* left btree block */ 3683 int lrecs = 0; /* left record count */ 3684 int ptr; /* key/record index */ 3685 union xfs_btree_ptr rptr; /* right sibling block ptr */ 3686 struct xfs_buf *rbp; /* right buffer pointer */ 3687 struct xfs_btree_block *right; /* right btree block */ 3688 struct xfs_btree_block *rrblock; /* right-right btree block */ 3689 struct xfs_buf *rrbp; /* right-right buffer pointer */ 3690 int rrecs = 0; /* right record count */ 3691 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 3692 int numrecs; /* temporary numrec count */ 3693 3694 tcur = NULL; 3695 3696 /* Get the index of the entry being deleted, check for nothing there. */ 3697 ptr = cur->bc_ptrs[level]; 3698 if (ptr == 0) { 3699 *stat = 0; 3700 return 0; 3701 } 3702 3703 /* Get the buffer & block containing the record or key/ptr. */ 3704 block = xfs_btree_get_block(cur, level, &bp); 3705 numrecs = xfs_btree_get_numrecs(block); 3706 3707 #ifdef DEBUG 3708 error = xfs_btree_check_block(cur, block, level, bp); 3709 if (error) 3710 goto error0; 3711 #endif 3712 3713 /* Fail if we're off the end of the block. */ 3714 if (ptr > numrecs) { 3715 *stat = 0; 3716 return 0; 3717 } 3718 3719 XFS_BTREE_STATS_INC(cur, delrec); 3720 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); 3721 3722 /* Excise the entries being deleted. */ 3723 if (level > 0) { 3724 /* It's a nonleaf. operate on keys and ptrs */ 3725 union xfs_btree_key *lkp; 3726 union xfs_btree_ptr *lpp; 3727 3728 lkp = xfs_btree_key_addr(cur, ptr + 1, block); 3729 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); 3730 3731 for (i = 0; i < numrecs - ptr; i++) { 3732 error = xfs_btree_debug_check_ptr(cur, lpp, i, level); 3733 if (error) 3734 goto error0; 3735 } 3736 3737 if (ptr < numrecs) { 3738 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); 3739 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); 3740 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3741 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3742 } 3743 } else { 3744 /* It's a leaf. operate on records */ 3745 if (ptr < numrecs) { 3746 xfs_btree_shift_recs(cur, 3747 xfs_btree_rec_addr(cur, ptr + 1, block), 3748 -1, numrecs - ptr); 3749 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3750 } 3751 } 3752 3753 /* 3754 * Decrement and log the number of entries in the block. 3755 */ 3756 xfs_btree_set_numrecs(block, --numrecs); 3757 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3758 3759 /* 3760 * If we are tracking the last record in the tree and 3761 * we are at the far right edge of the tree, update it. 3762 */ 3763 if (xfs_btree_is_lastrec(cur, block, level)) { 3764 cur->bc_ops->update_lastrec(cur, block, NULL, 3765 ptr, LASTREC_DELREC); 3766 } 3767 3768 /* 3769 * We're at the root level. First, shrink the root block in-memory. 3770 * Try to get rid of the next level down. If we can't then there's 3771 * nothing left to do. 3772 */ 3773 if (level == cur->bc_nlevels - 1) { 3774 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3775 xfs_iroot_realloc(cur->bc_private.b.ip, -1, 3776 cur->bc_private.b.whichfork); 3777 3778 error = xfs_btree_kill_iroot(cur); 3779 if (error) 3780 goto error0; 3781 3782 error = xfs_btree_dec_cursor(cur, level, stat); 3783 if (error) 3784 goto error0; 3785 *stat = 1; 3786 return 0; 3787 } 3788 3789 /* 3790 * If this is the root level, and there's only one entry left, 3791 * and it's NOT the leaf level, then we can get rid of this 3792 * level. 3793 */ 3794 if (numrecs == 1 && level > 0) { 3795 union xfs_btree_ptr *pp; 3796 /* 3797 * pp is still set to the first pointer in the block. 3798 * Make it the new root of the btree. 3799 */ 3800 pp = xfs_btree_ptr_addr(cur, 1, block); 3801 error = xfs_btree_kill_root(cur, bp, level, pp); 3802 if (error) 3803 goto error0; 3804 } else if (level > 0) { 3805 error = xfs_btree_dec_cursor(cur, level, stat); 3806 if (error) 3807 goto error0; 3808 } 3809 *stat = 1; 3810 return 0; 3811 } 3812 3813 /* 3814 * If we deleted the leftmost entry in the block, update the 3815 * key values above us in the tree. 3816 */ 3817 if (xfs_btree_needs_key_update(cur, ptr)) { 3818 error = xfs_btree_update_keys(cur, level); 3819 if (error) 3820 goto error0; 3821 } 3822 3823 /* 3824 * If the number of records remaining in the block is at least 3825 * the minimum, we're done. 3826 */ 3827 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { 3828 error = xfs_btree_dec_cursor(cur, level, stat); 3829 if (error) 3830 goto error0; 3831 return 0; 3832 } 3833 3834 /* 3835 * Otherwise, we have to move some records around to keep the 3836 * tree balanced. Look at the left and right sibling blocks to 3837 * see if we can re-balance by moving only one record. 3838 */ 3839 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3840 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); 3841 3842 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3843 /* 3844 * One child of root, need to get a chance to copy its contents 3845 * into the root and delete it. Can't go up to next level, 3846 * there's nothing to delete there. 3847 */ 3848 if (xfs_btree_ptr_is_null(cur, &rptr) && 3849 xfs_btree_ptr_is_null(cur, &lptr) && 3850 level == cur->bc_nlevels - 2) { 3851 error = xfs_btree_kill_iroot(cur); 3852 if (!error) 3853 error = xfs_btree_dec_cursor(cur, level, stat); 3854 if (error) 3855 goto error0; 3856 return 0; 3857 } 3858 } 3859 3860 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || 3861 !xfs_btree_ptr_is_null(cur, &lptr)); 3862 3863 /* 3864 * Duplicate the cursor so our btree manipulations here won't 3865 * disrupt the next level up. 3866 */ 3867 error = xfs_btree_dup_cursor(cur, &tcur); 3868 if (error) 3869 goto error0; 3870 3871 /* 3872 * If there's a right sibling, see if it's ok to shift an entry 3873 * out of it. 3874 */ 3875 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3876 /* 3877 * Move the temp cursor to the last entry in the next block. 3878 * Actually any entry but the first would suffice. 3879 */ 3880 i = xfs_btree_lastrec(tcur, level); 3881 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3882 3883 error = xfs_btree_increment(tcur, level, &i); 3884 if (error) 3885 goto error0; 3886 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3887 3888 i = xfs_btree_lastrec(tcur, level); 3889 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3890 3891 /* Grab a pointer to the block. */ 3892 right = xfs_btree_get_block(tcur, level, &rbp); 3893 #ifdef DEBUG 3894 error = xfs_btree_check_block(tcur, right, level, rbp); 3895 if (error) 3896 goto error0; 3897 #endif 3898 /* Grab the current block number, for future use. */ 3899 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); 3900 3901 /* 3902 * If right block is full enough so that removing one entry 3903 * won't make it too empty, and left-shifting an entry out 3904 * of right to us works, we're done. 3905 */ 3906 if (xfs_btree_get_numrecs(right) - 1 >= 3907 cur->bc_ops->get_minrecs(tcur, level)) { 3908 error = xfs_btree_lshift(tcur, level, &i); 3909 if (error) 3910 goto error0; 3911 if (i) { 3912 ASSERT(xfs_btree_get_numrecs(block) >= 3913 cur->bc_ops->get_minrecs(tcur, level)); 3914 3915 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3916 tcur = NULL; 3917 3918 error = xfs_btree_dec_cursor(cur, level, stat); 3919 if (error) 3920 goto error0; 3921 return 0; 3922 } 3923 } 3924 3925 /* 3926 * Otherwise, grab the number of records in right for 3927 * future reference, and fix up the temp cursor to point 3928 * to our block again (last record). 3929 */ 3930 rrecs = xfs_btree_get_numrecs(right); 3931 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3932 i = xfs_btree_firstrec(tcur, level); 3933 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3934 3935 error = xfs_btree_decrement(tcur, level, &i); 3936 if (error) 3937 goto error0; 3938 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3939 } 3940 } 3941 3942 /* 3943 * If there's a left sibling, see if it's ok to shift an entry 3944 * out of it. 3945 */ 3946 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3947 /* 3948 * Move the temp cursor to the first entry in the 3949 * previous block. 3950 */ 3951 i = xfs_btree_firstrec(tcur, level); 3952 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3953 3954 error = xfs_btree_decrement(tcur, level, &i); 3955 if (error) 3956 goto error0; 3957 i = xfs_btree_firstrec(tcur, level); 3958 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3959 3960 /* Grab a pointer to the block. */ 3961 left = xfs_btree_get_block(tcur, level, &lbp); 3962 #ifdef DEBUG 3963 error = xfs_btree_check_block(cur, left, level, lbp); 3964 if (error) 3965 goto error0; 3966 #endif 3967 /* Grab the current block number, for future use. */ 3968 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); 3969 3970 /* 3971 * If left block is full enough so that removing one entry 3972 * won't make it too empty, and right-shifting an entry out 3973 * of left to us works, we're done. 3974 */ 3975 if (xfs_btree_get_numrecs(left) - 1 >= 3976 cur->bc_ops->get_minrecs(tcur, level)) { 3977 error = xfs_btree_rshift(tcur, level, &i); 3978 if (error) 3979 goto error0; 3980 if (i) { 3981 ASSERT(xfs_btree_get_numrecs(block) >= 3982 cur->bc_ops->get_minrecs(tcur, level)); 3983 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3984 tcur = NULL; 3985 if (level == 0) 3986 cur->bc_ptrs[0]++; 3987 3988 *stat = 1; 3989 return 0; 3990 } 3991 } 3992 3993 /* 3994 * Otherwise, grab the number of records in right for 3995 * future reference. 3996 */ 3997 lrecs = xfs_btree_get_numrecs(left); 3998 } 3999 4000 /* Delete the temp cursor, we're done with it. */ 4001 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 4002 tcur = NULL; 4003 4004 /* If here, we need to do a join to keep the tree balanced. */ 4005 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); 4006 4007 if (!xfs_btree_ptr_is_null(cur, &lptr) && 4008 lrecs + xfs_btree_get_numrecs(block) <= 4009 cur->bc_ops->get_maxrecs(cur, level)) { 4010 /* 4011 * Set "right" to be the starting block, 4012 * "left" to be the left neighbor. 4013 */ 4014 rptr = cptr; 4015 right = block; 4016 rbp = bp; 4017 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 4018 if (error) 4019 goto error0; 4020 4021 /* 4022 * If that won't work, see if we can join with the right neighbor block. 4023 */ 4024 } else if (!xfs_btree_ptr_is_null(cur, &rptr) && 4025 rrecs + xfs_btree_get_numrecs(block) <= 4026 cur->bc_ops->get_maxrecs(cur, level)) { 4027 /* 4028 * Set "left" to be the starting block, 4029 * "right" to be the right neighbor. 4030 */ 4031 lptr = cptr; 4032 left = block; 4033 lbp = bp; 4034 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 4035 if (error) 4036 goto error0; 4037 4038 /* 4039 * Otherwise, we can't fix the imbalance. 4040 * Just return. This is probably a logic error, but it's not fatal. 4041 */ 4042 } else { 4043 error = xfs_btree_dec_cursor(cur, level, stat); 4044 if (error) 4045 goto error0; 4046 return 0; 4047 } 4048 4049 rrecs = xfs_btree_get_numrecs(right); 4050 lrecs = xfs_btree_get_numrecs(left); 4051 4052 /* 4053 * We're now going to join "left" and "right" by moving all the stuff 4054 * in "right" to "left" and deleting "right". 4055 */ 4056 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 4057 if (level > 0) { 4058 /* It's a non-leaf. Move keys and pointers. */ 4059 union xfs_btree_key *lkp; /* left btree key */ 4060 union xfs_btree_ptr *lpp; /* left address pointer */ 4061 union xfs_btree_key *rkp; /* right btree key */ 4062 union xfs_btree_ptr *rpp; /* right address pointer */ 4063 4064 lkp = xfs_btree_key_addr(cur, lrecs + 1, left); 4065 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); 4066 rkp = xfs_btree_key_addr(cur, 1, right); 4067 rpp = xfs_btree_ptr_addr(cur, 1, right); 4068 4069 for (i = 1; i < rrecs; i++) { 4070 error = xfs_btree_debug_check_ptr(cur, rpp, i, level); 4071 if (error) 4072 goto error0; 4073 } 4074 4075 xfs_btree_copy_keys(cur, lkp, rkp, rrecs); 4076 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); 4077 4078 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); 4079 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); 4080 } else { 4081 /* It's a leaf. Move records. */ 4082 union xfs_btree_rec *lrp; /* left record pointer */ 4083 union xfs_btree_rec *rrp; /* right record pointer */ 4084 4085 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); 4086 rrp = xfs_btree_rec_addr(cur, 1, right); 4087 4088 xfs_btree_copy_recs(cur, lrp, rrp, rrecs); 4089 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); 4090 } 4091 4092 XFS_BTREE_STATS_INC(cur, join); 4093 4094 /* 4095 * Fix up the number of records and right block pointer in the 4096 * surviving block, and log it. 4097 */ 4098 xfs_btree_set_numrecs(left, lrecs + rrecs); 4099 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), 4100 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4101 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 4102 4103 /* If there is a right sibling, point it to the remaining block. */ 4104 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4105 if (!xfs_btree_ptr_is_null(cur, &cptr)) { 4106 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); 4107 if (error) 4108 goto error0; 4109 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); 4110 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 4111 } 4112 4113 /* Free the deleted block. */ 4114 error = xfs_btree_free_block(cur, rbp); 4115 if (error) 4116 goto error0; 4117 4118 /* 4119 * If we joined with the left neighbor, set the buffer in the 4120 * cursor to the left block, and fix up the index. 4121 */ 4122 if (bp != lbp) { 4123 cur->bc_bufs[level] = lbp; 4124 cur->bc_ptrs[level] += lrecs; 4125 cur->bc_ra[level] = 0; 4126 } 4127 /* 4128 * If we joined with the right neighbor and there's a level above 4129 * us, increment the cursor at that level. 4130 */ 4131 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || 4132 (level + 1 < cur->bc_nlevels)) { 4133 error = xfs_btree_increment(cur, level + 1, &i); 4134 if (error) 4135 goto error0; 4136 } 4137 4138 /* 4139 * Readjust the ptr at this level if it's not a leaf, since it's 4140 * still pointing at the deletion point, which makes the cursor 4141 * inconsistent. If this makes the ptr 0, the caller fixes it up. 4142 * We can't use decrement because it would change the next level up. 4143 */ 4144 if (level > 0) 4145 cur->bc_ptrs[level]--; 4146 4147 /* 4148 * We combined blocks, so we have to update the parent keys if the 4149 * btree supports overlapped intervals. However, bc_ptrs[level + 1] 4150 * points to the old block so that the caller knows which record to 4151 * delete. Therefore, the caller must be savvy enough to call updkeys 4152 * for us if we return stat == 2. The other exit points from this 4153 * function don't require deletions further up the tree, so they can 4154 * call updkeys directly. 4155 */ 4156 4157 /* Return value means the next level up has something to do. */ 4158 *stat = 2; 4159 return 0; 4160 4161 error0: 4162 if (tcur) 4163 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 4164 return error; 4165 } 4166 4167 /* 4168 * Delete the record pointed to by cur. 4169 * The cursor refers to the place where the record was (could be inserted) 4170 * when the operation returns. 4171 */ 4172 int /* error */ 4173 xfs_btree_delete( 4174 struct xfs_btree_cur *cur, 4175 int *stat) /* success/failure */ 4176 { 4177 int error; /* error return value */ 4178 int level; 4179 int i; 4180 bool joined = false; 4181 4182 /* 4183 * Go up the tree, starting at leaf level. 4184 * 4185 * If 2 is returned then a join was done; go to the next level. 4186 * Otherwise we are done. 4187 */ 4188 for (level = 0, i = 2; i == 2; level++) { 4189 error = xfs_btree_delrec(cur, level, &i); 4190 if (error) 4191 goto error0; 4192 if (i == 2) 4193 joined = true; 4194 } 4195 4196 /* 4197 * If we combined blocks as part of deleting the record, delrec won't 4198 * have updated the parent high keys so we have to do that here. 4199 */ 4200 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) { 4201 error = xfs_btree_updkeys_force(cur, 0); 4202 if (error) 4203 goto error0; 4204 } 4205 4206 if (i == 0) { 4207 for (level = 1; level < cur->bc_nlevels; level++) { 4208 if (cur->bc_ptrs[level] == 0) { 4209 error = xfs_btree_decrement(cur, level, &i); 4210 if (error) 4211 goto error0; 4212 break; 4213 } 4214 } 4215 } 4216 4217 *stat = i; 4218 return 0; 4219 error0: 4220 return error; 4221 } 4222 4223 /* 4224 * Get the data from the pointed-to record. 4225 */ 4226 int /* error */ 4227 xfs_btree_get_rec( 4228 struct xfs_btree_cur *cur, /* btree cursor */ 4229 union xfs_btree_rec **recp, /* output: btree record */ 4230 int *stat) /* output: success/failure */ 4231 { 4232 struct xfs_btree_block *block; /* btree block */ 4233 struct xfs_buf *bp; /* buffer pointer */ 4234 int ptr; /* record number */ 4235 #ifdef DEBUG 4236 int error; /* error return value */ 4237 #endif 4238 4239 ptr = cur->bc_ptrs[0]; 4240 block = xfs_btree_get_block(cur, 0, &bp); 4241 4242 #ifdef DEBUG 4243 error = xfs_btree_check_block(cur, block, 0, bp); 4244 if (error) 4245 return error; 4246 #endif 4247 4248 /* 4249 * Off the right end or left end, return failure. 4250 */ 4251 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { 4252 *stat = 0; 4253 return 0; 4254 } 4255 4256 /* 4257 * Point to the record and extract its data. 4258 */ 4259 *recp = xfs_btree_rec_addr(cur, ptr, block); 4260 *stat = 1; 4261 return 0; 4262 } 4263 4264 /* Visit a block in a btree. */ 4265 STATIC int 4266 xfs_btree_visit_block( 4267 struct xfs_btree_cur *cur, 4268 int level, 4269 xfs_btree_visit_blocks_fn fn, 4270 void *data) 4271 { 4272 struct xfs_btree_block *block; 4273 struct xfs_buf *bp; 4274 union xfs_btree_ptr rptr; 4275 int error; 4276 4277 /* do right sibling readahead */ 4278 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 4279 block = xfs_btree_get_block(cur, level, &bp); 4280 4281 /* process the block */ 4282 error = fn(cur, level, data); 4283 if (error) 4284 return error; 4285 4286 /* now read rh sibling block for next iteration */ 4287 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 4288 if (xfs_btree_ptr_is_null(cur, &rptr)) 4289 return -ENOENT; 4290 4291 return xfs_btree_lookup_get_block(cur, level, &rptr, &block); 4292 } 4293 4294 4295 /* Visit every block in a btree. */ 4296 int 4297 xfs_btree_visit_blocks( 4298 struct xfs_btree_cur *cur, 4299 xfs_btree_visit_blocks_fn fn, 4300 void *data) 4301 { 4302 union xfs_btree_ptr lptr; 4303 int level; 4304 struct xfs_btree_block *block = NULL; 4305 int error = 0; 4306 4307 cur->bc_ops->init_ptr_from_cur(cur, &lptr); 4308 4309 /* for each level */ 4310 for (level = cur->bc_nlevels - 1; level >= 0; level--) { 4311 /* grab the left hand block */ 4312 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); 4313 if (error) 4314 return error; 4315 4316 /* readahead the left most block for the next level down */ 4317 if (level > 0) { 4318 union xfs_btree_ptr *ptr; 4319 4320 ptr = xfs_btree_ptr_addr(cur, 1, block); 4321 xfs_btree_readahead_ptr(cur, ptr, 1); 4322 4323 /* save for the next iteration of the loop */ 4324 xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); 4325 } 4326 4327 /* for each buffer in the level */ 4328 do { 4329 error = xfs_btree_visit_block(cur, level, fn, data); 4330 } while (!error); 4331 4332 if (error != -ENOENT) 4333 return error; 4334 } 4335 4336 return 0; 4337 } 4338 4339 /* 4340 * Change the owner of a btree. 4341 * 4342 * The mechanism we use here is ordered buffer logging. Because we don't know 4343 * how many buffers were are going to need to modify, we don't really want to 4344 * have to make transaction reservations for the worst case of every buffer in a 4345 * full size btree as that may be more space that we can fit in the log.... 4346 * 4347 * We do the btree walk in the most optimal manner possible - we have sibling 4348 * pointers so we can just walk all the blocks on each level from left to right 4349 * in a single pass, and then move to the next level and do the same. We can 4350 * also do readahead on the sibling pointers to get IO moving more quickly, 4351 * though for slow disks this is unlikely to make much difference to performance 4352 * as the amount of CPU work we have to do before moving to the next block is 4353 * relatively small. 4354 * 4355 * For each btree block that we load, modify the owner appropriately, set the 4356 * buffer as an ordered buffer and log it appropriately. We need to ensure that 4357 * we mark the region we change dirty so that if the buffer is relogged in 4358 * a subsequent transaction the changes we make here as an ordered buffer are 4359 * correctly relogged in that transaction. If we are in recovery context, then 4360 * just queue the modified buffer as delayed write buffer so the transaction 4361 * recovery completion writes the changes to disk. 4362 */ 4363 struct xfs_btree_block_change_owner_info { 4364 uint64_t new_owner; 4365 struct list_head *buffer_list; 4366 }; 4367 4368 static int 4369 xfs_btree_block_change_owner( 4370 struct xfs_btree_cur *cur, 4371 int level, 4372 void *data) 4373 { 4374 struct xfs_btree_block_change_owner_info *bbcoi = data; 4375 struct xfs_btree_block *block; 4376 struct xfs_buf *bp; 4377 4378 /* modify the owner */ 4379 block = xfs_btree_get_block(cur, level, &bp); 4380 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 4381 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner)) 4382 return 0; 4383 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); 4384 } else { 4385 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner)) 4386 return 0; 4387 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); 4388 } 4389 4390 /* 4391 * If the block is a root block hosted in an inode, we might not have a 4392 * buffer pointer here and we shouldn't attempt to log the change as the 4393 * information is already held in the inode and discarded when the root 4394 * block is formatted into the on-disk inode fork. We still change it, 4395 * though, so everything is consistent in memory. 4396 */ 4397 if (!bp) { 4398 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 4399 ASSERT(level == cur->bc_nlevels - 1); 4400 return 0; 4401 } 4402 4403 if (cur->bc_tp) { 4404 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) { 4405 xfs_btree_log_block(cur, bp, XFS_BB_OWNER); 4406 return -EAGAIN; 4407 } 4408 } else { 4409 xfs_buf_delwri_queue(bp, bbcoi->buffer_list); 4410 } 4411 4412 return 0; 4413 } 4414 4415 int 4416 xfs_btree_change_owner( 4417 struct xfs_btree_cur *cur, 4418 uint64_t new_owner, 4419 struct list_head *buffer_list) 4420 { 4421 struct xfs_btree_block_change_owner_info bbcoi; 4422 4423 bbcoi.new_owner = new_owner; 4424 bbcoi.buffer_list = buffer_list; 4425 4426 return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner, 4427 &bbcoi); 4428 } 4429 4430 /* Verify the v5 fields of a long-format btree block. */ 4431 xfs_failaddr_t 4432 xfs_btree_lblock_v5hdr_verify( 4433 struct xfs_buf *bp, 4434 uint64_t owner) 4435 { 4436 struct xfs_mount *mp = bp->b_target->bt_mount; 4437 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4438 4439 if (!xfs_sb_version_hascrc(&mp->m_sb)) 4440 return __this_address; 4441 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4442 return __this_address; 4443 if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn)) 4444 return __this_address; 4445 if (owner != XFS_RMAP_OWN_UNKNOWN && 4446 be64_to_cpu(block->bb_u.l.bb_owner) != owner) 4447 return __this_address; 4448 return NULL; 4449 } 4450 4451 /* Verify a long-format btree block. */ 4452 xfs_failaddr_t 4453 xfs_btree_lblock_verify( 4454 struct xfs_buf *bp, 4455 unsigned int max_recs) 4456 { 4457 struct xfs_mount *mp = bp->b_target->bt_mount; 4458 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4459 4460 /* numrecs verification */ 4461 if (be16_to_cpu(block->bb_numrecs) > max_recs) 4462 return __this_address; 4463 4464 /* sibling pointer verification */ 4465 if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && 4466 !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) 4467 return __this_address; 4468 if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && 4469 !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))) 4470 return __this_address; 4471 4472 return NULL; 4473 } 4474 4475 /** 4476 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format 4477 * btree block 4478 * 4479 * @bp: buffer containing the btree block 4480 * @max_recs: pointer to the m_*_mxr max records field in the xfs mount 4481 * @pag_max_level: pointer to the per-ag max level field 4482 */ 4483 xfs_failaddr_t 4484 xfs_btree_sblock_v5hdr_verify( 4485 struct xfs_buf *bp) 4486 { 4487 struct xfs_mount *mp = bp->b_target->bt_mount; 4488 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4489 struct xfs_perag *pag = bp->b_pag; 4490 4491 if (!xfs_sb_version_hascrc(&mp->m_sb)) 4492 return __this_address; 4493 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4494 return __this_address; 4495 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) 4496 return __this_address; 4497 if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) 4498 return __this_address; 4499 return NULL; 4500 } 4501 4502 /** 4503 * xfs_btree_sblock_verify() -- verify a short-format btree block 4504 * 4505 * @bp: buffer containing the btree block 4506 * @max_recs: maximum records allowed in this btree node 4507 */ 4508 xfs_failaddr_t 4509 xfs_btree_sblock_verify( 4510 struct xfs_buf *bp, 4511 unsigned int max_recs) 4512 { 4513 struct xfs_mount *mp = bp->b_target->bt_mount; 4514 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4515 xfs_agblock_t agno; 4516 4517 /* numrecs verification */ 4518 if (be16_to_cpu(block->bb_numrecs) > max_recs) 4519 return __this_address; 4520 4521 /* sibling pointer verification */ 4522 agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); 4523 if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && 4524 !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib))) 4525 return __this_address; 4526 if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && 4527 !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib))) 4528 return __this_address; 4529 4530 return NULL; 4531 } 4532 4533 /* 4534 * Calculate the number of btree levels needed to store a given number of 4535 * records in a short-format btree. 4536 */ 4537 uint 4538 xfs_btree_compute_maxlevels( 4539 uint *limits, 4540 unsigned long len) 4541 { 4542 uint level; 4543 unsigned long maxblocks; 4544 4545 maxblocks = (len + limits[0] - 1) / limits[0]; 4546 for (level = 1; maxblocks > 1; level++) 4547 maxblocks = (maxblocks + limits[1] - 1) / limits[1]; 4548 return level; 4549 } 4550 4551 /* 4552 * Query a regular btree for all records overlapping a given interval. 4553 * Start with a LE lookup of the key of low_rec and return all records 4554 * until we find a record with a key greater than the key of high_rec. 4555 */ 4556 STATIC int 4557 xfs_btree_simple_query_range( 4558 struct xfs_btree_cur *cur, 4559 union xfs_btree_key *low_key, 4560 union xfs_btree_key *high_key, 4561 xfs_btree_query_range_fn fn, 4562 void *priv) 4563 { 4564 union xfs_btree_rec *recp; 4565 union xfs_btree_key rec_key; 4566 int64_t diff; 4567 int stat; 4568 bool firstrec = true; 4569 int error; 4570 4571 ASSERT(cur->bc_ops->init_high_key_from_rec); 4572 ASSERT(cur->bc_ops->diff_two_keys); 4573 4574 /* 4575 * Find the leftmost record. The btree cursor must be set 4576 * to the low record used to generate low_key. 4577 */ 4578 stat = 0; 4579 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat); 4580 if (error) 4581 goto out; 4582 4583 /* Nothing? See if there's anything to the right. */ 4584 if (!stat) { 4585 error = xfs_btree_increment(cur, 0, &stat); 4586 if (error) 4587 goto out; 4588 } 4589 4590 while (stat) { 4591 /* Find the record. */ 4592 error = xfs_btree_get_rec(cur, &recp, &stat); 4593 if (error || !stat) 4594 break; 4595 4596 /* Skip if high_key(rec) < low_key. */ 4597 if (firstrec) { 4598 cur->bc_ops->init_high_key_from_rec(&rec_key, recp); 4599 firstrec = false; 4600 diff = cur->bc_ops->diff_two_keys(cur, low_key, 4601 &rec_key); 4602 if (diff > 0) 4603 goto advloop; 4604 } 4605 4606 /* Stop if high_key < low_key(rec). */ 4607 cur->bc_ops->init_key_from_rec(&rec_key, recp); 4608 diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key); 4609 if (diff > 0) 4610 break; 4611 4612 /* Callback */ 4613 error = fn(cur, recp, priv); 4614 if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT) 4615 break; 4616 4617 advloop: 4618 /* Move on to the next record. */ 4619 error = xfs_btree_increment(cur, 0, &stat); 4620 if (error) 4621 break; 4622 } 4623 4624 out: 4625 return error; 4626 } 4627 4628 /* 4629 * Query an overlapped interval btree for all records overlapping a given 4630 * interval. This function roughly follows the algorithm given in 4631 * "Interval Trees" of _Introduction to Algorithms_, which is section 4632 * 14.3 in the 2nd and 3rd editions. 4633 * 4634 * First, generate keys for the low and high records passed in. 4635 * 4636 * For any leaf node, generate the high and low keys for the record. 4637 * If the record keys overlap with the query low/high keys, pass the 4638 * record to the function iterator. 4639 * 4640 * For any internal node, compare the low and high keys of each 4641 * pointer against the query low/high keys. If there's an overlap, 4642 * follow the pointer. 4643 * 4644 * As an optimization, we stop scanning a block when we find a low key 4645 * that is greater than the query's high key. 4646 */ 4647 STATIC int 4648 xfs_btree_overlapped_query_range( 4649 struct xfs_btree_cur *cur, 4650 union xfs_btree_key *low_key, 4651 union xfs_btree_key *high_key, 4652 xfs_btree_query_range_fn fn, 4653 void *priv) 4654 { 4655 union xfs_btree_ptr ptr; 4656 union xfs_btree_ptr *pp; 4657 union xfs_btree_key rec_key; 4658 union xfs_btree_key rec_hkey; 4659 union xfs_btree_key *lkp; 4660 union xfs_btree_key *hkp; 4661 union xfs_btree_rec *recp; 4662 struct xfs_btree_block *block; 4663 int64_t ldiff; 4664 int64_t hdiff; 4665 int level; 4666 struct xfs_buf *bp; 4667 int i; 4668 int error; 4669 4670 /* Load the root of the btree. */ 4671 level = cur->bc_nlevels - 1; 4672 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 4673 error = xfs_btree_lookup_get_block(cur, level, &ptr, &block); 4674 if (error) 4675 return error; 4676 xfs_btree_get_block(cur, level, &bp); 4677 trace_xfs_btree_overlapped_query_range(cur, level, bp); 4678 #ifdef DEBUG 4679 error = xfs_btree_check_block(cur, block, level, bp); 4680 if (error) 4681 goto out; 4682 #endif 4683 cur->bc_ptrs[level] = 1; 4684 4685 while (level < cur->bc_nlevels) { 4686 block = xfs_btree_get_block(cur, level, &bp); 4687 4688 /* End of node, pop back towards the root. */ 4689 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { 4690 pop_up: 4691 if (level < cur->bc_nlevels - 1) 4692 cur->bc_ptrs[level + 1]++; 4693 level++; 4694 continue; 4695 } 4696 4697 if (level == 0) { 4698 /* Handle a leaf node. */ 4699 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); 4700 4701 cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); 4702 ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey, 4703 low_key); 4704 4705 cur->bc_ops->init_key_from_rec(&rec_key, recp); 4706 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, 4707 &rec_key); 4708 4709 /* 4710 * If (record's high key >= query's low key) and 4711 * (query's high key >= record's low key), then 4712 * this record overlaps the query range; callback. 4713 */ 4714 if (ldiff >= 0 && hdiff >= 0) { 4715 error = fn(cur, recp, priv); 4716 if (error < 0 || 4717 error == XFS_BTREE_QUERY_RANGE_ABORT) 4718 break; 4719 } else if (hdiff < 0) { 4720 /* Record is larger than high key; pop. */ 4721 goto pop_up; 4722 } 4723 cur->bc_ptrs[level]++; 4724 continue; 4725 } 4726 4727 /* Handle an internal node. */ 4728 lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); 4729 hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); 4730 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); 4731 4732 ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key); 4733 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp); 4734 4735 /* 4736 * If (pointer's high key >= query's low key) and 4737 * (query's high key >= pointer's low key), then 4738 * this record overlaps the query range; follow pointer. 4739 */ 4740 if (ldiff >= 0 && hdiff >= 0) { 4741 level--; 4742 error = xfs_btree_lookup_get_block(cur, level, pp, 4743 &block); 4744 if (error) 4745 goto out; 4746 xfs_btree_get_block(cur, level, &bp); 4747 trace_xfs_btree_overlapped_query_range(cur, level, bp); 4748 #ifdef DEBUG 4749 error = xfs_btree_check_block(cur, block, level, bp); 4750 if (error) 4751 goto out; 4752 #endif 4753 cur->bc_ptrs[level] = 1; 4754 continue; 4755 } else if (hdiff < 0) { 4756 /* The low key is larger than the upper range; pop. */ 4757 goto pop_up; 4758 } 4759 cur->bc_ptrs[level]++; 4760 } 4761 4762 out: 4763 /* 4764 * If we don't end this function with the cursor pointing at a record 4765 * block, a subsequent non-error cursor deletion will not release 4766 * node-level buffers, causing a buffer leak. This is quite possible 4767 * with a zero-results range query, so release the buffers if we 4768 * failed to return any results. 4769 */ 4770 if (cur->bc_bufs[0] == NULL) { 4771 for (i = 0; i < cur->bc_nlevels; i++) { 4772 if (cur->bc_bufs[i]) { 4773 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); 4774 cur->bc_bufs[i] = NULL; 4775 cur->bc_ptrs[i] = 0; 4776 cur->bc_ra[i] = 0; 4777 } 4778 } 4779 } 4780 4781 return error; 4782 } 4783 4784 /* 4785 * Query a btree for all records overlapping a given interval of keys. The 4786 * supplied function will be called with each record found; return one of the 4787 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error 4788 * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a 4789 * negative error code. 4790 */ 4791 int 4792 xfs_btree_query_range( 4793 struct xfs_btree_cur *cur, 4794 union xfs_btree_irec *low_rec, 4795 union xfs_btree_irec *high_rec, 4796 xfs_btree_query_range_fn fn, 4797 void *priv) 4798 { 4799 union xfs_btree_rec rec; 4800 union xfs_btree_key low_key; 4801 union xfs_btree_key high_key; 4802 4803 /* Find the keys of both ends of the interval. */ 4804 cur->bc_rec = *high_rec; 4805 cur->bc_ops->init_rec_from_cur(cur, &rec); 4806 cur->bc_ops->init_key_from_rec(&high_key, &rec); 4807 4808 cur->bc_rec = *low_rec; 4809 cur->bc_ops->init_rec_from_cur(cur, &rec); 4810 cur->bc_ops->init_key_from_rec(&low_key, &rec); 4811 4812 /* Enforce low key < high key. */ 4813 if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0) 4814 return -EINVAL; 4815 4816 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 4817 return xfs_btree_simple_query_range(cur, &low_key, 4818 &high_key, fn, priv); 4819 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, 4820 fn, priv); 4821 } 4822 4823 /* Query a btree for all records. */ 4824 int 4825 xfs_btree_query_all( 4826 struct xfs_btree_cur *cur, 4827 xfs_btree_query_range_fn fn, 4828 void *priv) 4829 { 4830 union xfs_btree_key low_key; 4831 union xfs_btree_key high_key; 4832 4833 memset(&cur->bc_rec, 0, sizeof(cur->bc_rec)); 4834 memset(&low_key, 0, sizeof(low_key)); 4835 memset(&high_key, 0xFF, sizeof(high_key)); 4836 4837 return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv); 4838 } 4839 4840 /* 4841 * Calculate the number of blocks needed to store a given number of records 4842 * in a short-format (per-AG metadata) btree. 4843 */ 4844 unsigned long long 4845 xfs_btree_calc_size( 4846 uint *limits, 4847 unsigned long long len) 4848 { 4849 int level; 4850 int maxrecs; 4851 unsigned long long rval; 4852 4853 maxrecs = limits[0]; 4854 for (level = 0, rval = 0; len > 1; level++) { 4855 len += maxrecs - 1; 4856 do_div(len, maxrecs); 4857 maxrecs = limits[1]; 4858 rval += len; 4859 } 4860 return rval; 4861 } 4862 4863 static int 4864 xfs_btree_count_blocks_helper( 4865 struct xfs_btree_cur *cur, 4866 int level, 4867 void *data) 4868 { 4869 xfs_extlen_t *blocks = data; 4870 (*blocks)++; 4871 4872 return 0; 4873 } 4874 4875 /* Count the blocks in a btree and return the result in *blocks. */ 4876 int 4877 xfs_btree_count_blocks( 4878 struct xfs_btree_cur *cur, 4879 xfs_extlen_t *blocks) 4880 { 4881 *blocks = 0; 4882 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper, 4883 blocks); 4884 } 4885 4886 /* Compare two btree pointers. */ 4887 int64_t 4888 xfs_btree_diff_two_ptrs( 4889 struct xfs_btree_cur *cur, 4890 const union xfs_btree_ptr *a, 4891 const union xfs_btree_ptr *b) 4892 { 4893 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4894 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); 4895 return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); 4896 } 4897 4898 /* If there's an extent, we're done. */ 4899 STATIC int 4900 xfs_btree_has_record_helper( 4901 struct xfs_btree_cur *cur, 4902 union xfs_btree_rec *rec, 4903 void *priv) 4904 { 4905 return XFS_BTREE_QUERY_RANGE_ABORT; 4906 } 4907 4908 /* Is there a record covering a given range of keys? */ 4909 int 4910 xfs_btree_has_record( 4911 struct xfs_btree_cur *cur, 4912 union xfs_btree_irec *low, 4913 union xfs_btree_irec *high, 4914 bool *exists) 4915 { 4916 int error; 4917 4918 error = xfs_btree_query_range(cur, low, high, 4919 &xfs_btree_has_record_helper, NULL); 4920 if (error == XFS_BTREE_QUERY_RANGE_ABORT) { 4921 *exists = true; 4922 return 0; 4923 } 4924 *exists = false; 4925 return error; 4926 } 4927 4928 /* Are there more records in this btree? */ 4929 bool 4930 xfs_btree_has_more_records( 4931 struct xfs_btree_cur *cur) 4932 { 4933 struct xfs_btree_block *block; 4934 struct xfs_buf *bp; 4935 4936 block = xfs_btree_get_block(cur, 0, &bp); 4937 4938 /* There are still records in this block. */ 4939 if (cur->bc_ptrs[0] < xfs_btree_get_numrecs(block)) 4940 return true; 4941 4942 /* There are more record blocks. */ 4943 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4944 return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK); 4945 else 4946 return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK); 4947 } 4948