1 /* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 #include "xfs_fs.h" 20 #include "xfs_shared.h" 21 #include "xfs_format.h" 22 #include "xfs_log_format.h" 23 #include "xfs_trans_resv.h" 24 #include "xfs_bit.h" 25 #include "xfs_mount.h" 26 #include "xfs_defer.h" 27 #include "xfs_inode.h" 28 #include "xfs_trans.h" 29 #include "xfs_inode_item.h" 30 #include "xfs_buf_item.h" 31 #include "xfs_btree.h" 32 #include "xfs_error.h" 33 #include "xfs_trace.h" 34 #include "xfs_cksum.h" 35 #include "xfs_alloc.h" 36 #include "xfs_log.h" 37 38 /* 39 * Cursor allocation zone. 40 */ 41 kmem_zone_t *xfs_btree_cur_zone; 42 43 /* 44 * Btree magic numbers. 45 */ 46 static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 47 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 48 XFS_FIBT_MAGIC, 0 }, 49 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC, 50 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, 51 XFS_REFC_CRC_MAGIC } 52 }; 53 54 __uint32_t 55 xfs_btree_magic( 56 int crc, 57 xfs_btnum_t btnum) 58 { 59 __uint32_t magic = xfs_magics[crc][btnum]; 60 61 /* Ensure we asked for crc for crc-only magics. */ 62 ASSERT(magic != 0); 63 return magic; 64 } 65 66 STATIC int /* error (0 or EFSCORRUPTED) */ 67 xfs_btree_check_lblock( 68 struct xfs_btree_cur *cur, /* btree cursor */ 69 struct xfs_btree_block *block, /* btree long form block pointer */ 70 int level, /* level of the btree block */ 71 struct xfs_buf *bp) /* buffer for block, if any */ 72 { 73 int lblock_ok = 1; /* block passes checks */ 74 struct xfs_mount *mp; /* file system mount point */ 75 xfs_btnum_t btnum = cur->bc_btnum; 76 int crc; 77 78 mp = cur->bc_mp; 79 crc = xfs_sb_version_hascrc(&mp->m_sb); 80 81 if (crc) { 82 lblock_ok = lblock_ok && 83 uuid_equal(&block->bb_u.l.bb_uuid, 84 &mp->m_sb.sb_meta_uuid) && 85 block->bb_u.l.bb_blkno == cpu_to_be64( 86 bp ? bp->b_bn : XFS_BUF_DADDR_NULL); 87 } 88 89 lblock_ok = lblock_ok && 90 be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && 91 be16_to_cpu(block->bb_level) == level && 92 be16_to_cpu(block->bb_numrecs) <= 93 cur->bc_ops->get_maxrecs(cur, level) && 94 block->bb_u.l.bb_leftsib && 95 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) || 96 XFS_FSB_SANITY_CHECK(mp, 97 be64_to_cpu(block->bb_u.l.bb_leftsib))) && 98 block->bb_u.l.bb_rightsib && 99 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) || 100 XFS_FSB_SANITY_CHECK(mp, 101 be64_to_cpu(block->bb_u.l.bb_rightsib))); 102 103 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, 104 XFS_ERRTAG_BTREE_CHECK_LBLOCK, 105 XFS_RANDOM_BTREE_CHECK_LBLOCK))) { 106 if (bp) 107 trace_xfs_btree_corrupt(bp, _RET_IP_); 108 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 109 return -EFSCORRUPTED; 110 } 111 return 0; 112 } 113 114 STATIC int /* error (0 or EFSCORRUPTED) */ 115 xfs_btree_check_sblock( 116 struct xfs_btree_cur *cur, /* btree cursor */ 117 struct xfs_btree_block *block, /* btree short form block pointer */ 118 int level, /* level of the btree block */ 119 struct xfs_buf *bp) /* buffer containing block */ 120 { 121 struct xfs_mount *mp; /* file system mount point */ 122 struct xfs_buf *agbp; /* buffer for ag. freespace struct */ 123 struct xfs_agf *agf; /* ag. freespace structure */ 124 xfs_agblock_t agflen; /* native ag. freespace length */ 125 int sblock_ok = 1; /* block passes checks */ 126 xfs_btnum_t btnum = cur->bc_btnum; 127 int crc; 128 129 mp = cur->bc_mp; 130 crc = xfs_sb_version_hascrc(&mp->m_sb); 131 agbp = cur->bc_private.a.agbp; 132 agf = XFS_BUF_TO_AGF(agbp); 133 agflen = be32_to_cpu(agf->agf_length); 134 135 if (crc) { 136 sblock_ok = sblock_ok && 137 uuid_equal(&block->bb_u.s.bb_uuid, 138 &mp->m_sb.sb_meta_uuid) && 139 block->bb_u.s.bb_blkno == cpu_to_be64( 140 bp ? bp->b_bn : XFS_BUF_DADDR_NULL); 141 } 142 143 sblock_ok = sblock_ok && 144 be32_to_cpu(block->bb_magic) == xfs_btree_magic(crc, btnum) && 145 be16_to_cpu(block->bb_level) == level && 146 be16_to_cpu(block->bb_numrecs) <= 147 cur->bc_ops->get_maxrecs(cur, level) && 148 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || 149 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && 150 block->bb_u.s.bb_leftsib && 151 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || 152 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && 153 block->bb_u.s.bb_rightsib; 154 155 if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp, 156 XFS_ERRTAG_BTREE_CHECK_SBLOCK, 157 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 158 if (bp) 159 trace_xfs_btree_corrupt(bp, _RET_IP_); 160 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 161 return -EFSCORRUPTED; 162 } 163 return 0; 164 } 165 166 /* 167 * Debug routine: check that block header is ok. 168 */ 169 int 170 xfs_btree_check_block( 171 struct xfs_btree_cur *cur, /* btree cursor */ 172 struct xfs_btree_block *block, /* generic btree block pointer */ 173 int level, /* level of the btree block */ 174 struct xfs_buf *bp) /* buffer containing block, if any */ 175 { 176 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 177 return xfs_btree_check_lblock(cur, block, level, bp); 178 else 179 return xfs_btree_check_sblock(cur, block, level, bp); 180 } 181 182 /* 183 * Check that (long) pointer is ok. 184 */ 185 int /* error (0 or EFSCORRUPTED) */ 186 xfs_btree_check_lptr( 187 struct xfs_btree_cur *cur, /* btree cursor */ 188 xfs_fsblock_t bno, /* btree block disk address */ 189 int level) /* btree block level */ 190 { 191 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 192 level > 0 && 193 bno != NULLFSBLOCK && 194 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); 195 return 0; 196 } 197 198 #ifdef DEBUG 199 /* 200 * Check that (short) pointer is ok. 201 */ 202 STATIC int /* error (0 or EFSCORRUPTED) */ 203 xfs_btree_check_sptr( 204 struct xfs_btree_cur *cur, /* btree cursor */ 205 xfs_agblock_t bno, /* btree block disk address */ 206 int level) /* btree block level */ 207 { 208 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; 209 210 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 211 level > 0 && 212 bno != NULLAGBLOCK && 213 bno != 0 && 214 bno < agblocks); 215 return 0; 216 } 217 218 /* 219 * Check that block ptr is ok. 220 */ 221 STATIC int /* error (0 or EFSCORRUPTED) */ 222 xfs_btree_check_ptr( 223 struct xfs_btree_cur *cur, /* btree cursor */ 224 union xfs_btree_ptr *ptr, /* btree block disk address */ 225 int index, /* offset from ptr to check */ 226 int level) /* btree block level */ 227 { 228 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 229 return xfs_btree_check_lptr(cur, 230 be64_to_cpu((&ptr->l)[index]), level); 231 } else { 232 return xfs_btree_check_sptr(cur, 233 be32_to_cpu((&ptr->s)[index]), level); 234 } 235 } 236 #endif 237 238 /* 239 * Calculate CRC on the whole btree block and stuff it into the 240 * long-form btree header. 241 * 242 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 243 * it into the buffer so recovery knows what the last modification was that made 244 * it to disk. 245 */ 246 void 247 xfs_btree_lblock_calc_crc( 248 struct xfs_buf *bp) 249 { 250 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 251 struct xfs_buf_log_item *bip = bp->b_fspriv; 252 253 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 254 return; 255 if (bip) 256 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 257 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 258 } 259 260 bool 261 xfs_btree_lblock_verify_crc( 262 struct xfs_buf *bp) 263 { 264 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 265 struct xfs_mount *mp = bp->b_target->bt_mount; 266 267 if (xfs_sb_version_hascrc(&mp->m_sb)) { 268 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) 269 return false; 270 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 271 } 272 273 return true; 274 } 275 276 /* 277 * Calculate CRC on the whole btree block and stuff it into the 278 * short-form btree header. 279 * 280 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 281 * it into the buffer so recovery knows what the last modification was that made 282 * it to disk. 283 */ 284 void 285 xfs_btree_sblock_calc_crc( 286 struct xfs_buf *bp) 287 { 288 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 289 struct xfs_buf_log_item *bip = bp->b_fspriv; 290 291 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 292 return; 293 if (bip) 294 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 295 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 296 } 297 298 bool 299 xfs_btree_sblock_verify_crc( 300 struct xfs_buf *bp) 301 { 302 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 303 struct xfs_mount *mp = bp->b_target->bt_mount; 304 305 if (xfs_sb_version_hascrc(&mp->m_sb)) { 306 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 307 return false; 308 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 309 } 310 311 return true; 312 } 313 314 static int 315 xfs_btree_free_block( 316 struct xfs_btree_cur *cur, 317 struct xfs_buf *bp) 318 { 319 int error; 320 321 error = cur->bc_ops->free_block(cur, bp); 322 if (!error) { 323 xfs_trans_binval(cur->bc_tp, bp); 324 XFS_BTREE_STATS_INC(cur, free); 325 } 326 return error; 327 } 328 329 /* 330 * Delete the btree cursor. 331 */ 332 void 333 xfs_btree_del_cursor( 334 xfs_btree_cur_t *cur, /* btree cursor */ 335 int error) /* del because of error */ 336 { 337 int i; /* btree level */ 338 339 /* 340 * Clear the buffer pointers, and release the buffers. 341 * If we're doing this in the face of an error, we 342 * need to make sure to inspect all of the entries 343 * in the bc_bufs array for buffers to be unlocked. 344 * This is because some of the btree code works from 345 * level n down to 0, and if we get an error along 346 * the way we won't have initialized all the entries 347 * down to 0. 348 */ 349 for (i = 0; i < cur->bc_nlevels; i++) { 350 if (cur->bc_bufs[i]) 351 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); 352 else if (!error) 353 break; 354 } 355 /* 356 * Can't free a bmap cursor without having dealt with the 357 * allocated indirect blocks' accounting. 358 */ 359 ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || 360 cur->bc_private.b.allocated == 0); 361 /* 362 * Free the cursor. 363 */ 364 kmem_zone_free(xfs_btree_cur_zone, cur); 365 } 366 367 /* 368 * Duplicate the btree cursor. 369 * Allocate a new one, copy the record, re-get the buffers. 370 */ 371 int /* error */ 372 xfs_btree_dup_cursor( 373 xfs_btree_cur_t *cur, /* input cursor */ 374 xfs_btree_cur_t **ncur) /* output cursor */ 375 { 376 xfs_buf_t *bp; /* btree block's buffer pointer */ 377 int error; /* error return value */ 378 int i; /* level number of btree block */ 379 xfs_mount_t *mp; /* mount structure for filesystem */ 380 xfs_btree_cur_t *new; /* new cursor value */ 381 xfs_trans_t *tp; /* transaction pointer, can be NULL */ 382 383 tp = cur->bc_tp; 384 mp = cur->bc_mp; 385 386 /* 387 * Allocate a new cursor like the old one. 388 */ 389 new = cur->bc_ops->dup_cursor(cur); 390 391 /* 392 * Copy the record currently in the cursor. 393 */ 394 new->bc_rec = cur->bc_rec; 395 396 /* 397 * For each level current, re-get the buffer and copy the ptr value. 398 */ 399 for (i = 0; i < new->bc_nlevels; i++) { 400 new->bc_ptrs[i] = cur->bc_ptrs[i]; 401 new->bc_ra[i] = cur->bc_ra[i]; 402 bp = cur->bc_bufs[i]; 403 if (bp) { 404 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 405 XFS_BUF_ADDR(bp), mp->m_bsize, 406 0, &bp, 407 cur->bc_ops->buf_ops); 408 if (error) { 409 xfs_btree_del_cursor(new, error); 410 *ncur = NULL; 411 return error; 412 } 413 } 414 new->bc_bufs[i] = bp; 415 } 416 *ncur = new; 417 return 0; 418 } 419 420 /* 421 * XFS btree block layout and addressing: 422 * 423 * There are two types of blocks in the btree: leaf and non-leaf blocks. 424 * 425 * The leaf record start with a header then followed by records containing 426 * the values. A non-leaf block also starts with the same header, and 427 * then first contains lookup keys followed by an equal number of pointers 428 * to the btree blocks at the previous level. 429 * 430 * +--------+-------+-------+-------+-------+-------+-------+ 431 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | 432 * +--------+-------+-------+-------+-------+-------+-------+ 433 * 434 * +--------+-------+-------+-------+-------+-------+-------+ 435 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | 436 * +--------+-------+-------+-------+-------+-------+-------+ 437 * 438 * The header is called struct xfs_btree_block for reasons better left unknown 439 * and comes in different versions for short (32bit) and long (64bit) block 440 * pointers. The record and key structures are defined by the btree instances 441 * and opaque to the btree core. The block pointers are simple disk endian 442 * integers, available in a short (32bit) and long (64bit) variant. 443 * 444 * The helpers below calculate the offset of a given record, key or pointer 445 * into a btree block (xfs_btree_*_offset) or return a pointer to the given 446 * record, key or pointer (xfs_btree_*_addr). Note that all addressing 447 * inside the btree block is done using indices starting at one, not zero! 448 * 449 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing 450 * overlapping intervals. In such a tree, records are still sorted lowest to 451 * highest and indexed by the smallest key value that refers to the record. 452 * However, nodes are different: each pointer has two associated keys -- one 453 * indexing the lowest key available in the block(s) below (the same behavior 454 * as the key in a regular btree) and another indexing the highest key 455 * available in the block(s) below. Because records are /not/ sorted by the 456 * highest key, all leaf block updates require us to compute the highest key 457 * that matches any record in the leaf and to recursively update the high keys 458 * in the nodes going further up in the tree, if necessary. Nodes look like 459 * this: 460 * 461 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ 462 * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... | 463 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ 464 * 465 * To perform an interval query on an overlapped tree, perform the usual 466 * depth-first search and use the low and high keys to decide if we can skip 467 * that particular node. If a leaf node is reached, return the records that 468 * intersect the interval. Note that an interval query may return numerous 469 * entries. For a non-overlapped tree, simply search for the record associated 470 * with the lowest key and iterate forward until a non-matching record is 471 * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by 472 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in 473 * more detail. 474 * 475 * Why do we care about overlapping intervals? Let's say you have a bunch of 476 * reverse mapping records on a reflink filesystem: 477 * 478 * 1: +- file A startblock B offset C length D -----------+ 479 * 2: +- file E startblock F offset G length H --------------+ 480 * 3: +- file I startblock F offset J length K --+ 481 * 4: +- file L... --+ 482 * 483 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally, 484 * we'd simply increment the length of record 1. But how do we find the record 485 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return 486 * record 3 because the keys are ordered first by startblock. An interval 487 * query would return records 1 and 2 because they both overlap (B+D-1), and 488 * from that we can pick out record 1 as the appropriate left neighbor. 489 * 490 * In the non-overlapped case you can do a LE lookup and decrement the cursor 491 * because a record's interval must end before the next record. 492 */ 493 494 /* 495 * Return size of the btree block header for this btree instance. 496 */ 497 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) 498 { 499 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 500 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 501 return XFS_BTREE_LBLOCK_CRC_LEN; 502 return XFS_BTREE_LBLOCK_LEN; 503 } 504 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 505 return XFS_BTREE_SBLOCK_CRC_LEN; 506 return XFS_BTREE_SBLOCK_LEN; 507 } 508 509 /* 510 * Return size of btree block pointers for this btree instance. 511 */ 512 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) 513 { 514 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 515 sizeof(__be64) : sizeof(__be32); 516 } 517 518 /* 519 * Calculate offset of the n-th record in a btree block. 520 */ 521 STATIC size_t 522 xfs_btree_rec_offset( 523 struct xfs_btree_cur *cur, 524 int n) 525 { 526 return xfs_btree_block_len(cur) + 527 (n - 1) * cur->bc_ops->rec_len; 528 } 529 530 /* 531 * Calculate offset of the n-th key in a btree block. 532 */ 533 STATIC size_t 534 xfs_btree_key_offset( 535 struct xfs_btree_cur *cur, 536 int n) 537 { 538 return xfs_btree_block_len(cur) + 539 (n - 1) * cur->bc_ops->key_len; 540 } 541 542 /* 543 * Calculate offset of the n-th high key in a btree block. 544 */ 545 STATIC size_t 546 xfs_btree_high_key_offset( 547 struct xfs_btree_cur *cur, 548 int n) 549 { 550 return xfs_btree_block_len(cur) + 551 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2); 552 } 553 554 /* 555 * Calculate offset of the n-th block pointer in a btree block. 556 */ 557 STATIC size_t 558 xfs_btree_ptr_offset( 559 struct xfs_btree_cur *cur, 560 int n, 561 int level) 562 { 563 return xfs_btree_block_len(cur) + 564 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + 565 (n - 1) * xfs_btree_ptr_len(cur); 566 } 567 568 /* 569 * Return a pointer to the n-th record in the btree block. 570 */ 571 STATIC union xfs_btree_rec * 572 xfs_btree_rec_addr( 573 struct xfs_btree_cur *cur, 574 int n, 575 struct xfs_btree_block *block) 576 { 577 return (union xfs_btree_rec *) 578 ((char *)block + xfs_btree_rec_offset(cur, n)); 579 } 580 581 /* 582 * Return a pointer to the n-th key in the btree block. 583 */ 584 STATIC union xfs_btree_key * 585 xfs_btree_key_addr( 586 struct xfs_btree_cur *cur, 587 int n, 588 struct xfs_btree_block *block) 589 { 590 return (union xfs_btree_key *) 591 ((char *)block + xfs_btree_key_offset(cur, n)); 592 } 593 594 /* 595 * Return a pointer to the n-th high key in the btree block. 596 */ 597 STATIC union xfs_btree_key * 598 xfs_btree_high_key_addr( 599 struct xfs_btree_cur *cur, 600 int n, 601 struct xfs_btree_block *block) 602 { 603 return (union xfs_btree_key *) 604 ((char *)block + xfs_btree_high_key_offset(cur, n)); 605 } 606 607 /* 608 * Return a pointer to the n-th block pointer in the btree block. 609 */ 610 STATIC union xfs_btree_ptr * 611 xfs_btree_ptr_addr( 612 struct xfs_btree_cur *cur, 613 int n, 614 struct xfs_btree_block *block) 615 { 616 int level = xfs_btree_get_level(block); 617 618 ASSERT(block->bb_level != 0); 619 620 return (union xfs_btree_ptr *) 621 ((char *)block + xfs_btree_ptr_offset(cur, n, level)); 622 } 623 624 /* 625 * Get the root block which is stored in the inode. 626 * 627 * For now this btree implementation assumes the btree root is always 628 * stored in the if_broot field of an inode fork. 629 */ 630 STATIC struct xfs_btree_block * 631 xfs_btree_get_iroot( 632 struct xfs_btree_cur *cur) 633 { 634 struct xfs_ifork *ifp; 635 636 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); 637 return (struct xfs_btree_block *)ifp->if_broot; 638 } 639 640 /* 641 * Retrieve the block pointer from the cursor at the given level. 642 * This may be an inode btree root or from a buffer. 643 */ 644 STATIC struct xfs_btree_block * /* generic btree block pointer */ 645 xfs_btree_get_block( 646 struct xfs_btree_cur *cur, /* btree cursor */ 647 int level, /* level in btree */ 648 struct xfs_buf **bpp) /* buffer containing the block */ 649 { 650 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 651 (level == cur->bc_nlevels - 1)) { 652 *bpp = NULL; 653 return xfs_btree_get_iroot(cur); 654 } 655 656 *bpp = cur->bc_bufs[level]; 657 return XFS_BUF_TO_BLOCK(*bpp); 658 } 659 660 /* 661 * Get a buffer for the block, return it with no data read. 662 * Long-form addressing. 663 */ 664 xfs_buf_t * /* buffer for fsbno */ 665 xfs_btree_get_bufl( 666 xfs_mount_t *mp, /* file system mount point */ 667 xfs_trans_t *tp, /* transaction pointer */ 668 xfs_fsblock_t fsbno, /* file system block number */ 669 uint lock) /* lock flags for get_buf */ 670 { 671 xfs_daddr_t d; /* real disk block address */ 672 673 ASSERT(fsbno != NULLFSBLOCK); 674 d = XFS_FSB_TO_DADDR(mp, fsbno); 675 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 676 } 677 678 /* 679 * Get a buffer for the block, return it with no data read. 680 * Short-form addressing. 681 */ 682 xfs_buf_t * /* buffer for agno/agbno */ 683 xfs_btree_get_bufs( 684 xfs_mount_t *mp, /* file system mount point */ 685 xfs_trans_t *tp, /* transaction pointer */ 686 xfs_agnumber_t agno, /* allocation group number */ 687 xfs_agblock_t agbno, /* allocation group block number */ 688 uint lock) /* lock flags for get_buf */ 689 { 690 xfs_daddr_t d; /* real disk block address */ 691 692 ASSERT(agno != NULLAGNUMBER); 693 ASSERT(agbno != NULLAGBLOCK); 694 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 695 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 696 } 697 698 /* 699 * Check for the cursor referring to the last block at the given level. 700 */ 701 int /* 1=is last block, 0=not last block */ 702 xfs_btree_islastblock( 703 xfs_btree_cur_t *cur, /* btree cursor */ 704 int level) /* level to check */ 705 { 706 struct xfs_btree_block *block; /* generic btree block pointer */ 707 xfs_buf_t *bp; /* buffer containing block */ 708 709 block = xfs_btree_get_block(cur, level, &bp); 710 xfs_btree_check_block(cur, block, level, bp); 711 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 712 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); 713 else 714 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); 715 } 716 717 /* 718 * Change the cursor to point to the first record at the given level. 719 * Other levels are unaffected. 720 */ 721 STATIC int /* success=1, failure=0 */ 722 xfs_btree_firstrec( 723 xfs_btree_cur_t *cur, /* btree cursor */ 724 int level) /* level to change */ 725 { 726 struct xfs_btree_block *block; /* generic btree block pointer */ 727 xfs_buf_t *bp; /* buffer containing block */ 728 729 /* 730 * Get the block pointer for this level. 731 */ 732 block = xfs_btree_get_block(cur, level, &bp); 733 xfs_btree_check_block(cur, block, level, bp); 734 /* 735 * It's empty, there is no such record. 736 */ 737 if (!block->bb_numrecs) 738 return 0; 739 /* 740 * Set the ptr value to 1, that's the first record/key. 741 */ 742 cur->bc_ptrs[level] = 1; 743 return 1; 744 } 745 746 /* 747 * Change the cursor to point to the last record in the current block 748 * at the given level. Other levels are unaffected. 749 */ 750 STATIC int /* success=1, failure=0 */ 751 xfs_btree_lastrec( 752 xfs_btree_cur_t *cur, /* btree cursor */ 753 int level) /* level to change */ 754 { 755 struct xfs_btree_block *block; /* generic btree block pointer */ 756 xfs_buf_t *bp; /* buffer containing block */ 757 758 /* 759 * Get the block pointer for this level. 760 */ 761 block = xfs_btree_get_block(cur, level, &bp); 762 xfs_btree_check_block(cur, block, level, bp); 763 /* 764 * It's empty, there is no such record. 765 */ 766 if (!block->bb_numrecs) 767 return 0; 768 /* 769 * Set the ptr value to numrecs, that's the last record/key. 770 */ 771 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); 772 return 1; 773 } 774 775 /* 776 * Compute first and last byte offsets for the fields given. 777 * Interprets the offsets table, which contains struct field offsets. 778 */ 779 void 780 xfs_btree_offsets( 781 __int64_t fields, /* bitmask of fields */ 782 const short *offsets, /* table of field offsets */ 783 int nbits, /* number of bits to inspect */ 784 int *first, /* output: first byte offset */ 785 int *last) /* output: last byte offset */ 786 { 787 int i; /* current bit number */ 788 __int64_t imask; /* mask for current bit number */ 789 790 ASSERT(fields != 0); 791 /* 792 * Find the lowest bit, so the first byte offset. 793 */ 794 for (i = 0, imask = 1LL; ; i++, imask <<= 1) { 795 if (imask & fields) { 796 *first = offsets[i]; 797 break; 798 } 799 } 800 /* 801 * Find the highest bit, so the last byte offset. 802 */ 803 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) { 804 if (imask & fields) { 805 *last = offsets[i + 1] - 1; 806 break; 807 } 808 } 809 } 810 811 /* 812 * Get a buffer for the block, return it read in. 813 * Long-form addressing. 814 */ 815 int 816 xfs_btree_read_bufl( 817 struct xfs_mount *mp, /* file system mount point */ 818 struct xfs_trans *tp, /* transaction pointer */ 819 xfs_fsblock_t fsbno, /* file system block number */ 820 uint lock, /* lock flags for read_buf */ 821 struct xfs_buf **bpp, /* buffer for fsbno */ 822 int refval, /* ref count value for buffer */ 823 const struct xfs_buf_ops *ops) 824 { 825 struct xfs_buf *bp; /* return value */ 826 xfs_daddr_t d; /* real disk block address */ 827 int error; 828 829 if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) 830 return -EFSCORRUPTED; 831 d = XFS_FSB_TO_DADDR(mp, fsbno); 832 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 833 mp->m_bsize, lock, &bp, ops); 834 if (error) 835 return error; 836 if (bp) 837 xfs_buf_set_ref(bp, refval); 838 *bpp = bp; 839 return 0; 840 } 841 842 /* 843 * Read-ahead the block, don't wait for it, don't return a buffer. 844 * Long-form addressing. 845 */ 846 /* ARGSUSED */ 847 void 848 xfs_btree_reada_bufl( 849 struct xfs_mount *mp, /* file system mount point */ 850 xfs_fsblock_t fsbno, /* file system block number */ 851 xfs_extlen_t count, /* count of filesystem blocks */ 852 const struct xfs_buf_ops *ops) 853 { 854 xfs_daddr_t d; 855 856 ASSERT(fsbno != NULLFSBLOCK); 857 d = XFS_FSB_TO_DADDR(mp, fsbno); 858 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 859 } 860 861 /* 862 * Read-ahead the block, don't wait for it, don't return a buffer. 863 * Short-form addressing. 864 */ 865 /* ARGSUSED */ 866 void 867 xfs_btree_reada_bufs( 868 struct xfs_mount *mp, /* file system mount point */ 869 xfs_agnumber_t agno, /* allocation group number */ 870 xfs_agblock_t agbno, /* allocation group block number */ 871 xfs_extlen_t count, /* count of filesystem blocks */ 872 const struct xfs_buf_ops *ops) 873 { 874 xfs_daddr_t d; 875 876 ASSERT(agno != NULLAGNUMBER); 877 ASSERT(agbno != NULLAGBLOCK); 878 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 879 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 880 } 881 882 STATIC int 883 xfs_btree_readahead_lblock( 884 struct xfs_btree_cur *cur, 885 int lr, 886 struct xfs_btree_block *block) 887 { 888 int rval = 0; 889 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); 890 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); 891 892 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { 893 xfs_btree_reada_bufl(cur->bc_mp, left, 1, 894 cur->bc_ops->buf_ops); 895 rval++; 896 } 897 898 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { 899 xfs_btree_reada_bufl(cur->bc_mp, right, 1, 900 cur->bc_ops->buf_ops); 901 rval++; 902 } 903 904 return rval; 905 } 906 907 STATIC int 908 xfs_btree_readahead_sblock( 909 struct xfs_btree_cur *cur, 910 int lr, 911 struct xfs_btree_block *block) 912 { 913 int rval = 0; 914 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); 915 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); 916 917 918 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { 919 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, 920 left, 1, cur->bc_ops->buf_ops); 921 rval++; 922 } 923 924 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { 925 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, 926 right, 1, cur->bc_ops->buf_ops); 927 rval++; 928 } 929 930 return rval; 931 } 932 933 /* 934 * Read-ahead btree blocks, at the given level. 935 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 936 */ 937 STATIC int 938 xfs_btree_readahead( 939 struct xfs_btree_cur *cur, /* btree cursor */ 940 int lev, /* level in btree */ 941 int lr) /* left/right bits */ 942 { 943 struct xfs_btree_block *block; 944 945 /* 946 * No readahead needed if we are at the root level and the 947 * btree root is stored in the inode. 948 */ 949 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 950 (lev == cur->bc_nlevels - 1)) 951 return 0; 952 953 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) 954 return 0; 955 956 cur->bc_ra[lev] |= lr; 957 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); 958 959 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 960 return xfs_btree_readahead_lblock(cur, lr, block); 961 return xfs_btree_readahead_sblock(cur, lr, block); 962 } 963 964 STATIC xfs_daddr_t 965 xfs_btree_ptr_to_daddr( 966 struct xfs_btree_cur *cur, 967 union xfs_btree_ptr *ptr) 968 { 969 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 970 ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK)); 971 972 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); 973 } else { 974 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); 975 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); 976 977 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, 978 be32_to_cpu(ptr->s)); 979 } 980 } 981 982 /* 983 * Readahead @count btree blocks at the given @ptr location. 984 * 985 * We don't need to care about long or short form btrees here as we have a 986 * method of converting the ptr directly to a daddr available to us. 987 */ 988 STATIC void 989 xfs_btree_readahead_ptr( 990 struct xfs_btree_cur *cur, 991 union xfs_btree_ptr *ptr, 992 xfs_extlen_t count) 993 { 994 xfs_buf_readahead(cur->bc_mp->m_ddev_targp, 995 xfs_btree_ptr_to_daddr(cur, ptr), 996 cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); 997 } 998 999 /* 1000 * Set the buffer for level "lev" in the cursor to bp, releasing 1001 * any previous buffer. 1002 */ 1003 STATIC void 1004 xfs_btree_setbuf( 1005 xfs_btree_cur_t *cur, /* btree cursor */ 1006 int lev, /* level in btree */ 1007 xfs_buf_t *bp) /* new buffer to set */ 1008 { 1009 struct xfs_btree_block *b; /* btree block */ 1010 1011 if (cur->bc_bufs[lev]) 1012 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); 1013 cur->bc_bufs[lev] = bp; 1014 cur->bc_ra[lev] = 0; 1015 1016 b = XFS_BUF_TO_BLOCK(bp); 1017 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1018 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) 1019 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 1020 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) 1021 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 1022 } else { 1023 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) 1024 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 1025 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) 1026 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 1027 } 1028 } 1029 1030 STATIC int 1031 xfs_btree_ptr_is_null( 1032 struct xfs_btree_cur *cur, 1033 union xfs_btree_ptr *ptr) 1034 { 1035 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1036 return ptr->l == cpu_to_be64(NULLFSBLOCK); 1037 else 1038 return ptr->s == cpu_to_be32(NULLAGBLOCK); 1039 } 1040 1041 STATIC void 1042 xfs_btree_set_ptr_null( 1043 struct xfs_btree_cur *cur, 1044 union xfs_btree_ptr *ptr) 1045 { 1046 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1047 ptr->l = cpu_to_be64(NULLFSBLOCK); 1048 else 1049 ptr->s = cpu_to_be32(NULLAGBLOCK); 1050 } 1051 1052 /* 1053 * Get/set/init sibling pointers 1054 */ 1055 STATIC void 1056 xfs_btree_get_sibling( 1057 struct xfs_btree_cur *cur, 1058 struct xfs_btree_block *block, 1059 union xfs_btree_ptr *ptr, 1060 int lr) 1061 { 1062 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); 1063 1064 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1065 if (lr == XFS_BB_RIGHTSIB) 1066 ptr->l = block->bb_u.l.bb_rightsib; 1067 else 1068 ptr->l = block->bb_u.l.bb_leftsib; 1069 } else { 1070 if (lr == XFS_BB_RIGHTSIB) 1071 ptr->s = block->bb_u.s.bb_rightsib; 1072 else 1073 ptr->s = block->bb_u.s.bb_leftsib; 1074 } 1075 } 1076 1077 STATIC void 1078 xfs_btree_set_sibling( 1079 struct xfs_btree_cur *cur, 1080 struct xfs_btree_block *block, 1081 union xfs_btree_ptr *ptr, 1082 int lr) 1083 { 1084 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); 1085 1086 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1087 if (lr == XFS_BB_RIGHTSIB) 1088 block->bb_u.l.bb_rightsib = ptr->l; 1089 else 1090 block->bb_u.l.bb_leftsib = ptr->l; 1091 } else { 1092 if (lr == XFS_BB_RIGHTSIB) 1093 block->bb_u.s.bb_rightsib = ptr->s; 1094 else 1095 block->bb_u.s.bb_leftsib = ptr->s; 1096 } 1097 } 1098 1099 void 1100 xfs_btree_init_block_int( 1101 struct xfs_mount *mp, 1102 struct xfs_btree_block *buf, 1103 xfs_daddr_t blkno, 1104 xfs_btnum_t btnum, 1105 __u16 level, 1106 __u16 numrecs, 1107 __u64 owner, 1108 unsigned int flags) 1109 { 1110 int crc = xfs_sb_version_hascrc(&mp->m_sb); 1111 __u32 magic = xfs_btree_magic(crc, btnum); 1112 1113 buf->bb_magic = cpu_to_be32(magic); 1114 buf->bb_level = cpu_to_be16(level); 1115 buf->bb_numrecs = cpu_to_be16(numrecs); 1116 1117 if (flags & XFS_BTREE_LONG_PTRS) { 1118 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); 1119 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); 1120 if (crc) { 1121 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); 1122 buf->bb_u.l.bb_owner = cpu_to_be64(owner); 1123 uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); 1124 buf->bb_u.l.bb_pad = 0; 1125 buf->bb_u.l.bb_lsn = 0; 1126 } 1127 } else { 1128 /* owner is a 32 bit value on short blocks */ 1129 __u32 __owner = (__u32)owner; 1130 1131 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); 1132 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); 1133 if (crc) { 1134 buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); 1135 buf->bb_u.s.bb_owner = cpu_to_be32(__owner); 1136 uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); 1137 buf->bb_u.s.bb_lsn = 0; 1138 } 1139 } 1140 } 1141 1142 void 1143 xfs_btree_init_block( 1144 struct xfs_mount *mp, 1145 struct xfs_buf *bp, 1146 xfs_btnum_t btnum, 1147 __u16 level, 1148 __u16 numrecs, 1149 __u64 owner, 1150 unsigned int flags) 1151 { 1152 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, 1153 btnum, level, numrecs, owner, flags); 1154 } 1155 1156 STATIC void 1157 xfs_btree_init_block_cur( 1158 struct xfs_btree_cur *cur, 1159 struct xfs_buf *bp, 1160 int level, 1161 int numrecs) 1162 { 1163 __u64 owner; 1164 1165 /* 1166 * we can pull the owner from the cursor right now as the different 1167 * owners align directly with the pointer size of the btree. This may 1168 * change in future, but is safe for current users of the generic btree 1169 * code. 1170 */ 1171 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1172 owner = cur->bc_private.b.ip->i_ino; 1173 else 1174 owner = cur->bc_private.a.agno; 1175 1176 xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, 1177 cur->bc_btnum, level, numrecs, 1178 owner, cur->bc_flags); 1179 } 1180 1181 /* 1182 * Return true if ptr is the last record in the btree and 1183 * we need to track updates to this record. The decision 1184 * will be further refined in the update_lastrec method. 1185 */ 1186 STATIC int 1187 xfs_btree_is_lastrec( 1188 struct xfs_btree_cur *cur, 1189 struct xfs_btree_block *block, 1190 int level) 1191 { 1192 union xfs_btree_ptr ptr; 1193 1194 if (level > 0) 1195 return 0; 1196 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) 1197 return 0; 1198 1199 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1200 if (!xfs_btree_ptr_is_null(cur, &ptr)) 1201 return 0; 1202 return 1; 1203 } 1204 1205 STATIC void 1206 xfs_btree_buf_to_ptr( 1207 struct xfs_btree_cur *cur, 1208 struct xfs_buf *bp, 1209 union xfs_btree_ptr *ptr) 1210 { 1211 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1212 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, 1213 XFS_BUF_ADDR(bp))); 1214 else { 1215 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, 1216 XFS_BUF_ADDR(bp))); 1217 } 1218 } 1219 1220 STATIC void 1221 xfs_btree_set_refs( 1222 struct xfs_btree_cur *cur, 1223 struct xfs_buf *bp) 1224 { 1225 switch (cur->bc_btnum) { 1226 case XFS_BTNUM_BNO: 1227 case XFS_BTNUM_CNT: 1228 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); 1229 break; 1230 case XFS_BTNUM_INO: 1231 case XFS_BTNUM_FINO: 1232 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); 1233 break; 1234 case XFS_BTNUM_BMAP: 1235 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); 1236 break; 1237 case XFS_BTNUM_RMAP: 1238 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF); 1239 break; 1240 case XFS_BTNUM_REFC: 1241 xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF); 1242 break; 1243 default: 1244 ASSERT(0); 1245 } 1246 } 1247 1248 STATIC int 1249 xfs_btree_get_buf_block( 1250 struct xfs_btree_cur *cur, 1251 union xfs_btree_ptr *ptr, 1252 int flags, 1253 struct xfs_btree_block **block, 1254 struct xfs_buf **bpp) 1255 { 1256 struct xfs_mount *mp = cur->bc_mp; 1257 xfs_daddr_t d; 1258 1259 /* need to sort out how callers deal with failures first */ 1260 ASSERT(!(flags & XBF_TRYLOCK)); 1261 1262 d = xfs_btree_ptr_to_daddr(cur, ptr); 1263 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 1264 mp->m_bsize, flags); 1265 1266 if (!*bpp) 1267 return -ENOMEM; 1268 1269 (*bpp)->b_ops = cur->bc_ops->buf_ops; 1270 *block = XFS_BUF_TO_BLOCK(*bpp); 1271 return 0; 1272 } 1273 1274 /* 1275 * Read in the buffer at the given ptr and return the buffer and 1276 * the block pointer within the buffer. 1277 */ 1278 STATIC int 1279 xfs_btree_read_buf_block( 1280 struct xfs_btree_cur *cur, 1281 union xfs_btree_ptr *ptr, 1282 int flags, 1283 struct xfs_btree_block **block, 1284 struct xfs_buf **bpp) 1285 { 1286 struct xfs_mount *mp = cur->bc_mp; 1287 xfs_daddr_t d; 1288 int error; 1289 1290 /* need to sort out how callers deal with failures first */ 1291 ASSERT(!(flags & XBF_TRYLOCK)); 1292 1293 d = xfs_btree_ptr_to_daddr(cur, ptr); 1294 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, 1295 mp->m_bsize, flags, bpp, 1296 cur->bc_ops->buf_ops); 1297 if (error) 1298 return error; 1299 1300 xfs_btree_set_refs(cur, *bpp); 1301 *block = XFS_BUF_TO_BLOCK(*bpp); 1302 return 0; 1303 } 1304 1305 /* 1306 * Copy keys from one btree block to another. 1307 */ 1308 STATIC void 1309 xfs_btree_copy_keys( 1310 struct xfs_btree_cur *cur, 1311 union xfs_btree_key *dst_key, 1312 union xfs_btree_key *src_key, 1313 int numkeys) 1314 { 1315 ASSERT(numkeys >= 0); 1316 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); 1317 } 1318 1319 /* 1320 * Copy records from one btree block to another. 1321 */ 1322 STATIC void 1323 xfs_btree_copy_recs( 1324 struct xfs_btree_cur *cur, 1325 union xfs_btree_rec *dst_rec, 1326 union xfs_btree_rec *src_rec, 1327 int numrecs) 1328 { 1329 ASSERT(numrecs >= 0); 1330 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); 1331 } 1332 1333 /* 1334 * Copy block pointers from one btree block to another. 1335 */ 1336 STATIC void 1337 xfs_btree_copy_ptrs( 1338 struct xfs_btree_cur *cur, 1339 union xfs_btree_ptr *dst_ptr, 1340 union xfs_btree_ptr *src_ptr, 1341 int numptrs) 1342 { 1343 ASSERT(numptrs >= 0); 1344 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); 1345 } 1346 1347 /* 1348 * Shift keys one index left/right inside a single btree block. 1349 */ 1350 STATIC void 1351 xfs_btree_shift_keys( 1352 struct xfs_btree_cur *cur, 1353 union xfs_btree_key *key, 1354 int dir, 1355 int numkeys) 1356 { 1357 char *dst_key; 1358 1359 ASSERT(numkeys >= 0); 1360 ASSERT(dir == 1 || dir == -1); 1361 1362 dst_key = (char *)key + (dir * cur->bc_ops->key_len); 1363 memmove(dst_key, key, numkeys * cur->bc_ops->key_len); 1364 } 1365 1366 /* 1367 * Shift records one index left/right inside a single btree block. 1368 */ 1369 STATIC void 1370 xfs_btree_shift_recs( 1371 struct xfs_btree_cur *cur, 1372 union xfs_btree_rec *rec, 1373 int dir, 1374 int numrecs) 1375 { 1376 char *dst_rec; 1377 1378 ASSERT(numrecs >= 0); 1379 ASSERT(dir == 1 || dir == -1); 1380 1381 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); 1382 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); 1383 } 1384 1385 /* 1386 * Shift block pointers one index left/right inside a single btree block. 1387 */ 1388 STATIC void 1389 xfs_btree_shift_ptrs( 1390 struct xfs_btree_cur *cur, 1391 union xfs_btree_ptr *ptr, 1392 int dir, 1393 int numptrs) 1394 { 1395 char *dst_ptr; 1396 1397 ASSERT(numptrs >= 0); 1398 ASSERT(dir == 1 || dir == -1); 1399 1400 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); 1401 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); 1402 } 1403 1404 /* 1405 * Log key values from the btree block. 1406 */ 1407 STATIC void 1408 xfs_btree_log_keys( 1409 struct xfs_btree_cur *cur, 1410 struct xfs_buf *bp, 1411 int first, 1412 int last) 1413 { 1414 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1415 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); 1416 1417 if (bp) { 1418 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1419 xfs_trans_log_buf(cur->bc_tp, bp, 1420 xfs_btree_key_offset(cur, first), 1421 xfs_btree_key_offset(cur, last + 1) - 1); 1422 } else { 1423 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1424 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1425 } 1426 1427 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1428 } 1429 1430 /* 1431 * Log record values from the btree block. 1432 */ 1433 void 1434 xfs_btree_log_recs( 1435 struct xfs_btree_cur *cur, 1436 struct xfs_buf *bp, 1437 int first, 1438 int last) 1439 { 1440 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1441 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); 1442 1443 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1444 xfs_trans_log_buf(cur->bc_tp, bp, 1445 xfs_btree_rec_offset(cur, first), 1446 xfs_btree_rec_offset(cur, last + 1) - 1); 1447 1448 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1449 } 1450 1451 /* 1452 * Log block pointer fields from a btree block (nonleaf). 1453 */ 1454 STATIC void 1455 xfs_btree_log_ptrs( 1456 struct xfs_btree_cur *cur, /* btree cursor */ 1457 struct xfs_buf *bp, /* buffer containing btree block */ 1458 int first, /* index of first pointer to log */ 1459 int last) /* index of last pointer to log */ 1460 { 1461 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1462 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); 1463 1464 if (bp) { 1465 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 1466 int level = xfs_btree_get_level(block); 1467 1468 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1469 xfs_trans_log_buf(cur->bc_tp, bp, 1470 xfs_btree_ptr_offset(cur, first, level), 1471 xfs_btree_ptr_offset(cur, last + 1, level) - 1); 1472 } else { 1473 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1474 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1475 } 1476 1477 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1478 } 1479 1480 /* 1481 * Log fields from a btree block header. 1482 */ 1483 void 1484 xfs_btree_log_block( 1485 struct xfs_btree_cur *cur, /* btree cursor */ 1486 struct xfs_buf *bp, /* buffer containing btree block */ 1487 int fields) /* mask of fields: XFS_BB_... */ 1488 { 1489 int first; /* first byte offset logged */ 1490 int last; /* last byte offset logged */ 1491 static const short soffsets[] = { /* table of offsets (short) */ 1492 offsetof(struct xfs_btree_block, bb_magic), 1493 offsetof(struct xfs_btree_block, bb_level), 1494 offsetof(struct xfs_btree_block, bb_numrecs), 1495 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), 1496 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), 1497 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), 1498 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), 1499 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), 1500 offsetof(struct xfs_btree_block, bb_u.s.bb_owner), 1501 offsetof(struct xfs_btree_block, bb_u.s.bb_crc), 1502 XFS_BTREE_SBLOCK_CRC_LEN 1503 }; 1504 static const short loffsets[] = { /* table of offsets (long) */ 1505 offsetof(struct xfs_btree_block, bb_magic), 1506 offsetof(struct xfs_btree_block, bb_level), 1507 offsetof(struct xfs_btree_block, bb_numrecs), 1508 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), 1509 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), 1510 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), 1511 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), 1512 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), 1513 offsetof(struct xfs_btree_block, bb_u.l.bb_owner), 1514 offsetof(struct xfs_btree_block, bb_u.l.bb_crc), 1515 offsetof(struct xfs_btree_block, bb_u.l.bb_pad), 1516 XFS_BTREE_LBLOCK_CRC_LEN 1517 }; 1518 1519 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1520 XFS_BTREE_TRACE_ARGBI(cur, bp, fields); 1521 1522 if (bp) { 1523 int nbits; 1524 1525 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 1526 /* 1527 * We don't log the CRC when updating a btree 1528 * block but instead recreate it during log 1529 * recovery. As the log buffers have checksums 1530 * of their own this is safe and avoids logging a crc 1531 * update in a lot of places. 1532 */ 1533 if (fields == XFS_BB_ALL_BITS) 1534 fields = XFS_BB_ALL_BITS_CRC; 1535 nbits = XFS_BB_NUM_BITS_CRC; 1536 } else { 1537 nbits = XFS_BB_NUM_BITS; 1538 } 1539 xfs_btree_offsets(fields, 1540 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 1541 loffsets : soffsets, 1542 nbits, &first, &last); 1543 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1544 xfs_trans_log_buf(cur->bc_tp, bp, first, last); 1545 } else { 1546 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1547 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1548 } 1549 1550 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1551 } 1552 1553 /* 1554 * Increment cursor by one record at the level. 1555 * For nonzero levels the leaf-ward information is untouched. 1556 */ 1557 int /* error */ 1558 xfs_btree_increment( 1559 struct xfs_btree_cur *cur, 1560 int level, 1561 int *stat) /* success/failure */ 1562 { 1563 struct xfs_btree_block *block; 1564 union xfs_btree_ptr ptr; 1565 struct xfs_buf *bp; 1566 int error; /* error return value */ 1567 int lev; 1568 1569 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1570 XFS_BTREE_TRACE_ARGI(cur, level); 1571 1572 ASSERT(level < cur->bc_nlevels); 1573 1574 /* Read-ahead to the right at this level. */ 1575 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 1576 1577 /* Get a pointer to the btree block. */ 1578 block = xfs_btree_get_block(cur, level, &bp); 1579 1580 #ifdef DEBUG 1581 error = xfs_btree_check_block(cur, block, level, bp); 1582 if (error) 1583 goto error0; 1584 #endif 1585 1586 /* We're done if we remain in the block after the increment. */ 1587 if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) 1588 goto out1; 1589 1590 /* Fail if we just went off the right edge of the tree. */ 1591 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1592 if (xfs_btree_ptr_is_null(cur, &ptr)) 1593 goto out0; 1594 1595 XFS_BTREE_STATS_INC(cur, increment); 1596 1597 /* 1598 * March up the tree incrementing pointers. 1599 * Stop when we don't go off the right edge of a block. 1600 */ 1601 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1602 block = xfs_btree_get_block(cur, lev, &bp); 1603 1604 #ifdef DEBUG 1605 error = xfs_btree_check_block(cur, block, lev, bp); 1606 if (error) 1607 goto error0; 1608 #endif 1609 1610 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) 1611 break; 1612 1613 /* Read-ahead the right block for the next loop. */ 1614 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); 1615 } 1616 1617 /* 1618 * If we went off the root then we are either seriously 1619 * confused or have the tree root in an inode. 1620 */ 1621 if (lev == cur->bc_nlevels) { 1622 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1623 goto out0; 1624 ASSERT(0); 1625 error = -EFSCORRUPTED; 1626 goto error0; 1627 } 1628 ASSERT(lev < cur->bc_nlevels); 1629 1630 /* 1631 * Now walk back down the tree, fixing up the cursor's buffer 1632 * pointers and key numbers. 1633 */ 1634 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1635 union xfs_btree_ptr *ptrp; 1636 1637 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1638 --lev; 1639 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1640 if (error) 1641 goto error0; 1642 1643 xfs_btree_setbuf(cur, lev, bp); 1644 cur->bc_ptrs[lev] = 1; 1645 } 1646 out1: 1647 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1648 *stat = 1; 1649 return 0; 1650 1651 out0: 1652 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1653 *stat = 0; 1654 return 0; 1655 1656 error0: 1657 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 1658 return error; 1659 } 1660 1661 /* 1662 * Decrement cursor by one record at the level. 1663 * For nonzero levels the leaf-ward information is untouched. 1664 */ 1665 int /* error */ 1666 xfs_btree_decrement( 1667 struct xfs_btree_cur *cur, 1668 int level, 1669 int *stat) /* success/failure */ 1670 { 1671 struct xfs_btree_block *block; 1672 xfs_buf_t *bp; 1673 int error; /* error return value */ 1674 int lev; 1675 union xfs_btree_ptr ptr; 1676 1677 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1678 XFS_BTREE_TRACE_ARGI(cur, level); 1679 1680 ASSERT(level < cur->bc_nlevels); 1681 1682 /* Read-ahead to the left at this level. */ 1683 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); 1684 1685 /* We're done if we remain in the block after the decrement. */ 1686 if (--cur->bc_ptrs[level] > 0) 1687 goto out1; 1688 1689 /* Get a pointer to the btree block. */ 1690 block = xfs_btree_get_block(cur, level, &bp); 1691 1692 #ifdef DEBUG 1693 error = xfs_btree_check_block(cur, block, level, bp); 1694 if (error) 1695 goto error0; 1696 #endif 1697 1698 /* Fail if we just went off the left edge of the tree. */ 1699 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 1700 if (xfs_btree_ptr_is_null(cur, &ptr)) 1701 goto out0; 1702 1703 XFS_BTREE_STATS_INC(cur, decrement); 1704 1705 /* 1706 * March up the tree decrementing pointers. 1707 * Stop when we don't go off the left edge of a block. 1708 */ 1709 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1710 if (--cur->bc_ptrs[lev] > 0) 1711 break; 1712 /* Read-ahead the left block for the next loop. */ 1713 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); 1714 } 1715 1716 /* 1717 * If we went off the root then we are seriously confused. 1718 * or the root of the tree is in an inode. 1719 */ 1720 if (lev == cur->bc_nlevels) { 1721 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1722 goto out0; 1723 ASSERT(0); 1724 error = -EFSCORRUPTED; 1725 goto error0; 1726 } 1727 ASSERT(lev < cur->bc_nlevels); 1728 1729 /* 1730 * Now walk back down the tree, fixing up the cursor's buffer 1731 * pointers and key numbers. 1732 */ 1733 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1734 union xfs_btree_ptr *ptrp; 1735 1736 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1737 --lev; 1738 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1739 if (error) 1740 goto error0; 1741 xfs_btree_setbuf(cur, lev, bp); 1742 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); 1743 } 1744 out1: 1745 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1746 *stat = 1; 1747 return 0; 1748 1749 out0: 1750 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1751 *stat = 0; 1752 return 0; 1753 1754 error0: 1755 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 1756 return error; 1757 } 1758 1759 STATIC int 1760 xfs_btree_lookup_get_block( 1761 struct xfs_btree_cur *cur, /* btree cursor */ 1762 int level, /* level in the btree */ 1763 union xfs_btree_ptr *pp, /* ptr to btree block */ 1764 struct xfs_btree_block **blkp) /* return btree block */ 1765 { 1766 struct xfs_buf *bp; /* buffer pointer for btree block */ 1767 int error = 0; 1768 1769 /* special case the root block if in an inode */ 1770 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 1771 (level == cur->bc_nlevels - 1)) { 1772 *blkp = xfs_btree_get_iroot(cur); 1773 return 0; 1774 } 1775 1776 /* 1777 * If the old buffer at this level for the disk address we are 1778 * looking for re-use it. 1779 * 1780 * Otherwise throw it away and get a new one. 1781 */ 1782 bp = cur->bc_bufs[level]; 1783 if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) { 1784 *blkp = XFS_BUF_TO_BLOCK(bp); 1785 return 0; 1786 } 1787 1788 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); 1789 if (error) 1790 return error; 1791 1792 /* Check the inode owner since the verifiers don't. */ 1793 if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && 1794 (cur->bc_flags & XFS_BTREE_LONG_PTRS) && 1795 be64_to_cpu((*blkp)->bb_u.l.bb_owner) != 1796 cur->bc_private.b.ip->i_ino) 1797 goto out_bad; 1798 1799 /* Did we get the level we were looking for? */ 1800 if (be16_to_cpu((*blkp)->bb_level) != level) 1801 goto out_bad; 1802 1803 /* Check that internal nodes have at least one record. */ 1804 if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0) 1805 goto out_bad; 1806 1807 xfs_btree_setbuf(cur, level, bp); 1808 return 0; 1809 1810 out_bad: 1811 *blkp = NULL; 1812 xfs_trans_brelse(cur->bc_tp, bp); 1813 return -EFSCORRUPTED; 1814 } 1815 1816 /* 1817 * Get current search key. For level 0 we don't actually have a key 1818 * structure so we make one up from the record. For all other levels 1819 * we just return the right key. 1820 */ 1821 STATIC union xfs_btree_key * 1822 xfs_lookup_get_search_key( 1823 struct xfs_btree_cur *cur, 1824 int level, 1825 int keyno, 1826 struct xfs_btree_block *block, 1827 union xfs_btree_key *kp) 1828 { 1829 if (level == 0) { 1830 cur->bc_ops->init_key_from_rec(kp, 1831 xfs_btree_rec_addr(cur, keyno, block)); 1832 return kp; 1833 } 1834 1835 return xfs_btree_key_addr(cur, keyno, block); 1836 } 1837 1838 /* 1839 * Lookup the record. The cursor is made to point to it, based on dir. 1840 * stat is set to 0 if can't find any such record, 1 for success. 1841 */ 1842 int /* error */ 1843 xfs_btree_lookup( 1844 struct xfs_btree_cur *cur, /* btree cursor */ 1845 xfs_lookup_t dir, /* <=, ==, or >= */ 1846 int *stat) /* success/failure */ 1847 { 1848 struct xfs_btree_block *block; /* current btree block */ 1849 __int64_t diff; /* difference for the current key */ 1850 int error; /* error return value */ 1851 int keyno; /* current key number */ 1852 int level; /* level in the btree */ 1853 union xfs_btree_ptr *pp; /* ptr to btree block */ 1854 union xfs_btree_ptr ptr; /* ptr to btree block */ 1855 1856 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1857 XFS_BTREE_TRACE_ARGI(cur, dir); 1858 1859 XFS_BTREE_STATS_INC(cur, lookup); 1860 1861 /* No such thing as a zero-level tree. */ 1862 if (cur->bc_nlevels == 0) 1863 return -EFSCORRUPTED; 1864 1865 block = NULL; 1866 keyno = 0; 1867 1868 /* initialise start pointer from cursor */ 1869 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 1870 pp = &ptr; 1871 1872 /* 1873 * Iterate over each level in the btree, starting at the root. 1874 * For each level above the leaves, find the key we need, based 1875 * on the lookup record, then follow the corresponding block 1876 * pointer down to the next level. 1877 */ 1878 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { 1879 /* Get the block we need to do the lookup on. */ 1880 error = xfs_btree_lookup_get_block(cur, level, pp, &block); 1881 if (error) 1882 goto error0; 1883 1884 if (diff == 0) { 1885 /* 1886 * If we already had a key match at a higher level, we 1887 * know we need to use the first entry in this block. 1888 */ 1889 keyno = 1; 1890 } else { 1891 /* Otherwise search this block. Do a binary search. */ 1892 1893 int high; /* high entry number */ 1894 int low; /* low entry number */ 1895 1896 /* Set low and high entry numbers, 1-based. */ 1897 low = 1; 1898 high = xfs_btree_get_numrecs(block); 1899 if (!high) { 1900 /* Block is empty, must be an empty leaf. */ 1901 ASSERT(level == 0 && cur->bc_nlevels == 1); 1902 1903 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; 1904 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1905 *stat = 0; 1906 return 0; 1907 } 1908 1909 /* Binary search the block. */ 1910 while (low <= high) { 1911 union xfs_btree_key key; 1912 union xfs_btree_key *kp; 1913 1914 XFS_BTREE_STATS_INC(cur, compare); 1915 1916 /* keyno is average of low and high. */ 1917 keyno = (low + high) >> 1; 1918 1919 /* Get current search key */ 1920 kp = xfs_lookup_get_search_key(cur, level, 1921 keyno, block, &key); 1922 1923 /* 1924 * Compute difference to get next direction: 1925 * - less than, move right 1926 * - greater than, move left 1927 * - equal, we're done 1928 */ 1929 diff = cur->bc_ops->key_diff(cur, kp); 1930 if (diff < 0) 1931 low = keyno + 1; 1932 else if (diff > 0) 1933 high = keyno - 1; 1934 else 1935 break; 1936 } 1937 } 1938 1939 /* 1940 * If there are more levels, set up for the next level 1941 * by getting the block number and filling in the cursor. 1942 */ 1943 if (level > 0) { 1944 /* 1945 * If we moved left, need the previous key number, 1946 * unless there isn't one. 1947 */ 1948 if (diff > 0 && --keyno < 1) 1949 keyno = 1; 1950 pp = xfs_btree_ptr_addr(cur, keyno, block); 1951 1952 #ifdef DEBUG 1953 error = xfs_btree_check_ptr(cur, pp, 0, level); 1954 if (error) 1955 goto error0; 1956 #endif 1957 cur->bc_ptrs[level] = keyno; 1958 } 1959 } 1960 1961 /* Done with the search. See if we need to adjust the results. */ 1962 if (dir != XFS_LOOKUP_LE && diff < 0) { 1963 keyno++; 1964 /* 1965 * If ge search and we went off the end of the block, but it's 1966 * not the last block, we're in the wrong block. 1967 */ 1968 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1969 if (dir == XFS_LOOKUP_GE && 1970 keyno > xfs_btree_get_numrecs(block) && 1971 !xfs_btree_ptr_is_null(cur, &ptr)) { 1972 int i; 1973 1974 cur->bc_ptrs[0] = keyno; 1975 error = xfs_btree_increment(cur, 0, &i); 1976 if (error) 1977 goto error0; 1978 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); 1979 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1980 *stat = 1; 1981 return 0; 1982 } 1983 } else if (dir == XFS_LOOKUP_LE && diff > 0) 1984 keyno--; 1985 cur->bc_ptrs[0] = keyno; 1986 1987 /* Return if we succeeded or not. */ 1988 if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) 1989 *stat = 0; 1990 else if (dir != XFS_LOOKUP_EQ || diff == 0) 1991 *stat = 1; 1992 else 1993 *stat = 0; 1994 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1995 return 0; 1996 1997 error0: 1998 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 1999 return error; 2000 } 2001 2002 /* Find the high key storage area from a regular key. */ 2003 STATIC union xfs_btree_key * 2004 xfs_btree_high_key_from_key( 2005 struct xfs_btree_cur *cur, 2006 union xfs_btree_key *key) 2007 { 2008 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 2009 return (union xfs_btree_key *)((char *)key + 2010 (cur->bc_ops->key_len / 2)); 2011 } 2012 2013 /* Determine the low (and high if overlapped) keys of a leaf block */ 2014 STATIC void 2015 xfs_btree_get_leaf_keys( 2016 struct xfs_btree_cur *cur, 2017 struct xfs_btree_block *block, 2018 union xfs_btree_key *key) 2019 { 2020 union xfs_btree_key max_hkey; 2021 union xfs_btree_key hkey; 2022 union xfs_btree_rec *rec; 2023 union xfs_btree_key *high; 2024 int n; 2025 2026 rec = xfs_btree_rec_addr(cur, 1, block); 2027 cur->bc_ops->init_key_from_rec(key, rec); 2028 2029 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2030 2031 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec); 2032 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 2033 rec = xfs_btree_rec_addr(cur, n, block); 2034 cur->bc_ops->init_high_key_from_rec(&hkey, rec); 2035 if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey) 2036 > 0) 2037 max_hkey = hkey; 2038 } 2039 2040 high = xfs_btree_high_key_from_key(cur, key); 2041 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2); 2042 } 2043 } 2044 2045 /* Determine the low (and high if overlapped) keys of a node block */ 2046 STATIC void 2047 xfs_btree_get_node_keys( 2048 struct xfs_btree_cur *cur, 2049 struct xfs_btree_block *block, 2050 union xfs_btree_key *key) 2051 { 2052 union xfs_btree_key *hkey; 2053 union xfs_btree_key *max_hkey; 2054 union xfs_btree_key *high; 2055 int n; 2056 2057 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2058 memcpy(key, xfs_btree_key_addr(cur, 1, block), 2059 cur->bc_ops->key_len / 2); 2060 2061 max_hkey = xfs_btree_high_key_addr(cur, 1, block); 2062 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 2063 hkey = xfs_btree_high_key_addr(cur, n, block); 2064 if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0) 2065 max_hkey = hkey; 2066 } 2067 2068 high = xfs_btree_high_key_from_key(cur, key); 2069 memcpy(high, max_hkey, cur->bc_ops->key_len / 2); 2070 } else { 2071 memcpy(key, xfs_btree_key_addr(cur, 1, block), 2072 cur->bc_ops->key_len); 2073 } 2074 } 2075 2076 /* Derive the keys for any btree block. */ 2077 STATIC void 2078 xfs_btree_get_keys( 2079 struct xfs_btree_cur *cur, 2080 struct xfs_btree_block *block, 2081 union xfs_btree_key *key) 2082 { 2083 if (be16_to_cpu(block->bb_level) == 0) 2084 xfs_btree_get_leaf_keys(cur, block, key); 2085 else 2086 xfs_btree_get_node_keys(cur, block, key); 2087 } 2088 2089 /* 2090 * Decide if we need to update the parent keys of a btree block. For 2091 * a standard btree this is only necessary if we're updating the first 2092 * record/key. For an overlapping btree, we must always update the 2093 * keys because the highest key can be in any of the records or keys 2094 * in the block. 2095 */ 2096 static inline bool 2097 xfs_btree_needs_key_update( 2098 struct xfs_btree_cur *cur, 2099 int ptr) 2100 { 2101 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1; 2102 } 2103 2104 /* 2105 * Update the low and high parent keys of the given level, progressing 2106 * towards the root. If force_all is false, stop if the keys for a given 2107 * level do not need updating. 2108 */ 2109 STATIC int 2110 __xfs_btree_updkeys( 2111 struct xfs_btree_cur *cur, 2112 int level, 2113 struct xfs_btree_block *block, 2114 struct xfs_buf *bp0, 2115 bool force_all) 2116 { 2117 union xfs_btree_key key; /* keys from current level */ 2118 union xfs_btree_key *lkey; /* keys from the next level up */ 2119 union xfs_btree_key *hkey; 2120 union xfs_btree_key *nlkey; /* keys from the next level up */ 2121 union xfs_btree_key *nhkey; 2122 struct xfs_buf *bp; 2123 int ptr; 2124 2125 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 2126 2127 /* Exit if there aren't any parent levels to update. */ 2128 if (level + 1 >= cur->bc_nlevels) 2129 return 0; 2130 2131 trace_xfs_btree_updkeys(cur, level, bp0); 2132 2133 lkey = &key; 2134 hkey = xfs_btree_high_key_from_key(cur, lkey); 2135 xfs_btree_get_keys(cur, block, lkey); 2136 for (level++; level < cur->bc_nlevels; level++) { 2137 #ifdef DEBUG 2138 int error; 2139 #endif 2140 block = xfs_btree_get_block(cur, level, &bp); 2141 trace_xfs_btree_updkeys(cur, level, bp); 2142 #ifdef DEBUG 2143 error = xfs_btree_check_block(cur, block, level, bp); 2144 if (error) { 2145 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2146 return error; 2147 } 2148 #endif 2149 ptr = cur->bc_ptrs[level]; 2150 nlkey = xfs_btree_key_addr(cur, ptr, block); 2151 nhkey = xfs_btree_high_key_addr(cur, ptr, block); 2152 if (!force_all && 2153 !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 || 2154 cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0)) 2155 break; 2156 xfs_btree_copy_keys(cur, nlkey, lkey, 1); 2157 xfs_btree_log_keys(cur, bp, ptr, ptr); 2158 if (level + 1 >= cur->bc_nlevels) 2159 break; 2160 xfs_btree_get_node_keys(cur, block, lkey); 2161 } 2162 2163 return 0; 2164 } 2165 2166 /* Update all the keys from some level in cursor back to the root. */ 2167 STATIC int 2168 xfs_btree_updkeys_force( 2169 struct xfs_btree_cur *cur, 2170 int level) 2171 { 2172 struct xfs_buf *bp; 2173 struct xfs_btree_block *block; 2174 2175 block = xfs_btree_get_block(cur, level, &bp); 2176 return __xfs_btree_updkeys(cur, level, block, bp, true); 2177 } 2178 2179 /* 2180 * Update the parent keys of the given level, progressing towards the root. 2181 */ 2182 STATIC int 2183 xfs_btree_update_keys( 2184 struct xfs_btree_cur *cur, 2185 int level) 2186 { 2187 struct xfs_btree_block *block; 2188 struct xfs_buf *bp; 2189 union xfs_btree_key *kp; 2190 union xfs_btree_key key; 2191 int ptr; 2192 2193 ASSERT(level >= 0); 2194 2195 block = xfs_btree_get_block(cur, level, &bp); 2196 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) 2197 return __xfs_btree_updkeys(cur, level, block, bp, false); 2198 2199 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2200 XFS_BTREE_TRACE_ARGIK(cur, level, keyp); 2201 2202 /* 2203 * Go up the tree from this level toward the root. 2204 * At each level, update the key value to the value input. 2205 * Stop when we reach a level where the cursor isn't pointing 2206 * at the first entry in the block. 2207 */ 2208 xfs_btree_get_keys(cur, block, &key); 2209 for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { 2210 #ifdef DEBUG 2211 int error; 2212 #endif 2213 block = xfs_btree_get_block(cur, level, &bp); 2214 #ifdef DEBUG 2215 error = xfs_btree_check_block(cur, block, level, bp); 2216 if (error) { 2217 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2218 return error; 2219 } 2220 #endif 2221 ptr = cur->bc_ptrs[level]; 2222 kp = xfs_btree_key_addr(cur, ptr, block); 2223 xfs_btree_copy_keys(cur, kp, &key, 1); 2224 xfs_btree_log_keys(cur, bp, ptr, ptr); 2225 } 2226 2227 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2228 return 0; 2229 } 2230 2231 /* 2232 * Update the record referred to by cur to the value in the 2233 * given record. This either works (return 0) or gets an 2234 * EFSCORRUPTED error. 2235 */ 2236 int 2237 xfs_btree_update( 2238 struct xfs_btree_cur *cur, 2239 union xfs_btree_rec *rec) 2240 { 2241 struct xfs_btree_block *block; 2242 struct xfs_buf *bp; 2243 int error; 2244 int ptr; 2245 union xfs_btree_rec *rp; 2246 2247 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2248 XFS_BTREE_TRACE_ARGR(cur, rec); 2249 2250 /* Pick up the current block. */ 2251 block = xfs_btree_get_block(cur, 0, &bp); 2252 2253 #ifdef DEBUG 2254 error = xfs_btree_check_block(cur, block, 0, bp); 2255 if (error) 2256 goto error0; 2257 #endif 2258 /* Get the address of the rec to be updated. */ 2259 ptr = cur->bc_ptrs[0]; 2260 rp = xfs_btree_rec_addr(cur, ptr, block); 2261 2262 /* Fill in the new contents and log them. */ 2263 xfs_btree_copy_recs(cur, rp, rec, 1); 2264 xfs_btree_log_recs(cur, bp, ptr, ptr); 2265 2266 /* 2267 * If we are tracking the last record in the tree and 2268 * we are at the far right edge of the tree, update it. 2269 */ 2270 if (xfs_btree_is_lastrec(cur, block, 0)) { 2271 cur->bc_ops->update_lastrec(cur, block, rec, 2272 ptr, LASTREC_UPDATE); 2273 } 2274 2275 /* Pass new key value up to our parent. */ 2276 if (xfs_btree_needs_key_update(cur, ptr)) { 2277 error = xfs_btree_update_keys(cur, 0); 2278 if (error) 2279 goto error0; 2280 } 2281 2282 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2283 return 0; 2284 2285 error0: 2286 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2287 return error; 2288 } 2289 2290 /* 2291 * Move 1 record left from cur/level if possible. 2292 * Update cur to reflect the new path. 2293 */ 2294 STATIC int /* error */ 2295 xfs_btree_lshift( 2296 struct xfs_btree_cur *cur, 2297 int level, 2298 int *stat) /* success/failure */ 2299 { 2300 struct xfs_buf *lbp; /* left buffer pointer */ 2301 struct xfs_btree_block *left; /* left btree block */ 2302 int lrecs; /* left record count */ 2303 struct xfs_buf *rbp; /* right buffer pointer */ 2304 struct xfs_btree_block *right; /* right btree block */ 2305 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2306 int rrecs; /* right record count */ 2307 union xfs_btree_ptr lptr; /* left btree pointer */ 2308 union xfs_btree_key *rkp = NULL; /* right btree key */ 2309 union xfs_btree_ptr *rpp = NULL; /* right address pointer */ 2310 union xfs_btree_rec *rrp = NULL; /* right record pointer */ 2311 int error; /* error return value */ 2312 int i; 2313 2314 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2315 XFS_BTREE_TRACE_ARGI(cur, level); 2316 2317 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2318 level == cur->bc_nlevels - 1) 2319 goto out0; 2320 2321 /* Set up variables for this block as "right". */ 2322 right = xfs_btree_get_block(cur, level, &rbp); 2323 2324 #ifdef DEBUG 2325 error = xfs_btree_check_block(cur, right, level, rbp); 2326 if (error) 2327 goto error0; 2328 #endif 2329 2330 /* If we've got no left sibling then we can't shift an entry left. */ 2331 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2332 if (xfs_btree_ptr_is_null(cur, &lptr)) 2333 goto out0; 2334 2335 /* 2336 * If the cursor entry is the one that would be moved, don't 2337 * do it... it's too complicated. 2338 */ 2339 if (cur->bc_ptrs[level] <= 1) 2340 goto out0; 2341 2342 /* Set up the left neighbor as "left". */ 2343 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 2344 if (error) 2345 goto error0; 2346 2347 /* If it's full, it can't take another entry. */ 2348 lrecs = xfs_btree_get_numrecs(left); 2349 if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) 2350 goto out0; 2351 2352 rrecs = xfs_btree_get_numrecs(right); 2353 2354 /* 2355 * We add one entry to the left side and remove one for the right side. 2356 * Account for it here, the changes will be updated on disk and logged 2357 * later. 2358 */ 2359 lrecs++; 2360 rrecs--; 2361 2362 XFS_BTREE_STATS_INC(cur, lshift); 2363 XFS_BTREE_STATS_ADD(cur, moves, 1); 2364 2365 /* 2366 * If non-leaf, copy a key and a ptr to the left block. 2367 * Log the changes to the left block. 2368 */ 2369 if (level > 0) { 2370 /* It's a non-leaf. Move keys and pointers. */ 2371 union xfs_btree_key *lkp; /* left btree key */ 2372 union xfs_btree_ptr *lpp; /* left address pointer */ 2373 2374 lkp = xfs_btree_key_addr(cur, lrecs, left); 2375 rkp = xfs_btree_key_addr(cur, 1, right); 2376 2377 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2378 rpp = xfs_btree_ptr_addr(cur, 1, right); 2379 #ifdef DEBUG 2380 error = xfs_btree_check_ptr(cur, rpp, 0, level); 2381 if (error) 2382 goto error0; 2383 #endif 2384 xfs_btree_copy_keys(cur, lkp, rkp, 1); 2385 xfs_btree_copy_ptrs(cur, lpp, rpp, 1); 2386 2387 xfs_btree_log_keys(cur, lbp, lrecs, lrecs); 2388 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); 2389 2390 ASSERT(cur->bc_ops->keys_inorder(cur, 2391 xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); 2392 } else { 2393 /* It's a leaf. Move records. */ 2394 union xfs_btree_rec *lrp; /* left record pointer */ 2395 2396 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2397 rrp = xfs_btree_rec_addr(cur, 1, right); 2398 2399 xfs_btree_copy_recs(cur, lrp, rrp, 1); 2400 xfs_btree_log_recs(cur, lbp, lrecs, lrecs); 2401 2402 ASSERT(cur->bc_ops->recs_inorder(cur, 2403 xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); 2404 } 2405 2406 xfs_btree_set_numrecs(left, lrecs); 2407 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2408 2409 xfs_btree_set_numrecs(right, rrecs); 2410 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2411 2412 /* 2413 * Slide the contents of right down one entry. 2414 */ 2415 XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); 2416 if (level > 0) { 2417 /* It's a nonleaf. operate on keys and ptrs */ 2418 #ifdef DEBUG 2419 int i; /* loop index */ 2420 2421 for (i = 0; i < rrecs; i++) { 2422 error = xfs_btree_check_ptr(cur, rpp, i + 1, level); 2423 if (error) 2424 goto error0; 2425 } 2426 #endif 2427 xfs_btree_shift_keys(cur, 2428 xfs_btree_key_addr(cur, 2, right), 2429 -1, rrecs); 2430 xfs_btree_shift_ptrs(cur, 2431 xfs_btree_ptr_addr(cur, 2, right), 2432 -1, rrecs); 2433 2434 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2435 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2436 } else { 2437 /* It's a leaf. operate on records */ 2438 xfs_btree_shift_recs(cur, 2439 xfs_btree_rec_addr(cur, 2, right), 2440 -1, rrecs); 2441 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2442 } 2443 2444 /* 2445 * Using a temporary cursor, update the parent key values of the 2446 * block on the left. 2447 */ 2448 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2449 error = xfs_btree_dup_cursor(cur, &tcur); 2450 if (error) 2451 goto error0; 2452 i = xfs_btree_firstrec(tcur, level); 2453 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0); 2454 2455 error = xfs_btree_decrement(tcur, level, &i); 2456 if (error) 2457 goto error1; 2458 2459 /* Update the parent high keys of the left block, if needed. */ 2460 error = xfs_btree_update_keys(tcur, level); 2461 if (error) 2462 goto error1; 2463 2464 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2465 } 2466 2467 /* Update the parent keys of the right block. */ 2468 error = xfs_btree_update_keys(cur, level); 2469 if (error) 2470 goto error0; 2471 2472 /* Slide the cursor value left one. */ 2473 cur->bc_ptrs[level]--; 2474 2475 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2476 *stat = 1; 2477 return 0; 2478 2479 out0: 2480 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2481 *stat = 0; 2482 return 0; 2483 2484 error0: 2485 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2486 return error; 2487 2488 error1: 2489 XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); 2490 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2491 return error; 2492 } 2493 2494 /* 2495 * Move 1 record right from cur/level if possible. 2496 * Update cur to reflect the new path. 2497 */ 2498 STATIC int /* error */ 2499 xfs_btree_rshift( 2500 struct xfs_btree_cur *cur, 2501 int level, 2502 int *stat) /* success/failure */ 2503 { 2504 struct xfs_buf *lbp; /* left buffer pointer */ 2505 struct xfs_btree_block *left; /* left btree block */ 2506 struct xfs_buf *rbp; /* right buffer pointer */ 2507 struct xfs_btree_block *right; /* right btree block */ 2508 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2509 union xfs_btree_ptr rptr; /* right block pointer */ 2510 union xfs_btree_key *rkp; /* right btree key */ 2511 int rrecs; /* right record count */ 2512 int lrecs; /* left record count */ 2513 int error; /* error return value */ 2514 int i; /* loop counter */ 2515 2516 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2517 XFS_BTREE_TRACE_ARGI(cur, level); 2518 2519 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2520 (level == cur->bc_nlevels - 1)) 2521 goto out0; 2522 2523 /* Set up variables for this block as "left". */ 2524 left = xfs_btree_get_block(cur, level, &lbp); 2525 2526 #ifdef DEBUG 2527 error = xfs_btree_check_block(cur, left, level, lbp); 2528 if (error) 2529 goto error0; 2530 #endif 2531 2532 /* If we've got no right sibling then we can't shift an entry right. */ 2533 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2534 if (xfs_btree_ptr_is_null(cur, &rptr)) 2535 goto out0; 2536 2537 /* 2538 * If the cursor entry is the one that would be moved, don't 2539 * do it... it's too complicated. 2540 */ 2541 lrecs = xfs_btree_get_numrecs(left); 2542 if (cur->bc_ptrs[level] >= lrecs) 2543 goto out0; 2544 2545 /* Set up the right neighbor as "right". */ 2546 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 2547 if (error) 2548 goto error0; 2549 2550 /* If it's full, it can't take another entry. */ 2551 rrecs = xfs_btree_get_numrecs(right); 2552 if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) 2553 goto out0; 2554 2555 XFS_BTREE_STATS_INC(cur, rshift); 2556 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2557 2558 /* 2559 * Make a hole at the start of the right neighbor block, then 2560 * copy the last left block entry to the hole. 2561 */ 2562 if (level > 0) { 2563 /* It's a nonleaf. make a hole in the keys and ptrs */ 2564 union xfs_btree_key *lkp; 2565 union xfs_btree_ptr *lpp; 2566 union xfs_btree_ptr *rpp; 2567 2568 lkp = xfs_btree_key_addr(cur, lrecs, left); 2569 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2570 rkp = xfs_btree_key_addr(cur, 1, right); 2571 rpp = xfs_btree_ptr_addr(cur, 1, right); 2572 2573 #ifdef DEBUG 2574 for (i = rrecs - 1; i >= 0; i--) { 2575 error = xfs_btree_check_ptr(cur, rpp, i, level); 2576 if (error) 2577 goto error0; 2578 } 2579 #endif 2580 2581 xfs_btree_shift_keys(cur, rkp, 1, rrecs); 2582 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); 2583 2584 #ifdef DEBUG 2585 error = xfs_btree_check_ptr(cur, lpp, 0, level); 2586 if (error) 2587 goto error0; 2588 #endif 2589 2590 /* Now put the new data in, and log it. */ 2591 xfs_btree_copy_keys(cur, rkp, lkp, 1); 2592 xfs_btree_copy_ptrs(cur, rpp, lpp, 1); 2593 2594 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); 2595 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); 2596 2597 ASSERT(cur->bc_ops->keys_inorder(cur, rkp, 2598 xfs_btree_key_addr(cur, 2, right))); 2599 } else { 2600 /* It's a leaf. make a hole in the records */ 2601 union xfs_btree_rec *lrp; 2602 union xfs_btree_rec *rrp; 2603 2604 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2605 rrp = xfs_btree_rec_addr(cur, 1, right); 2606 2607 xfs_btree_shift_recs(cur, rrp, 1, rrecs); 2608 2609 /* Now put the new data in, and log it. */ 2610 xfs_btree_copy_recs(cur, rrp, lrp, 1); 2611 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); 2612 } 2613 2614 /* 2615 * Decrement and log left's numrecs, bump and log right's numrecs. 2616 */ 2617 xfs_btree_set_numrecs(left, --lrecs); 2618 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2619 2620 xfs_btree_set_numrecs(right, ++rrecs); 2621 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2622 2623 /* 2624 * Using a temporary cursor, update the parent key values of the 2625 * block on the right. 2626 */ 2627 error = xfs_btree_dup_cursor(cur, &tcur); 2628 if (error) 2629 goto error0; 2630 i = xfs_btree_lastrec(tcur, level); 2631 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0); 2632 2633 error = xfs_btree_increment(tcur, level, &i); 2634 if (error) 2635 goto error1; 2636 2637 /* Update the parent high keys of the left block, if needed. */ 2638 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2639 error = xfs_btree_update_keys(cur, level); 2640 if (error) 2641 goto error1; 2642 } 2643 2644 /* Update the parent keys of the right block. */ 2645 error = xfs_btree_update_keys(tcur, level); 2646 if (error) 2647 goto error1; 2648 2649 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2650 2651 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2652 *stat = 1; 2653 return 0; 2654 2655 out0: 2656 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2657 *stat = 0; 2658 return 0; 2659 2660 error0: 2661 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2662 return error; 2663 2664 error1: 2665 XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); 2666 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2667 return error; 2668 } 2669 2670 /* 2671 * Split cur/level block in half. 2672 * Return new block number and the key to its first 2673 * record (to be inserted into parent). 2674 */ 2675 STATIC int /* error */ 2676 __xfs_btree_split( 2677 struct xfs_btree_cur *cur, 2678 int level, 2679 union xfs_btree_ptr *ptrp, 2680 union xfs_btree_key *key, 2681 struct xfs_btree_cur **curp, 2682 int *stat) /* success/failure */ 2683 { 2684 union xfs_btree_ptr lptr; /* left sibling block ptr */ 2685 struct xfs_buf *lbp; /* left buffer pointer */ 2686 struct xfs_btree_block *left; /* left btree block */ 2687 union xfs_btree_ptr rptr; /* right sibling block ptr */ 2688 struct xfs_buf *rbp; /* right buffer pointer */ 2689 struct xfs_btree_block *right; /* right btree block */ 2690 union xfs_btree_ptr rrptr; /* right-right sibling ptr */ 2691 struct xfs_buf *rrbp; /* right-right buffer pointer */ 2692 struct xfs_btree_block *rrblock; /* right-right btree block */ 2693 int lrecs; 2694 int rrecs; 2695 int src_index; 2696 int error; /* error return value */ 2697 #ifdef DEBUG 2698 int i; 2699 #endif 2700 2701 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2702 XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key); 2703 2704 XFS_BTREE_STATS_INC(cur, split); 2705 2706 /* Set up left block (current one). */ 2707 left = xfs_btree_get_block(cur, level, &lbp); 2708 2709 #ifdef DEBUG 2710 error = xfs_btree_check_block(cur, left, level, lbp); 2711 if (error) 2712 goto error0; 2713 #endif 2714 2715 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 2716 2717 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2718 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); 2719 if (error) 2720 goto error0; 2721 if (*stat == 0) 2722 goto out0; 2723 XFS_BTREE_STATS_INC(cur, alloc); 2724 2725 /* Set up the new block as "right". */ 2726 error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); 2727 if (error) 2728 goto error0; 2729 2730 /* Fill in the btree header for the new right block. */ 2731 xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0); 2732 2733 /* 2734 * Split the entries between the old and the new block evenly. 2735 * Make sure that if there's an odd number of entries now, that 2736 * each new block will have the same number of entries. 2737 */ 2738 lrecs = xfs_btree_get_numrecs(left); 2739 rrecs = lrecs / 2; 2740 if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) 2741 rrecs++; 2742 src_index = (lrecs - rrecs + 1); 2743 2744 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2745 2746 /* Adjust numrecs for the later get_*_keys() calls. */ 2747 lrecs -= rrecs; 2748 xfs_btree_set_numrecs(left, lrecs); 2749 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); 2750 2751 /* 2752 * Copy btree block entries from the left block over to the 2753 * new block, the right. Update the right block and log the 2754 * changes. 2755 */ 2756 if (level > 0) { 2757 /* It's a non-leaf. Move keys and pointers. */ 2758 union xfs_btree_key *lkp; /* left btree key */ 2759 union xfs_btree_ptr *lpp; /* left address pointer */ 2760 union xfs_btree_key *rkp; /* right btree key */ 2761 union xfs_btree_ptr *rpp; /* right address pointer */ 2762 2763 lkp = xfs_btree_key_addr(cur, src_index, left); 2764 lpp = xfs_btree_ptr_addr(cur, src_index, left); 2765 rkp = xfs_btree_key_addr(cur, 1, right); 2766 rpp = xfs_btree_ptr_addr(cur, 1, right); 2767 2768 #ifdef DEBUG 2769 for (i = src_index; i < rrecs; i++) { 2770 error = xfs_btree_check_ptr(cur, lpp, i, level); 2771 if (error) 2772 goto error0; 2773 } 2774 #endif 2775 2776 /* Copy the keys & pointers to the new block. */ 2777 xfs_btree_copy_keys(cur, rkp, lkp, rrecs); 2778 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); 2779 2780 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2781 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2782 2783 /* Stash the keys of the new block for later insertion. */ 2784 xfs_btree_get_node_keys(cur, right, key); 2785 } else { 2786 /* It's a leaf. Move records. */ 2787 union xfs_btree_rec *lrp; /* left record pointer */ 2788 union xfs_btree_rec *rrp; /* right record pointer */ 2789 2790 lrp = xfs_btree_rec_addr(cur, src_index, left); 2791 rrp = xfs_btree_rec_addr(cur, 1, right); 2792 2793 /* Copy records to the new block. */ 2794 xfs_btree_copy_recs(cur, rrp, lrp, rrecs); 2795 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2796 2797 /* Stash the keys of the new block for later insertion. */ 2798 xfs_btree_get_leaf_keys(cur, right, key); 2799 } 2800 2801 /* 2802 * Find the left block number by looking in the buffer. 2803 * Adjust sibling pointers. 2804 */ 2805 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); 2806 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); 2807 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2808 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2809 2810 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); 2811 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 2812 2813 /* 2814 * If there's a block to the new block's right, make that block 2815 * point back to right instead of to left. 2816 */ 2817 if (!xfs_btree_ptr_is_null(cur, &rrptr)) { 2818 error = xfs_btree_read_buf_block(cur, &rrptr, 2819 0, &rrblock, &rrbp); 2820 if (error) 2821 goto error0; 2822 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); 2823 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 2824 } 2825 2826 /* Update the parent high keys of the left block, if needed. */ 2827 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2828 error = xfs_btree_update_keys(cur, level); 2829 if (error) 2830 goto error0; 2831 } 2832 2833 /* 2834 * If the cursor is really in the right block, move it there. 2835 * If it's just pointing past the last entry in left, then we'll 2836 * insert there, so don't change anything in that case. 2837 */ 2838 if (cur->bc_ptrs[level] > lrecs + 1) { 2839 xfs_btree_setbuf(cur, level, rbp); 2840 cur->bc_ptrs[level] -= lrecs; 2841 } 2842 /* 2843 * If there are more levels, we'll need another cursor which refers 2844 * the right block, no matter where this cursor was. 2845 */ 2846 if (level + 1 < cur->bc_nlevels) { 2847 error = xfs_btree_dup_cursor(cur, curp); 2848 if (error) 2849 goto error0; 2850 (*curp)->bc_ptrs[level + 1]++; 2851 } 2852 *ptrp = rptr; 2853 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2854 *stat = 1; 2855 return 0; 2856 out0: 2857 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2858 *stat = 0; 2859 return 0; 2860 2861 error0: 2862 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2863 return error; 2864 } 2865 2866 struct xfs_btree_split_args { 2867 struct xfs_btree_cur *cur; 2868 int level; 2869 union xfs_btree_ptr *ptrp; 2870 union xfs_btree_key *key; 2871 struct xfs_btree_cur **curp; 2872 int *stat; /* success/failure */ 2873 int result; 2874 bool kswapd; /* allocation in kswapd context */ 2875 struct completion *done; 2876 struct work_struct work; 2877 }; 2878 2879 /* 2880 * Stack switching interfaces for allocation 2881 */ 2882 static void 2883 xfs_btree_split_worker( 2884 struct work_struct *work) 2885 { 2886 struct xfs_btree_split_args *args = container_of(work, 2887 struct xfs_btree_split_args, work); 2888 unsigned long pflags; 2889 unsigned long new_pflags = PF_FSTRANS; 2890 2891 /* 2892 * we are in a transaction context here, but may also be doing work 2893 * in kswapd context, and hence we may need to inherit that state 2894 * temporarily to ensure that we don't block waiting for memory reclaim 2895 * in any way. 2896 */ 2897 if (args->kswapd) 2898 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; 2899 2900 current_set_flags_nested(&pflags, new_pflags); 2901 2902 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, 2903 args->key, args->curp, args->stat); 2904 complete(args->done); 2905 2906 current_restore_flags_nested(&pflags, new_pflags); 2907 } 2908 2909 /* 2910 * BMBT split requests often come in with little stack to work on. Push 2911 * them off to a worker thread so there is lots of stack to use. For the other 2912 * btree types, just call directly to avoid the context switch overhead here. 2913 */ 2914 STATIC int /* error */ 2915 xfs_btree_split( 2916 struct xfs_btree_cur *cur, 2917 int level, 2918 union xfs_btree_ptr *ptrp, 2919 union xfs_btree_key *key, 2920 struct xfs_btree_cur **curp, 2921 int *stat) /* success/failure */ 2922 { 2923 struct xfs_btree_split_args args; 2924 DECLARE_COMPLETION_ONSTACK(done); 2925 2926 if (cur->bc_btnum != XFS_BTNUM_BMAP) 2927 return __xfs_btree_split(cur, level, ptrp, key, curp, stat); 2928 2929 args.cur = cur; 2930 args.level = level; 2931 args.ptrp = ptrp; 2932 args.key = key; 2933 args.curp = curp; 2934 args.stat = stat; 2935 args.done = &done; 2936 args.kswapd = current_is_kswapd(); 2937 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); 2938 queue_work(xfs_alloc_wq, &args.work); 2939 wait_for_completion(&done); 2940 destroy_work_on_stack(&args.work); 2941 return args.result; 2942 } 2943 2944 2945 /* 2946 * Copy the old inode root contents into a real block and make the 2947 * broot point to it. 2948 */ 2949 int /* error */ 2950 xfs_btree_new_iroot( 2951 struct xfs_btree_cur *cur, /* btree cursor */ 2952 int *logflags, /* logging flags for inode */ 2953 int *stat) /* return status - 0 fail */ 2954 { 2955 struct xfs_buf *cbp; /* buffer for cblock */ 2956 struct xfs_btree_block *block; /* btree block */ 2957 struct xfs_btree_block *cblock; /* child btree block */ 2958 union xfs_btree_key *ckp; /* child key pointer */ 2959 union xfs_btree_ptr *cpp; /* child ptr pointer */ 2960 union xfs_btree_key *kp; /* pointer to btree key */ 2961 union xfs_btree_ptr *pp; /* pointer to block addr */ 2962 union xfs_btree_ptr nptr; /* new block addr */ 2963 int level; /* btree level */ 2964 int error; /* error return code */ 2965 #ifdef DEBUG 2966 int i; /* loop counter */ 2967 #endif 2968 2969 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2970 XFS_BTREE_STATS_INC(cur, newroot); 2971 2972 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 2973 2974 level = cur->bc_nlevels - 1; 2975 2976 block = xfs_btree_get_iroot(cur); 2977 pp = xfs_btree_ptr_addr(cur, 1, block); 2978 2979 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2980 error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); 2981 if (error) 2982 goto error0; 2983 if (*stat == 0) { 2984 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2985 return 0; 2986 } 2987 XFS_BTREE_STATS_INC(cur, alloc); 2988 2989 /* Copy the root into a real block. */ 2990 error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); 2991 if (error) 2992 goto error0; 2993 2994 /* 2995 * we can't just memcpy() the root in for CRC enabled btree blocks. 2996 * In that case have to also ensure the blkno remains correct 2997 */ 2998 memcpy(cblock, block, xfs_btree_block_len(cur)); 2999 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 3000 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 3001 cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn); 3002 else 3003 cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn); 3004 } 3005 3006 be16_add_cpu(&block->bb_level, 1); 3007 xfs_btree_set_numrecs(block, 1); 3008 cur->bc_nlevels++; 3009 cur->bc_ptrs[level + 1] = 1; 3010 3011 kp = xfs_btree_key_addr(cur, 1, block); 3012 ckp = xfs_btree_key_addr(cur, 1, cblock); 3013 xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); 3014 3015 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3016 #ifdef DEBUG 3017 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { 3018 error = xfs_btree_check_ptr(cur, pp, i, level); 3019 if (error) 3020 goto error0; 3021 } 3022 #endif 3023 xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); 3024 3025 #ifdef DEBUG 3026 error = xfs_btree_check_ptr(cur, &nptr, 0, level); 3027 if (error) 3028 goto error0; 3029 #endif 3030 xfs_btree_copy_ptrs(cur, pp, &nptr, 1); 3031 3032 xfs_iroot_realloc(cur->bc_private.b.ip, 3033 1 - xfs_btree_get_numrecs(cblock), 3034 cur->bc_private.b.whichfork); 3035 3036 xfs_btree_setbuf(cur, level, cbp); 3037 3038 /* 3039 * Do all this logging at the end so that 3040 * the root is at the right level. 3041 */ 3042 xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); 3043 xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 3044 xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 3045 3046 *logflags |= 3047 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); 3048 *stat = 1; 3049 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3050 return 0; 3051 error0: 3052 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3053 return error; 3054 } 3055 3056 /* 3057 * Allocate a new root block, fill it in. 3058 */ 3059 STATIC int /* error */ 3060 xfs_btree_new_root( 3061 struct xfs_btree_cur *cur, /* btree cursor */ 3062 int *stat) /* success/failure */ 3063 { 3064 struct xfs_btree_block *block; /* one half of the old root block */ 3065 struct xfs_buf *bp; /* buffer containing block */ 3066 int error; /* error return value */ 3067 struct xfs_buf *lbp; /* left buffer pointer */ 3068 struct xfs_btree_block *left; /* left btree block */ 3069 struct xfs_buf *nbp; /* new (root) buffer */ 3070 struct xfs_btree_block *new; /* new (root) btree block */ 3071 int nptr; /* new value for key index, 1 or 2 */ 3072 struct xfs_buf *rbp; /* right buffer pointer */ 3073 struct xfs_btree_block *right; /* right btree block */ 3074 union xfs_btree_ptr rptr; 3075 union xfs_btree_ptr lptr; 3076 3077 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3078 XFS_BTREE_STATS_INC(cur, newroot); 3079 3080 /* initialise our start point from the cursor */ 3081 cur->bc_ops->init_ptr_from_cur(cur, &rptr); 3082 3083 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 3084 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); 3085 if (error) 3086 goto error0; 3087 if (*stat == 0) 3088 goto out0; 3089 XFS_BTREE_STATS_INC(cur, alloc); 3090 3091 /* Set up the new block. */ 3092 error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); 3093 if (error) 3094 goto error0; 3095 3096 /* Set the root in the holding structure increasing the level by 1. */ 3097 cur->bc_ops->set_root(cur, &lptr, 1); 3098 3099 /* 3100 * At the previous root level there are now two blocks: the old root, 3101 * and the new block generated when it was split. We don't know which 3102 * one the cursor is pointing at, so we set up variables "left" and 3103 * "right" for each case. 3104 */ 3105 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); 3106 3107 #ifdef DEBUG 3108 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); 3109 if (error) 3110 goto error0; 3111 #endif 3112 3113 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3114 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3115 /* Our block is left, pick up the right block. */ 3116 lbp = bp; 3117 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 3118 left = block; 3119 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 3120 if (error) 3121 goto error0; 3122 bp = rbp; 3123 nptr = 1; 3124 } else { 3125 /* Our block is right, pick up the left block. */ 3126 rbp = bp; 3127 xfs_btree_buf_to_ptr(cur, rbp, &rptr); 3128 right = block; 3129 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 3130 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 3131 if (error) 3132 goto error0; 3133 bp = lbp; 3134 nptr = 2; 3135 } 3136 3137 /* Fill in the new block's btree header and log it. */ 3138 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); 3139 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 3140 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && 3141 !xfs_btree_ptr_is_null(cur, &rptr)); 3142 3143 /* Fill in the key data in the new root. */ 3144 if (xfs_btree_get_level(left) > 0) { 3145 /* 3146 * Get the keys for the left block's keys and put them directly 3147 * in the parent block. Do the same for the right block. 3148 */ 3149 xfs_btree_get_node_keys(cur, left, 3150 xfs_btree_key_addr(cur, 1, new)); 3151 xfs_btree_get_node_keys(cur, right, 3152 xfs_btree_key_addr(cur, 2, new)); 3153 } else { 3154 /* 3155 * Get the keys for the left block's records and put them 3156 * directly in the parent block. Do the same for the right 3157 * block. 3158 */ 3159 xfs_btree_get_leaf_keys(cur, left, 3160 xfs_btree_key_addr(cur, 1, new)); 3161 xfs_btree_get_leaf_keys(cur, right, 3162 xfs_btree_key_addr(cur, 2, new)); 3163 } 3164 xfs_btree_log_keys(cur, nbp, 1, 2); 3165 3166 /* Fill in the pointer data in the new root. */ 3167 xfs_btree_copy_ptrs(cur, 3168 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); 3169 xfs_btree_copy_ptrs(cur, 3170 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); 3171 xfs_btree_log_ptrs(cur, nbp, 1, 2); 3172 3173 /* Fix up the cursor. */ 3174 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); 3175 cur->bc_ptrs[cur->bc_nlevels] = nptr; 3176 cur->bc_nlevels++; 3177 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3178 *stat = 1; 3179 return 0; 3180 error0: 3181 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3182 return error; 3183 out0: 3184 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3185 *stat = 0; 3186 return 0; 3187 } 3188 3189 STATIC int 3190 xfs_btree_make_block_unfull( 3191 struct xfs_btree_cur *cur, /* btree cursor */ 3192 int level, /* btree level */ 3193 int numrecs,/* # of recs in block */ 3194 int *oindex,/* old tree index */ 3195 int *index, /* new tree index */ 3196 union xfs_btree_ptr *nptr, /* new btree ptr */ 3197 struct xfs_btree_cur **ncur, /* new btree cursor */ 3198 union xfs_btree_key *key, /* key of new block */ 3199 int *stat) 3200 { 3201 int error = 0; 3202 3203 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3204 level == cur->bc_nlevels - 1) { 3205 struct xfs_inode *ip = cur->bc_private.b.ip; 3206 3207 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { 3208 /* A root block that can be made bigger. */ 3209 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); 3210 *stat = 1; 3211 } else { 3212 /* A root block that needs replacing */ 3213 int logflags = 0; 3214 3215 error = xfs_btree_new_iroot(cur, &logflags, stat); 3216 if (error || *stat == 0) 3217 return error; 3218 3219 xfs_trans_log_inode(cur->bc_tp, ip, logflags); 3220 } 3221 3222 return 0; 3223 } 3224 3225 /* First, try shifting an entry to the right neighbor. */ 3226 error = xfs_btree_rshift(cur, level, stat); 3227 if (error || *stat) 3228 return error; 3229 3230 /* Next, try shifting an entry to the left neighbor. */ 3231 error = xfs_btree_lshift(cur, level, stat); 3232 if (error) 3233 return error; 3234 3235 if (*stat) { 3236 *oindex = *index = cur->bc_ptrs[level]; 3237 return 0; 3238 } 3239 3240 /* 3241 * Next, try splitting the current block in half. 3242 * 3243 * If this works we have to re-set our variables because we 3244 * could be in a different block now. 3245 */ 3246 error = xfs_btree_split(cur, level, nptr, key, ncur, stat); 3247 if (error || *stat == 0) 3248 return error; 3249 3250 3251 *index = cur->bc_ptrs[level]; 3252 return 0; 3253 } 3254 3255 /* 3256 * Insert one record/level. Return information to the caller 3257 * allowing the next level up to proceed if necessary. 3258 */ 3259 STATIC int 3260 xfs_btree_insrec( 3261 struct xfs_btree_cur *cur, /* btree cursor */ 3262 int level, /* level to insert record at */ 3263 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ 3264 union xfs_btree_rec *rec, /* record to insert */ 3265 union xfs_btree_key *key, /* i/o: block key for ptrp */ 3266 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ 3267 int *stat) /* success/failure */ 3268 { 3269 struct xfs_btree_block *block; /* btree block */ 3270 struct xfs_buf *bp; /* buffer for block */ 3271 union xfs_btree_ptr nptr; /* new block ptr */ 3272 struct xfs_btree_cur *ncur; /* new btree cursor */ 3273 union xfs_btree_key nkey; /* new block key */ 3274 union xfs_btree_key *lkey; 3275 int optr; /* old key/record index */ 3276 int ptr; /* key/record index */ 3277 int numrecs;/* number of records */ 3278 int error; /* error return value */ 3279 #ifdef DEBUG 3280 int i; 3281 #endif 3282 xfs_daddr_t old_bn; 3283 3284 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3285 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec); 3286 3287 ncur = NULL; 3288 lkey = &nkey; 3289 3290 /* 3291 * If we have an external root pointer, and we've made it to the 3292 * root level, allocate a new root block and we're done. 3293 */ 3294 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3295 (level >= cur->bc_nlevels)) { 3296 error = xfs_btree_new_root(cur, stat); 3297 xfs_btree_set_ptr_null(cur, ptrp); 3298 3299 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3300 return error; 3301 } 3302 3303 /* If we're off the left edge, return failure. */ 3304 ptr = cur->bc_ptrs[level]; 3305 if (ptr == 0) { 3306 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3307 *stat = 0; 3308 return 0; 3309 } 3310 3311 optr = ptr; 3312 3313 XFS_BTREE_STATS_INC(cur, insrec); 3314 3315 /* Get pointers to the btree buffer and block. */ 3316 block = xfs_btree_get_block(cur, level, &bp); 3317 old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL; 3318 numrecs = xfs_btree_get_numrecs(block); 3319 3320 #ifdef DEBUG 3321 error = xfs_btree_check_block(cur, block, level, bp); 3322 if (error) 3323 goto error0; 3324 3325 /* Check that the new entry is being inserted in the right place. */ 3326 if (ptr <= numrecs) { 3327 if (level == 0) { 3328 ASSERT(cur->bc_ops->recs_inorder(cur, rec, 3329 xfs_btree_rec_addr(cur, ptr, block))); 3330 } else { 3331 ASSERT(cur->bc_ops->keys_inorder(cur, key, 3332 xfs_btree_key_addr(cur, ptr, block))); 3333 } 3334 } 3335 #endif 3336 3337 /* 3338 * If the block is full, we can't insert the new entry until we 3339 * make the block un-full. 3340 */ 3341 xfs_btree_set_ptr_null(cur, &nptr); 3342 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { 3343 error = xfs_btree_make_block_unfull(cur, level, numrecs, 3344 &optr, &ptr, &nptr, &ncur, lkey, stat); 3345 if (error || *stat == 0) 3346 goto error0; 3347 } 3348 3349 /* 3350 * The current block may have changed if the block was 3351 * previously full and we have just made space in it. 3352 */ 3353 block = xfs_btree_get_block(cur, level, &bp); 3354 numrecs = xfs_btree_get_numrecs(block); 3355 3356 #ifdef DEBUG 3357 error = xfs_btree_check_block(cur, block, level, bp); 3358 if (error) 3359 return error; 3360 #endif 3361 3362 /* 3363 * At this point we know there's room for our new entry in the block 3364 * we're pointing at. 3365 */ 3366 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); 3367 3368 if (level > 0) { 3369 /* It's a nonleaf. make a hole in the keys and ptrs */ 3370 union xfs_btree_key *kp; 3371 union xfs_btree_ptr *pp; 3372 3373 kp = xfs_btree_key_addr(cur, ptr, block); 3374 pp = xfs_btree_ptr_addr(cur, ptr, block); 3375 3376 #ifdef DEBUG 3377 for (i = numrecs - ptr; i >= 0; i--) { 3378 error = xfs_btree_check_ptr(cur, pp, i, level); 3379 if (error) 3380 return error; 3381 } 3382 #endif 3383 3384 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); 3385 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); 3386 3387 #ifdef DEBUG 3388 error = xfs_btree_check_ptr(cur, ptrp, 0, level); 3389 if (error) 3390 goto error0; 3391 #endif 3392 3393 /* Now put the new data in, bump numrecs and log it. */ 3394 xfs_btree_copy_keys(cur, kp, key, 1); 3395 xfs_btree_copy_ptrs(cur, pp, ptrp, 1); 3396 numrecs++; 3397 xfs_btree_set_numrecs(block, numrecs); 3398 xfs_btree_log_ptrs(cur, bp, ptr, numrecs); 3399 xfs_btree_log_keys(cur, bp, ptr, numrecs); 3400 #ifdef DEBUG 3401 if (ptr < numrecs) { 3402 ASSERT(cur->bc_ops->keys_inorder(cur, kp, 3403 xfs_btree_key_addr(cur, ptr + 1, block))); 3404 } 3405 #endif 3406 } else { 3407 /* It's a leaf. make a hole in the records */ 3408 union xfs_btree_rec *rp; 3409 3410 rp = xfs_btree_rec_addr(cur, ptr, block); 3411 3412 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); 3413 3414 /* Now put the new data in, bump numrecs and log it. */ 3415 xfs_btree_copy_recs(cur, rp, rec, 1); 3416 xfs_btree_set_numrecs(block, ++numrecs); 3417 xfs_btree_log_recs(cur, bp, ptr, numrecs); 3418 #ifdef DEBUG 3419 if (ptr < numrecs) { 3420 ASSERT(cur->bc_ops->recs_inorder(cur, rp, 3421 xfs_btree_rec_addr(cur, ptr + 1, block))); 3422 } 3423 #endif 3424 } 3425 3426 /* Log the new number of records in the btree header. */ 3427 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3428 3429 /* 3430 * If we just inserted into a new tree block, we have to 3431 * recalculate nkey here because nkey is out of date. 3432 * 3433 * Otherwise we're just updating an existing block (having shoved 3434 * some records into the new tree block), so use the regular key 3435 * update mechanism. 3436 */ 3437 if (bp && bp->b_bn != old_bn) { 3438 xfs_btree_get_keys(cur, block, lkey); 3439 } else if (xfs_btree_needs_key_update(cur, optr)) { 3440 error = xfs_btree_update_keys(cur, level); 3441 if (error) 3442 goto error0; 3443 } 3444 3445 /* 3446 * If we are tracking the last record in the tree and 3447 * we are at the far right edge of the tree, update it. 3448 */ 3449 if (xfs_btree_is_lastrec(cur, block, level)) { 3450 cur->bc_ops->update_lastrec(cur, block, rec, 3451 ptr, LASTREC_INSREC); 3452 } 3453 3454 /* 3455 * Return the new block number, if any. 3456 * If there is one, give back a record value and a cursor too. 3457 */ 3458 *ptrp = nptr; 3459 if (!xfs_btree_ptr_is_null(cur, &nptr)) { 3460 xfs_btree_copy_keys(cur, key, lkey, 1); 3461 *curp = ncur; 3462 } 3463 3464 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3465 *stat = 1; 3466 return 0; 3467 3468 error0: 3469 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3470 return error; 3471 } 3472 3473 /* 3474 * Insert the record at the point referenced by cur. 3475 * 3476 * A multi-level split of the tree on insert will invalidate the original 3477 * cursor. All callers of this function should assume that the cursor is 3478 * no longer valid and revalidate it. 3479 */ 3480 int 3481 xfs_btree_insert( 3482 struct xfs_btree_cur *cur, 3483 int *stat) 3484 { 3485 int error; /* error return value */ 3486 int i; /* result value, 0 for failure */ 3487 int level; /* current level number in btree */ 3488 union xfs_btree_ptr nptr; /* new block number (split result) */ 3489 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3490 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3491 union xfs_btree_key bkey; /* key of block to insert */ 3492 union xfs_btree_key *key; 3493 union xfs_btree_rec rec; /* record to insert */ 3494 3495 level = 0; 3496 ncur = NULL; 3497 pcur = cur; 3498 key = &bkey; 3499 3500 xfs_btree_set_ptr_null(cur, &nptr); 3501 3502 /* Make a key out of the record data to be inserted, and save it. */ 3503 cur->bc_ops->init_rec_from_cur(cur, &rec); 3504 cur->bc_ops->init_key_from_rec(key, &rec); 3505 3506 /* 3507 * Loop going up the tree, starting at the leaf level. 3508 * Stop when we don't get a split block, that must mean that 3509 * the insert is finished with this level. 3510 */ 3511 do { 3512 /* 3513 * Insert nrec/nptr into this level of the tree. 3514 * Note if we fail, nptr will be null. 3515 */ 3516 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key, 3517 &ncur, &i); 3518 if (error) { 3519 if (pcur != cur) 3520 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 3521 goto error0; 3522 } 3523 3524 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3525 level++; 3526 3527 /* 3528 * See if the cursor we just used is trash. 3529 * Can't trash the caller's cursor, but otherwise we should 3530 * if ncur is a new cursor or we're about to be done. 3531 */ 3532 if (pcur != cur && 3533 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { 3534 /* Save the state from the cursor before we trash it */ 3535 if (cur->bc_ops->update_cursor) 3536 cur->bc_ops->update_cursor(pcur, cur); 3537 cur->bc_nlevels = pcur->bc_nlevels; 3538 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 3539 } 3540 /* If we got a new cursor, switch to it. */ 3541 if (ncur) { 3542 pcur = ncur; 3543 ncur = NULL; 3544 } 3545 } while (!xfs_btree_ptr_is_null(cur, &nptr)); 3546 3547 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3548 *stat = i; 3549 return 0; 3550 error0: 3551 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3552 return error; 3553 } 3554 3555 /* 3556 * Try to merge a non-leaf block back into the inode root. 3557 * 3558 * Note: the killroot names comes from the fact that we're effectively 3559 * killing the old root block. But because we can't just delete the 3560 * inode we have to copy the single block it was pointing to into the 3561 * inode. 3562 */ 3563 STATIC int 3564 xfs_btree_kill_iroot( 3565 struct xfs_btree_cur *cur) 3566 { 3567 int whichfork = cur->bc_private.b.whichfork; 3568 struct xfs_inode *ip = cur->bc_private.b.ip; 3569 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3570 struct xfs_btree_block *block; 3571 struct xfs_btree_block *cblock; 3572 union xfs_btree_key *kp; 3573 union xfs_btree_key *ckp; 3574 union xfs_btree_ptr *pp; 3575 union xfs_btree_ptr *cpp; 3576 struct xfs_buf *cbp; 3577 int level; 3578 int index; 3579 int numrecs; 3580 int error; 3581 #ifdef DEBUG 3582 union xfs_btree_ptr ptr; 3583 int i; 3584 #endif 3585 3586 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3587 3588 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 3589 ASSERT(cur->bc_nlevels > 1); 3590 3591 /* 3592 * Don't deal with the root block needs to be a leaf case. 3593 * We're just going to turn the thing back into extents anyway. 3594 */ 3595 level = cur->bc_nlevels - 1; 3596 if (level == 1) 3597 goto out0; 3598 3599 /* 3600 * Give up if the root has multiple children. 3601 */ 3602 block = xfs_btree_get_iroot(cur); 3603 if (xfs_btree_get_numrecs(block) != 1) 3604 goto out0; 3605 3606 cblock = xfs_btree_get_block(cur, level - 1, &cbp); 3607 numrecs = xfs_btree_get_numrecs(cblock); 3608 3609 /* 3610 * Only do this if the next level will fit. 3611 * Then the data must be copied up to the inode, 3612 * instead of freeing the root you free the next level. 3613 */ 3614 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) 3615 goto out0; 3616 3617 XFS_BTREE_STATS_INC(cur, killroot); 3618 3619 #ifdef DEBUG 3620 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 3621 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3622 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 3623 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3624 #endif 3625 3626 index = numrecs - cur->bc_ops->get_maxrecs(cur, level); 3627 if (index) { 3628 xfs_iroot_realloc(cur->bc_private.b.ip, index, 3629 cur->bc_private.b.whichfork); 3630 block = ifp->if_broot; 3631 } 3632 3633 be16_add_cpu(&block->bb_numrecs, index); 3634 ASSERT(block->bb_numrecs == cblock->bb_numrecs); 3635 3636 kp = xfs_btree_key_addr(cur, 1, block); 3637 ckp = xfs_btree_key_addr(cur, 1, cblock); 3638 xfs_btree_copy_keys(cur, kp, ckp, numrecs); 3639 3640 pp = xfs_btree_ptr_addr(cur, 1, block); 3641 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3642 #ifdef DEBUG 3643 for (i = 0; i < numrecs; i++) { 3644 error = xfs_btree_check_ptr(cur, cpp, i, level - 1); 3645 if (error) { 3646 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3647 return error; 3648 } 3649 } 3650 #endif 3651 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); 3652 3653 error = xfs_btree_free_block(cur, cbp); 3654 if (error) { 3655 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3656 return error; 3657 } 3658 3659 cur->bc_bufs[level - 1] = NULL; 3660 be16_add_cpu(&block->bb_level, -1); 3661 xfs_trans_log_inode(cur->bc_tp, ip, 3662 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 3663 cur->bc_nlevels--; 3664 out0: 3665 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3666 return 0; 3667 } 3668 3669 /* 3670 * Kill the current root node, and replace it with it's only child node. 3671 */ 3672 STATIC int 3673 xfs_btree_kill_root( 3674 struct xfs_btree_cur *cur, 3675 struct xfs_buf *bp, 3676 int level, 3677 union xfs_btree_ptr *newroot) 3678 { 3679 int error; 3680 3681 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3682 XFS_BTREE_STATS_INC(cur, killroot); 3683 3684 /* 3685 * Update the root pointer, decreasing the level by 1 and then 3686 * free the old root. 3687 */ 3688 cur->bc_ops->set_root(cur, newroot, -1); 3689 3690 error = xfs_btree_free_block(cur, bp); 3691 if (error) { 3692 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3693 return error; 3694 } 3695 3696 cur->bc_bufs[level] = NULL; 3697 cur->bc_ra[level] = 0; 3698 cur->bc_nlevels--; 3699 3700 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3701 return 0; 3702 } 3703 3704 STATIC int 3705 xfs_btree_dec_cursor( 3706 struct xfs_btree_cur *cur, 3707 int level, 3708 int *stat) 3709 { 3710 int error; 3711 int i; 3712 3713 if (level > 0) { 3714 error = xfs_btree_decrement(cur, level, &i); 3715 if (error) 3716 return error; 3717 } 3718 3719 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3720 *stat = 1; 3721 return 0; 3722 } 3723 3724 /* 3725 * Single level of the btree record deletion routine. 3726 * Delete record pointed to by cur/level. 3727 * Remove the record from its block then rebalance the tree. 3728 * Return 0 for error, 1 for done, 2 to go on to the next level. 3729 */ 3730 STATIC int /* error */ 3731 xfs_btree_delrec( 3732 struct xfs_btree_cur *cur, /* btree cursor */ 3733 int level, /* level removing record from */ 3734 int *stat) /* fail/done/go-on */ 3735 { 3736 struct xfs_btree_block *block; /* btree block */ 3737 union xfs_btree_ptr cptr; /* current block ptr */ 3738 struct xfs_buf *bp; /* buffer for block */ 3739 int error; /* error return value */ 3740 int i; /* loop counter */ 3741 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3742 struct xfs_buf *lbp; /* left buffer pointer */ 3743 struct xfs_btree_block *left; /* left btree block */ 3744 int lrecs = 0; /* left record count */ 3745 int ptr; /* key/record index */ 3746 union xfs_btree_ptr rptr; /* right sibling block ptr */ 3747 struct xfs_buf *rbp; /* right buffer pointer */ 3748 struct xfs_btree_block *right; /* right btree block */ 3749 struct xfs_btree_block *rrblock; /* right-right btree block */ 3750 struct xfs_buf *rrbp; /* right-right buffer pointer */ 3751 int rrecs = 0; /* right record count */ 3752 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 3753 int numrecs; /* temporary numrec count */ 3754 3755 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3756 XFS_BTREE_TRACE_ARGI(cur, level); 3757 3758 tcur = NULL; 3759 3760 /* Get the index of the entry being deleted, check for nothing there. */ 3761 ptr = cur->bc_ptrs[level]; 3762 if (ptr == 0) { 3763 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3764 *stat = 0; 3765 return 0; 3766 } 3767 3768 /* Get the buffer & block containing the record or key/ptr. */ 3769 block = xfs_btree_get_block(cur, level, &bp); 3770 numrecs = xfs_btree_get_numrecs(block); 3771 3772 #ifdef DEBUG 3773 error = xfs_btree_check_block(cur, block, level, bp); 3774 if (error) 3775 goto error0; 3776 #endif 3777 3778 /* Fail if we're off the end of the block. */ 3779 if (ptr > numrecs) { 3780 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3781 *stat = 0; 3782 return 0; 3783 } 3784 3785 XFS_BTREE_STATS_INC(cur, delrec); 3786 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); 3787 3788 /* Excise the entries being deleted. */ 3789 if (level > 0) { 3790 /* It's a nonleaf. operate on keys and ptrs */ 3791 union xfs_btree_key *lkp; 3792 union xfs_btree_ptr *lpp; 3793 3794 lkp = xfs_btree_key_addr(cur, ptr + 1, block); 3795 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); 3796 3797 #ifdef DEBUG 3798 for (i = 0; i < numrecs - ptr; i++) { 3799 error = xfs_btree_check_ptr(cur, lpp, i, level); 3800 if (error) 3801 goto error0; 3802 } 3803 #endif 3804 3805 if (ptr < numrecs) { 3806 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); 3807 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); 3808 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3809 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3810 } 3811 } else { 3812 /* It's a leaf. operate on records */ 3813 if (ptr < numrecs) { 3814 xfs_btree_shift_recs(cur, 3815 xfs_btree_rec_addr(cur, ptr + 1, block), 3816 -1, numrecs - ptr); 3817 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3818 } 3819 } 3820 3821 /* 3822 * Decrement and log the number of entries in the block. 3823 */ 3824 xfs_btree_set_numrecs(block, --numrecs); 3825 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3826 3827 /* 3828 * If we are tracking the last record in the tree and 3829 * we are at the far right edge of the tree, update it. 3830 */ 3831 if (xfs_btree_is_lastrec(cur, block, level)) { 3832 cur->bc_ops->update_lastrec(cur, block, NULL, 3833 ptr, LASTREC_DELREC); 3834 } 3835 3836 /* 3837 * We're at the root level. First, shrink the root block in-memory. 3838 * Try to get rid of the next level down. If we can't then there's 3839 * nothing left to do. 3840 */ 3841 if (level == cur->bc_nlevels - 1) { 3842 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3843 xfs_iroot_realloc(cur->bc_private.b.ip, -1, 3844 cur->bc_private.b.whichfork); 3845 3846 error = xfs_btree_kill_iroot(cur); 3847 if (error) 3848 goto error0; 3849 3850 error = xfs_btree_dec_cursor(cur, level, stat); 3851 if (error) 3852 goto error0; 3853 *stat = 1; 3854 return 0; 3855 } 3856 3857 /* 3858 * If this is the root level, and there's only one entry left, 3859 * and it's NOT the leaf level, then we can get rid of this 3860 * level. 3861 */ 3862 if (numrecs == 1 && level > 0) { 3863 union xfs_btree_ptr *pp; 3864 /* 3865 * pp is still set to the first pointer in the block. 3866 * Make it the new root of the btree. 3867 */ 3868 pp = xfs_btree_ptr_addr(cur, 1, block); 3869 error = xfs_btree_kill_root(cur, bp, level, pp); 3870 if (error) 3871 goto error0; 3872 } else if (level > 0) { 3873 error = xfs_btree_dec_cursor(cur, level, stat); 3874 if (error) 3875 goto error0; 3876 } 3877 *stat = 1; 3878 return 0; 3879 } 3880 3881 /* 3882 * If we deleted the leftmost entry in the block, update the 3883 * key values above us in the tree. 3884 */ 3885 if (xfs_btree_needs_key_update(cur, ptr)) { 3886 error = xfs_btree_update_keys(cur, level); 3887 if (error) 3888 goto error0; 3889 } 3890 3891 /* 3892 * If the number of records remaining in the block is at least 3893 * the minimum, we're done. 3894 */ 3895 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { 3896 error = xfs_btree_dec_cursor(cur, level, stat); 3897 if (error) 3898 goto error0; 3899 return 0; 3900 } 3901 3902 /* 3903 * Otherwise, we have to move some records around to keep the 3904 * tree balanced. Look at the left and right sibling blocks to 3905 * see if we can re-balance by moving only one record. 3906 */ 3907 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3908 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); 3909 3910 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3911 /* 3912 * One child of root, need to get a chance to copy its contents 3913 * into the root and delete it. Can't go up to next level, 3914 * there's nothing to delete there. 3915 */ 3916 if (xfs_btree_ptr_is_null(cur, &rptr) && 3917 xfs_btree_ptr_is_null(cur, &lptr) && 3918 level == cur->bc_nlevels - 2) { 3919 error = xfs_btree_kill_iroot(cur); 3920 if (!error) 3921 error = xfs_btree_dec_cursor(cur, level, stat); 3922 if (error) 3923 goto error0; 3924 return 0; 3925 } 3926 } 3927 3928 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || 3929 !xfs_btree_ptr_is_null(cur, &lptr)); 3930 3931 /* 3932 * Duplicate the cursor so our btree manipulations here won't 3933 * disrupt the next level up. 3934 */ 3935 error = xfs_btree_dup_cursor(cur, &tcur); 3936 if (error) 3937 goto error0; 3938 3939 /* 3940 * If there's a right sibling, see if it's ok to shift an entry 3941 * out of it. 3942 */ 3943 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3944 /* 3945 * Move the temp cursor to the last entry in the next block. 3946 * Actually any entry but the first would suffice. 3947 */ 3948 i = xfs_btree_lastrec(tcur, level); 3949 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3950 3951 error = xfs_btree_increment(tcur, level, &i); 3952 if (error) 3953 goto error0; 3954 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3955 3956 i = xfs_btree_lastrec(tcur, level); 3957 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3958 3959 /* Grab a pointer to the block. */ 3960 right = xfs_btree_get_block(tcur, level, &rbp); 3961 #ifdef DEBUG 3962 error = xfs_btree_check_block(tcur, right, level, rbp); 3963 if (error) 3964 goto error0; 3965 #endif 3966 /* Grab the current block number, for future use. */ 3967 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); 3968 3969 /* 3970 * If right block is full enough so that removing one entry 3971 * won't make it too empty, and left-shifting an entry out 3972 * of right to us works, we're done. 3973 */ 3974 if (xfs_btree_get_numrecs(right) - 1 >= 3975 cur->bc_ops->get_minrecs(tcur, level)) { 3976 error = xfs_btree_lshift(tcur, level, &i); 3977 if (error) 3978 goto error0; 3979 if (i) { 3980 ASSERT(xfs_btree_get_numrecs(block) >= 3981 cur->bc_ops->get_minrecs(tcur, level)); 3982 3983 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3984 tcur = NULL; 3985 3986 error = xfs_btree_dec_cursor(cur, level, stat); 3987 if (error) 3988 goto error0; 3989 return 0; 3990 } 3991 } 3992 3993 /* 3994 * Otherwise, grab the number of records in right for 3995 * future reference, and fix up the temp cursor to point 3996 * to our block again (last record). 3997 */ 3998 rrecs = xfs_btree_get_numrecs(right); 3999 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 4000 i = xfs_btree_firstrec(tcur, level); 4001 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 4002 4003 error = xfs_btree_decrement(tcur, level, &i); 4004 if (error) 4005 goto error0; 4006 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 4007 } 4008 } 4009 4010 /* 4011 * If there's a left sibling, see if it's ok to shift an entry 4012 * out of it. 4013 */ 4014 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 4015 /* 4016 * Move the temp cursor to the first entry in the 4017 * previous block. 4018 */ 4019 i = xfs_btree_firstrec(tcur, level); 4020 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 4021 4022 error = xfs_btree_decrement(tcur, level, &i); 4023 if (error) 4024 goto error0; 4025 i = xfs_btree_firstrec(tcur, level); 4026 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 4027 4028 /* Grab a pointer to the block. */ 4029 left = xfs_btree_get_block(tcur, level, &lbp); 4030 #ifdef DEBUG 4031 error = xfs_btree_check_block(cur, left, level, lbp); 4032 if (error) 4033 goto error0; 4034 #endif 4035 /* Grab the current block number, for future use. */ 4036 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); 4037 4038 /* 4039 * If left block is full enough so that removing one entry 4040 * won't make it too empty, and right-shifting an entry out 4041 * of left to us works, we're done. 4042 */ 4043 if (xfs_btree_get_numrecs(left) - 1 >= 4044 cur->bc_ops->get_minrecs(tcur, level)) { 4045 error = xfs_btree_rshift(tcur, level, &i); 4046 if (error) 4047 goto error0; 4048 if (i) { 4049 ASSERT(xfs_btree_get_numrecs(block) >= 4050 cur->bc_ops->get_minrecs(tcur, level)); 4051 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 4052 tcur = NULL; 4053 if (level == 0) 4054 cur->bc_ptrs[0]++; 4055 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 4056 *stat = 1; 4057 return 0; 4058 } 4059 } 4060 4061 /* 4062 * Otherwise, grab the number of records in right for 4063 * future reference. 4064 */ 4065 lrecs = xfs_btree_get_numrecs(left); 4066 } 4067 4068 /* Delete the temp cursor, we're done with it. */ 4069 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 4070 tcur = NULL; 4071 4072 /* If here, we need to do a join to keep the tree balanced. */ 4073 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); 4074 4075 if (!xfs_btree_ptr_is_null(cur, &lptr) && 4076 lrecs + xfs_btree_get_numrecs(block) <= 4077 cur->bc_ops->get_maxrecs(cur, level)) { 4078 /* 4079 * Set "right" to be the starting block, 4080 * "left" to be the left neighbor. 4081 */ 4082 rptr = cptr; 4083 right = block; 4084 rbp = bp; 4085 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 4086 if (error) 4087 goto error0; 4088 4089 /* 4090 * If that won't work, see if we can join with the right neighbor block. 4091 */ 4092 } else if (!xfs_btree_ptr_is_null(cur, &rptr) && 4093 rrecs + xfs_btree_get_numrecs(block) <= 4094 cur->bc_ops->get_maxrecs(cur, level)) { 4095 /* 4096 * Set "left" to be the starting block, 4097 * "right" to be the right neighbor. 4098 */ 4099 lptr = cptr; 4100 left = block; 4101 lbp = bp; 4102 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 4103 if (error) 4104 goto error0; 4105 4106 /* 4107 * Otherwise, we can't fix the imbalance. 4108 * Just return. This is probably a logic error, but it's not fatal. 4109 */ 4110 } else { 4111 error = xfs_btree_dec_cursor(cur, level, stat); 4112 if (error) 4113 goto error0; 4114 return 0; 4115 } 4116 4117 rrecs = xfs_btree_get_numrecs(right); 4118 lrecs = xfs_btree_get_numrecs(left); 4119 4120 /* 4121 * We're now going to join "left" and "right" by moving all the stuff 4122 * in "right" to "left" and deleting "right". 4123 */ 4124 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 4125 if (level > 0) { 4126 /* It's a non-leaf. Move keys and pointers. */ 4127 union xfs_btree_key *lkp; /* left btree key */ 4128 union xfs_btree_ptr *lpp; /* left address pointer */ 4129 union xfs_btree_key *rkp; /* right btree key */ 4130 union xfs_btree_ptr *rpp; /* right address pointer */ 4131 4132 lkp = xfs_btree_key_addr(cur, lrecs + 1, left); 4133 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); 4134 rkp = xfs_btree_key_addr(cur, 1, right); 4135 rpp = xfs_btree_ptr_addr(cur, 1, right); 4136 #ifdef DEBUG 4137 for (i = 1; i < rrecs; i++) { 4138 error = xfs_btree_check_ptr(cur, rpp, i, level); 4139 if (error) 4140 goto error0; 4141 } 4142 #endif 4143 xfs_btree_copy_keys(cur, lkp, rkp, rrecs); 4144 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); 4145 4146 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); 4147 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); 4148 } else { 4149 /* It's a leaf. Move records. */ 4150 union xfs_btree_rec *lrp; /* left record pointer */ 4151 union xfs_btree_rec *rrp; /* right record pointer */ 4152 4153 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); 4154 rrp = xfs_btree_rec_addr(cur, 1, right); 4155 4156 xfs_btree_copy_recs(cur, lrp, rrp, rrecs); 4157 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); 4158 } 4159 4160 XFS_BTREE_STATS_INC(cur, join); 4161 4162 /* 4163 * Fix up the number of records and right block pointer in the 4164 * surviving block, and log it. 4165 */ 4166 xfs_btree_set_numrecs(left, lrecs + rrecs); 4167 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), 4168 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4169 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 4170 4171 /* If there is a right sibling, point it to the remaining block. */ 4172 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4173 if (!xfs_btree_ptr_is_null(cur, &cptr)) { 4174 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); 4175 if (error) 4176 goto error0; 4177 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); 4178 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 4179 } 4180 4181 /* Free the deleted block. */ 4182 error = xfs_btree_free_block(cur, rbp); 4183 if (error) 4184 goto error0; 4185 4186 /* 4187 * If we joined with the left neighbor, set the buffer in the 4188 * cursor to the left block, and fix up the index. 4189 */ 4190 if (bp != lbp) { 4191 cur->bc_bufs[level] = lbp; 4192 cur->bc_ptrs[level] += lrecs; 4193 cur->bc_ra[level] = 0; 4194 } 4195 /* 4196 * If we joined with the right neighbor and there's a level above 4197 * us, increment the cursor at that level. 4198 */ 4199 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || 4200 (level + 1 < cur->bc_nlevels)) { 4201 error = xfs_btree_increment(cur, level + 1, &i); 4202 if (error) 4203 goto error0; 4204 } 4205 4206 /* 4207 * Readjust the ptr at this level if it's not a leaf, since it's 4208 * still pointing at the deletion point, which makes the cursor 4209 * inconsistent. If this makes the ptr 0, the caller fixes it up. 4210 * We can't use decrement because it would change the next level up. 4211 */ 4212 if (level > 0) 4213 cur->bc_ptrs[level]--; 4214 4215 /* 4216 * We combined blocks, so we have to update the parent keys if the 4217 * btree supports overlapped intervals. However, bc_ptrs[level + 1] 4218 * points to the old block so that the caller knows which record to 4219 * delete. Therefore, the caller must be savvy enough to call updkeys 4220 * for us if we return stat == 2. The other exit points from this 4221 * function don't require deletions further up the tree, so they can 4222 * call updkeys directly. 4223 */ 4224 4225 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 4226 /* Return value means the next level up has something to do. */ 4227 *stat = 2; 4228 return 0; 4229 4230 error0: 4231 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 4232 if (tcur) 4233 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 4234 return error; 4235 } 4236 4237 /* 4238 * Delete the record pointed to by cur. 4239 * The cursor refers to the place where the record was (could be inserted) 4240 * when the operation returns. 4241 */ 4242 int /* error */ 4243 xfs_btree_delete( 4244 struct xfs_btree_cur *cur, 4245 int *stat) /* success/failure */ 4246 { 4247 int error; /* error return value */ 4248 int level; 4249 int i; 4250 bool joined = false; 4251 4252 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 4253 4254 /* 4255 * Go up the tree, starting at leaf level. 4256 * 4257 * If 2 is returned then a join was done; go to the next level. 4258 * Otherwise we are done. 4259 */ 4260 for (level = 0, i = 2; i == 2; level++) { 4261 error = xfs_btree_delrec(cur, level, &i); 4262 if (error) 4263 goto error0; 4264 if (i == 2) 4265 joined = true; 4266 } 4267 4268 /* 4269 * If we combined blocks as part of deleting the record, delrec won't 4270 * have updated the parent high keys so we have to do that here. 4271 */ 4272 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) { 4273 error = xfs_btree_updkeys_force(cur, 0); 4274 if (error) 4275 goto error0; 4276 } 4277 4278 if (i == 0) { 4279 for (level = 1; level < cur->bc_nlevels; level++) { 4280 if (cur->bc_ptrs[level] == 0) { 4281 error = xfs_btree_decrement(cur, level, &i); 4282 if (error) 4283 goto error0; 4284 break; 4285 } 4286 } 4287 } 4288 4289 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 4290 *stat = i; 4291 return 0; 4292 error0: 4293 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 4294 return error; 4295 } 4296 4297 /* 4298 * Get the data from the pointed-to record. 4299 */ 4300 int /* error */ 4301 xfs_btree_get_rec( 4302 struct xfs_btree_cur *cur, /* btree cursor */ 4303 union xfs_btree_rec **recp, /* output: btree record */ 4304 int *stat) /* output: success/failure */ 4305 { 4306 struct xfs_btree_block *block; /* btree block */ 4307 struct xfs_buf *bp; /* buffer pointer */ 4308 int ptr; /* record number */ 4309 #ifdef DEBUG 4310 int error; /* error return value */ 4311 #endif 4312 4313 ptr = cur->bc_ptrs[0]; 4314 block = xfs_btree_get_block(cur, 0, &bp); 4315 4316 #ifdef DEBUG 4317 error = xfs_btree_check_block(cur, block, 0, bp); 4318 if (error) 4319 return error; 4320 #endif 4321 4322 /* 4323 * Off the right end or left end, return failure. 4324 */ 4325 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { 4326 *stat = 0; 4327 return 0; 4328 } 4329 4330 /* 4331 * Point to the record and extract its data. 4332 */ 4333 *recp = xfs_btree_rec_addr(cur, ptr, block); 4334 *stat = 1; 4335 return 0; 4336 } 4337 4338 /* Visit a block in a btree. */ 4339 STATIC int 4340 xfs_btree_visit_block( 4341 struct xfs_btree_cur *cur, 4342 int level, 4343 xfs_btree_visit_blocks_fn fn, 4344 void *data) 4345 { 4346 struct xfs_btree_block *block; 4347 struct xfs_buf *bp; 4348 union xfs_btree_ptr rptr; 4349 int error; 4350 4351 /* do right sibling readahead */ 4352 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 4353 block = xfs_btree_get_block(cur, level, &bp); 4354 4355 /* process the block */ 4356 error = fn(cur, level, data); 4357 if (error) 4358 return error; 4359 4360 /* now read rh sibling block for next iteration */ 4361 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 4362 if (xfs_btree_ptr_is_null(cur, &rptr)) 4363 return -ENOENT; 4364 4365 return xfs_btree_lookup_get_block(cur, level, &rptr, &block); 4366 } 4367 4368 4369 /* Visit every block in a btree. */ 4370 int 4371 xfs_btree_visit_blocks( 4372 struct xfs_btree_cur *cur, 4373 xfs_btree_visit_blocks_fn fn, 4374 void *data) 4375 { 4376 union xfs_btree_ptr lptr; 4377 int level; 4378 struct xfs_btree_block *block = NULL; 4379 int error = 0; 4380 4381 cur->bc_ops->init_ptr_from_cur(cur, &lptr); 4382 4383 /* for each level */ 4384 for (level = cur->bc_nlevels - 1; level >= 0; level--) { 4385 /* grab the left hand block */ 4386 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); 4387 if (error) 4388 return error; 4389 4390 /* readahead the left most block for the next level down */ 4391 if (level > 0) { 4392 union xfs_btree_ptr *ptr; 4393 4394 ptr = xfs_btree_ptr_addr(cur, 1, block); 4395 xfs_btree_readahead_ptr(cur, ptr, 1); 4396 4397 /* save for the next iteration of the loop */ 4398 lptr = *ptr; 4399 } 4400 4401 /* for each buffer in the level */ 4402 do { 4403 error = xfs_btree_visit_block(cur, level, fn, data); 4404 } while (!error); 4405 4406 if (error != -ENOENT) 4407 return error; 4408 } 4409 4410 return 0; 4411 } 4412 4413 /* 4414 * Change the owner of a btree. 4415 * 4416 * The mechanism we use here is ordered buffer logging. Because we don't know 4417 * how many buffers were are going to need to modify, we don't really want to 4418 * have to make transaction reservations for the worst case of every buffer in a 4419 * full size btree as that may be more space that we can fit in the log.... 4420 * 4421 * We do the btree walk in the most optimal manner possible - we have sibling 4422 * pointers so we can just walk all the blocks on each level from left to right 4423 * in a single pass, and then move to the next level and do the same. We can 4424 * also do readahead on the sibling pointers to get IO moving more quickly, 4425 * though for slow disks this is unlikely to make much difference to performance 4426 * as the amount of CPU work we have to do before moving to the next block is 4427 * relatively small. 4428 * 4429 * For each btree block that we load, modify the owner appropriately, set the 4430 * buffer as an ordered buffer and log it appropriately. We need to ensure that 4431 * we mark the region we change dirty so that if the buffer is relogged in 4432 * a subsequent transaction the changes we make here as an ordered buffer are 4433 * correctly relogged in that transaction. If we are in recovery context, then 4434 * just queue the modified buffer as delayed write buffer so the transaction 4435 * recovery completion writes the changes to disk. 4436 */ 4437 struct xfs_btree_block_change_owner_info { 4438 __uint64_t new_owner; 4439 struct list_head *buffer_list; 4440 }; 4441 4442 static int 4443 xfs_btree_block_change_owner( 4444 struct xfs_btree_cur *cur, 4445 int level, 4446 void *data) 4447 { 4448 struct xfs_btree_block_change_owner_info *bbcoi = data; 4449 struct xfs_btree_block *block; 4450 struct xfs_buf *bp; 4451 4452 /* modify the owner */ 4453 block = xfs_btree_get_block(cur, level, &bp); 4454 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4455 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); 4456 else 4457 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); 4458 4459 /* 4460 * If the block is a root block hosted in an inode, we might not have a 4461 * buffer pointer here and we shouldn't attempt to log the change as the 4462 * information is already held in the inode and discarded when the root 4463 * block is formatted into the on-disk inode fork. We still change it, 4464 * though, so everything is consistent in memory. 4465 */ 4466 if (bp) { 4467 if (cur->bc_tp) { 4468 xfs_trans_ordered_buf(cur->bc_tp, bp); 4469 xfs_btree_log_block(cur, bp, XFS_BB_OWNER); 4470 } else { 4471 xfs_buf_delwri_queue(bp, bbcoi->buffer_list); 4472 } 4473 } else { 4474 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 4475 ASSERT(level == cur->bc_nlevels - 1); 4476 } 4477 4478 return 0; 4479 } 4480 4481 int 4482 xfs_btree_change_owner( 4483 struct xfs_btree_cur *cur, 4484 __uint64_t new_owner, 4485 struct list_head *buffer_list) 4486 { 4487 struct xfs_btree_block_change_owner_info bbcoi; 4488 4489 bbcoi.new_owner = new_owner; 4490 bbcoi.buffer_list = buffer_list; 4491 4492 return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner, 4493 &bbcoi); 4494 } 4495 4496 /** 4497 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format 4498 * btree block 4499 * 4500 * @bp: buffer containing the btree block 4501 * @max_recs: pointer to the m_*_mxr max records field in the xfs mount 4502 * @pag_max_level: pointer to the per-ag max level field 4503 */ 4504 bool 4505 xfs_btree_sblock_v5hdr_verify( 4506 struct xfs_buf *bp) 4507 { 4508 struct xfs_mount *mp = bp->b_target->bt_mount; 4509 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4510 struct xfs_perag *pag = bp->b_pag; 4511 4512 if (!xfs_sb_version_hascrc(&mp->m_sb)) 4513 return false; 4514 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4515 return false; 4516 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) 4517 return false; 4518 if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) 4519 return false; 4520 return true; 4521 } 4522 4523 /** 4524 * xfs_btree_sblock_verify() -- verify a short-format btree block 4525 * 4526 * @bp: buffer containing the btree block 4527 * @max_recs: maximum records allowed in this btree node 4528 */ 4529 bool 4530 xfs_btree_sblock_verify( 4531 struct xfs_buf *bp, 4532 unsigned int max_recs) 4533 { 4534 struct xfs_mount *mp = bp->b_target->bt_mount; 4535 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4536 4537 /* numrecs verification */ 4538 if (be16_to_cpu(block->bb_numrecs) > max_recs) 4539 return false; 4540 4541 /* sibling pointer verification */ 4542 if (!block->bb_u.s.bb_leftsib || 4543 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && 4544 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) 4545 return false; 4546 if (!block->bb_u.s.bb_rightsib || 4547 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && 4548 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) 4549 return false; 4550 4551 return true; 4552 } 4553 4554 /* 4555 * Calculate the number of btree levels needed to store a given number of 4556 * records in a short-format btree. 4557 */ 4558 uint 4559 xfs_btree_compute_maxlevels( 4560 struct xfs_mount *mp, 4561 uint *limits, 4562 unsigned long len) 4563 { 4564 uint level; 4565 unsigned long maxblocks; 4566 4567 maxblocks = (len + limits[0] - 1) / limits[0]; 4568 for (level = 1; maxblocks > 1; level++) 4569 maxblocks = (maxblocks + limits[1] - 1) / limits[1]; 4570 return level; 4571 } 4572 4573 /* 4574 * Query a regular btree for all records overlapping a given interval. 4575 * Start with a LE lookup of the key of low_rec and return all records 4576 * until we find a record with a key greater than the key of high_rec. 4577 */ 4578 STATIC int 4579 xfs_btree_simple_query_range( 4580 struct xfs_btree_cur *cur, 4581 union xfs_btree_key *low_key, 4582 union xfs_btree_key *high_key, 4583 xfs_btree_query_range_fn fn, 4584 void *priv) 4585 { 4586 union xfs_btree_rec *recp; 4587 union xfs_btree_key rec_key; 4588 __int64_t diff; 4589 int stat; 4590 bool firstrec = true; 4591 int error; 4592 4593 ASSERT(cur->bc_ops->init_high_key_from_rec); 4594 ASSERT(cur->bc_ops->diff_two_keys); 4595 4596 /* 4597 * Find the leftmost record. The btree cursor must be set 4598 * to the low record used to generate low_key. 4599 */ 4600 stat = 0; 4601 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat); 4602 if (error) 4603 goto out; 4604 4605 /* Nothing? See if there's anything to the right. */ 4606 if (!stat) { 4607 error = xfs_btree_increment(cur, 0, &stat); 4608 if (error) 4609 goto out; 4610 } 4611 4612 while (stat) { 4613 /* Find the record. */ 4614 error = xfs_btree_get_rec(cur, &recp, &stat); 4615 if (error || !stat) 4616 break; 4617 4618 /* Skip if high_key(rec) < low_key. */ 4619 if (firstrec) { 4620 cur->bc_ops->init_high_key_from_rec(&rec_key, recp); 4621 firstrec = false; 4622 diff = cur->bc_ops->diff_two_keys(cur, low_key, 4623 &rec_key); 4624 if (diff > 0) 4625 goto advloop; 4626 } 4627 4628 /* Stop if high_key < low_key(rec). */ 4629 cur->bc_ops->init_key_from_rec(&rec_key, recp); 4630 diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key); 4631 if (diff > 0) 4632 break; 4633 4634 /* Callback */ 4635 error = fn(cur, recp, priv); 4636 if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT) 4637 break; 4638 4639 advloop: 4640 /* Move on to the next record. */ 4641 error = xfs_btree_increment(cur, 0, &stat); 4642 if (error) 4643 break; 4644 } 4645 4646 out: 4647 return error; 4648 } 4649 4650 /* 4651 * Query an overlapped interval btree for all records overlapping a given 4652 * interval. This function roughly follows the algorithm given in 4653 * "Interval Trees" of _Introduction to Algorithms_, which is section 4654 * 14.3 in the 2nd and 3rd editions. 4655 * 4656 * First, generate keys for the low and high records passed in. 4657 * 4658 * For any leaf node, generate the high and low keys for the record. 4659 * If the record keys overlap with the query low/high keys, pass the 4660 * record to the function iterator. 4661 * 4662 * For any internal node, compare the low and high keys of each 4663 * pointer against the query low/high keys. If there's an overlap, 4664 * follow the pointer. 4665 * 4666 * As an optimization, we stop scanning a block when we find a low key 4667 * that is greater than the query's high key. 4668 */ 4669 STATIC int 4670 xfs_btree_overlapped_query_range( 4671 struct xfs_btree_cur *cur, 4672 union xfs_btree_key *low_key, 4673 union xfs_btree_key *high_key, 4674 xfs_btree_query_range_fn fn, 4675 void *priv) 4676 { 4677 union xfs_btree_ptr ptr; 4678 union xfs_btree_ptr *pp; 4679 union xfs_btree_key rec_key; 4680 union xfs_btree_key rec_hkey; 4681 union xfs_btree_key *lkp; 4682 union xfs_btree_key *hkp; 4683 union xfs_btree_rec *recp; 4684 struct xfs_btree_block *block; 4685 __int64_t ldiff; 4686 __int64_t hdiff; 4687 int level; 4688 struct xfs_buf *bp; 4689 int i; 4690 int error; 4691 4692 /* Load the root of the btree. */ 4693 level = cur->bc_nlevels - 1; 4694 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 4695 error = xfs_btree_lookup_get_block(cur, level, &ptr, &block); 4696 if (error) 4697 return error; 4698 xfs_btree_get_block(cur, level, &bp); 4699 trace_xfs_btree_overlapped_query_range(cur, level, bp); 4700 #ifdef DEBUG 4701 error = xfs_btree_check_block(cur, block, level, bp); 4702 if (error) 4703 goto out; 4704 #endif 4705 cur->bc_ptrs[level] = 1; 4706 4707 while (level < cur->bc_nlevels) { 4708 block = xfs_btree_get_block(cur, level, &bp); 4709 4710 /* End of node, pop back towards the root. */ 4711 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { 4712 pop_up: 4713 if (level < cur->bc_nlevels - 1) 4714 cur->bc_ptrs[level + 1]++; 4715 level++; 4716 continue; 4717 } 4718 4719 if (level == 0) { 4720 /* Handle a leaf node. */ 4721 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); 4722 4723 cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); 4724 ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey, 4725 low_key); 4726 4727 cur->bc_ops->init_key_from_rec(&rec_key, recp); 4728 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, 4729 &rec_key); 4730 4731 /* 4732 * If (record's high key >= query's low key) and 4733 * (query's high key >= record's low key), then 4734 * this record overlaps the query range; callback. 4735 */ 4736 if (ldiff >= 0 && hdiff >= 0) { 4737 error = fn(cur, recp, priv); 4738 if (error < 0 || 4739 error == XFS_BTREE_QUERY_RANGE_ABORT) 4740 break; 4741 } else if (hdiff < 0) { 4742 /* Record is larger than high key; pop. */ 4743 goto pop_up; 4744 } 4745 cur->bc_ptrs[level]++; 4746 continue; 4747 } 4748 4749 /* Handle an internal node. */ 4750 lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); 4751 hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); 4752 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); 4753 4754 ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key); 4755 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp); 4756 4757 /* 4758 * If (pointer's high key >= query's low key) and 4759 * (query's high key >= pointer's low key), then 4760 * this record overlaps the query range; follow pointer. 4761 */ 4762 if (ldiff >= 0 && hdiff >= 0) { 4763 level--; 4764 error = xfs_btree_lookup_get_block(cur, level, pp, 4765 &block); 4766 if (error) 4767 goto out; 4768 xfs_btree_get_block(cur, level, &bp); 4769 trace_xfs_btree_overlapped_query_range(cur, level, bp); 4770 #ifdef DEBUG 4771 error = xfs_btree_check_block(cur, block, level, bp); 4772 if (error) 4773 goto out; 4774 #endif 4775 cur->bc_ptrs[level] = 1; 4776 continue; 4777 } else if (hdiff < 0) { 4778 /* The low key is larger than the upper range; pop. */ 4779 goto pop_up; 4780 } 4781 cur->bc_ptrs[level]++; 4782 } 4783 4784 out: 4785 /* 4786 * If we don't end this function with the cursor pointing at a record 4787 * block, a subsequent non-error cursor deletion will not release 4788 * node-level buffers, causing a buffer leak. This is quite possible 4789 * with a zero-results range query, so release the buffers if we 4790 * failed to return any results. 4791 */ 4792 if (cur->bc_bufs[0] == NULL) { 4793 for (i = 0; i < cur->bc_nlevels; i++) { 4794 if (cur->bc_bufs[i]) { 4795 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); 4796 cur->bc_bufs[i] = NULL; 4797 cur->bc_ptrs[i] = 0; 4798 cur->bc_ra[i] = 0; 4799 } 4800 } 4801 } 4802 4803 return error; 4804 } 4805 4806 /* 4807 * Query a btree for all records overlapping a given interval of keys. The 4808 * supplied function will be called with each record found; return one of the 4809 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error 4810 * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a 4811 * negative error code. 4812 */ 4813 int 4814 xfs_btree_query_range( 4815 struct xfs_btree_cur *cur, 4816 union xfs_btree_irec *low_rec, 4817 union xfs_btree_irec *high_rec, 4818 xfs_btree_query_range_fn fn, 4819 void *priv) 4820 { 4821 union xfs_btree_rec rec; 4822 union xfs_btree_key low_key; 4823 union xfs_btree_key high_key; 4824 4825 /* Find the keys of both ends of the interval. */ 4826 cur->bc_rec = *high_rec; 4827 cur->bc_ops->init_rec_from_cur(cur, &rec); 4828 cur->bc_ops->init_key_from_rec(&high_key, &rec); 4829 4830 cur->bc_rec = *low_rec; 4831 cur->bc_ops->init_rec_from_cur(cur, &rec); 4832 cur->bc_ops->init_key_from_rec(&low_key, &rec); 4833 4834 /* Enforce low key < high key. */ 4835 if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0) 4836 return -EINVAL; 4837 4838 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 4839 return xfs_btree_simple_query_range(cur, &low_key, 4840 &high_key, fn, priv); 4841 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, 4842 fn, priv); 4843 } 4844 4845 /* 4846 * Calculate the number of blocks needed to store a given number of records 4847 * in a short-format (per-AG metadata) btree. 4848 */ 4849 xfs_extlen_t 4850 xfs_btree_calc_size( 4851 struct xfs_mount *mp, 4852 uint *limits, 4853 unsigned long long len) 4854 { 4855 int level; 4856 int maxrecs; 4857 xfs_extlen_t rval; 4858 4859 maxrecs = limits[0]; 4860 for (level = 0, rval = 0; len > 1; level++) { 4861 len += maxrecs - 1; 4862 do_div(len, maxrecs); 4863 maxrecs = limits[1]; 4864 rval += len; 4865 } 4866 return rval; 4867 } 4868 4869 static int 4870 xfs_btree_count_blocks_helper( 4871 struct xfs_btree_cur *cur, 4872 int level, 4873 void *data) 4874 { 4875 xfs_extlen_t *blocks = data; 4876 (*blocks)++; 4877 4878 return 0; 4879 } 4880 4881 /* Count the blocks in a btree and return the result in *blocks. */ 4882 int 4883 xfs_btree_count_blocks( 4884 struct xfs_btree_cur *cur, 4885 xfs_extlen_t *blocks) 4886 { 4887 *blocks = 0; 4888 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper, 4889 blocks); 4890 } 4891