/*
 * Copyright (C) 2007,2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "locking.h"

static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *ins_key,
		      struct btrfs_path *path, int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src, int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);

struct btrfs_path *btrfs_alloc_path(void)
{
	struct btrfs_path *path;
	path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
	if (path)
		path->reada = 1;
	return path;
}

/*
 * set all locked nodes in the path to blocking locks.  This should
 * be done before scheduling
 */
noinline void btrfs_set_path_blocking(struct btrfs_path *p)
{
	int i;
	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		if (p->nodes[i] && p->locks[i])
			btrfs_set_lock_blocking(p->nodes[i]);
	}
}
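/*
 * Illustrative usage pattern (a sketch, not a fixed API contract):
 * code that may sleep while holding tree locks, for example a disk
 * read, switches the path to blocking first and back afterwards:
 *
 *	btrfs_set_path_blocking(path);
 *	eb = read_tree_block(root, blocknr, blocksize, gen);
 *	btrfs_clear_path_blocking(path, NULL);
 */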
/*
 * reset all the locked nodes in the path to spinning locks.
 *
 * held is used to keep lockdep happy, when lockdep is enabled
 * we set held to a blocking lock before we go around and
 * retake all the spinlocks in the path.  You can safely use NULL
 * for held
 */
noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
					struct extent_buffer *held)
{
	int i;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* lockdep really cares that we take all of these spinlocks
	 * in the right order.  If any of the locks in the path are not
	 * currently blocking, it is going to complain.  So, make really
	 * really sure by forcing the path to blocking before we clear
	 * the path blocking.
	 */
	if (held)
		btrfs_set_lock_blocking(held);
	btrfs_set_path_blocking(p);
#endif

	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
		if (p->nodes[i] && p->locks[i])
			btrfs_clear_lock_blocking(p->nodes[i]);
	}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (held)
		btrfs_clear_lock_blocking(held);
#endif
}

/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
	btrfs_release_path(NULL, p);
	kmem_cache_free(btrfs_path_cachep, p);
}

/*
 * path release drops references on the extent buffers in the path
 * and it drops any locks held by this path
 *
 * It is safe to call this on paths where no locks or extent buffers
 * are held.
 */
noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
{
	int i;

	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		p->slots[i] = 0;
		if (!p->nodes[i])
			continue;
		if (p->locks[i]) {
			btrfs_tree_unlock(p->nodes[i]);
			p->locks[i] = 0;
		}
		free_extent_buffer(p->nodes[i]);
		p->nodes[i] = NULL;
	}
}

/*
 * safely gets a reference on the root node of a tree.  A lock
 * is not taken, so a concurrent writer may put a different node
 * at the root of the tree.  See btrfs_lock_root_node for the
 * looping required.
 *
 * The extent buffer returned by this has a reference taken, so
 * it won't disappear.  It may stop being the root of the tree
 * at any time because there are no locks held.
 */
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;
	spin_lock(&root->node_lock);
	eb = root->node;
	extent_buffer_get(eb);
	spin_unlock(&root->node_lock);
	return eb;
}

/* loop around taking references on and locking the root node of the
 * tree until you end up with a lock on the root.  A locked buffer
 * is returned, with a reference held.
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);

		spin_lock(&root->node_lock);
		if (eb == root->node) {
			spin_unlock(&root->node_lock);
			break;
		}
		spin_unlock(&root->node_lock);

		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/* cowonly roots (everything not a reference counted cow subvolume)
 * just get put onto a simple dirty list.  transaction.c walks this to
 * make sure they get properly updated on disk.
 */
static void add_root_to_dirty_list(struct btrfs_root *root)
{
	if (root->track_dirty && list_empty(&root->dirty_list)) {
		list_add(&root->dirty_list,
			 &root->fs_info->dirty_cowonly_roots);
	}
}
/*
 * used by snapshot creation to make a copy of a root for a tree with
 * a given objectid.  The buffer with the new root node is returned in
 * cow_ret, and this function returns zero on success or a negative
 * error code.
 */
int btrfs_copy_root(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer **cow_ret, u64 new_root_objectid)
{
	struct extent_buffer *cow;
	u32 nritems;
	int ret = 0;
	int level;
	struct btrfs_disk_key disk_key;

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
				     new_root_objectid, &disk_key, level,
				     buf->start, 0);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, new_root_objectid);

	write_extent_buffer(cow, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(cow),
			    BTRFS_FSID_SIZE);

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		ret = btrfs_inc_ref(trans, root, cow, 1);
	else
		ret = btrfs_inc_ref(trans, root, cow, 0);

	if (ret)
		return ret;

	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

/*
 * check if the tree block can be shared by multiple trees
 */
int btrfs_block_can_be_shared(struct btrfs_root *root,
			      struct extent_buffer *buf)
{
	/*
	 * Tree blocks not in reference counted trees and tree roots
	 * are never shared.  If a block was allocated after the last
	 * snapshot and the block was not allocated by tree relocation,
	 * we know the block is not shared.
	 */
	if (root->ref_cows &&
	    buf != root->node && buf != root->commit_root &&
	    (btrfs_header_generation(buf) <=
	     btrfs_root_last_snapshot(&root->root_item) ||
	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
		return 1;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (root->ref_cows &&
	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
		return 1;
#endif
	return 0;
}
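/*
 * Illustrative example (generation numbers invented): in a tree whose
 * last snapshot was taken at generation 60, a block stamped with
 * generation 50 may still be referenced by that snapshot and so can
 * be shared; a block stamped 70 cannot be, unless tree relocation
 * allocated it.
 */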
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct extent_buffer *buf,
				       struct extent_buffer *cow)
{
	u64 refs;
	u64 owner;
	u64 flags;
	u64 new_flags = 0;
	int ret;

	/*
	 * Backrefs update rules:
	 *
	 * Always use full backrefs for extent pointers in tree blocks
	 * allocated by tree relocation.
	 *
	 * If a shared tree block is no longer referenced by its owner
	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
	 * use full backrefs for extent pointers in the tree block.
	 *
	 * If a tree block is being relocated
	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
	 * use full backrefs for extent pointers in the tree block.
	 * The reason for this is that some operations (such as drop
	 * tree) are only allowed on blocks that use full backrefs.
	 */

	if (btrfs_block_can_be_shared(root, buf)) {
		ret = btrfs_lookup_extent_info(trans, root, buf->start,
					       buf->len, &refs, &flags);
		BUG_ON(ret);
		BUG_ON(refs == 0);
	} else {
		refs = 1;
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
		else
			flags = 0;
	}

	owner = btrfs_header_owner(buf);
	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));

	if (refs > 1) {
		if ((owner == root->root_key.objectid ||
		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
			ret = btrfs_inc_ref(trans, root, buf, 1);
			BUG_ON(ret);

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID) {
				ret = btrfs_dec_ref(trans, root, buf, 0);
				BUG_ON(ret);
				ret = btrfs_inc_ref(trans, root, cow, 1);
				BUG_ON(ret);
			}
			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		} else {

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			BUG_ON(ret);
		}
		if (new_flags != 0) {
			ret = btrfs_set_disk_extent_flags(trans, root,
							  buf->start,
							  buf->len,
							  new_flags, 0);
			BUG_ON(ret);
		}
	} else {
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			BUG_ON(ret);
			ret = btrfs_dec_ref(trans, root, buf, 1);
			BUG_ON(ret);
		}
		clean_tree_block(trans, root, buf);
	}
	return 0;
}
/*
 * does the dirty work in cow of a single block.  The parent block (if
 * supplied) is updated to point to the new cow copy.  The new buffer
 * is marked dirty and returned locked.  If you modify the block it
 * needs to be marked dirty again.
 *
 * search_start -- an allocation hint for the new block
 *
 * empty_size -- a hint that you plan on doing more cow.  This is the
 * size in bytes the allocator should try to find free next to the
 * block it returns.  This is just a hint and may be ignored by the
 * allocator.
 */
static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
			     u64 search_start, u64 empty_size)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *cow;
	int level;
	int unlock_orig = 0;
	u64 parent_start;

	if (*cow_ret == buf)
		unlock_orig = 1;

	btrfs_assert_tree_locked(buf);

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);

	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		if (parent)
			parent_start = parent->start;
		else
			parent_start = 0;
	} else
		parent_start = 0;

	cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
				     root->root_key.objectid, &disk_key,
				     level, search_start, empty_size);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	/* cow is set to blocking by btrfs_init_new_buffer */

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, root->root_key.objectid);

	write_extent_buffer(cow, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(cow),
			    BTRFS_FSID_SIZE);

	update_ref_for_cow(trans, root, buf, cow);

	if (buf == root->node) {
		WARN_ON(parent && parent != buf);
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			parent_start = buf->start;
		else
			parent_start = 0;

		spin_lock(&root->node_lock);
		root->node = cow;
		extent_buffer_get(cow);
		spin_unlock(&root->node_lock);

		btrfs_free_extent(trans, root, buf->start, buf->len,
				  parent_start, root->root_key.objectid,
				  level, 0);
		free_extent_buffer(buf);
		add_root_to_dirty_list(root);
	} else {
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			parent_start = parent->start;
		else
			parent_start = 0;

		WARN_ON(trans->transid != btrfs_header_generation(parent));
		btrfs_set_node_blockptr(parent, parent_slot,
					cow->start);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
		btrfs_mark_buffer_dirty(parent);
		btrfs_free_extent(trans, root, buf->start, buf->len,
				  parent_start, root->root_key.objectid,
				  level, 0);
	}
	if (unlock_orig)
		btrfs_tree_unlock(buf);
	free_extent_buffer(buf);
	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

static inline int should_cow_block(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct extent_buffer *buf)
{
	if (btrfs_header_generation(buf) == trans->transid &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
		return 0;
	return 1;
}
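/*
 * Reading the test above (an informal summary, not normative): a block
 * may be modified in place only if it was already cow'd in this
 * transaction (the generation matches), hasn't been written out since,
 * and isn't a relocation block touched from outside the reloc tree.
 * Any other block must be cow'd first.
 */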
/*
 * cows a single block, see __btrfs_cow_block for the real work.
 * This version of it has extra checks so that a block isn't cow'd
 * more than once per transaction, as long as it hasn't been written
 * yet
 */
noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
		    struct extent_buffer **cow_ret)
{
	u64 search_start;
	int ret;

	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %llu running %llu\n",
		       (unsigned long long)trans->transid,
		       (unsigned long long)
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %llu running %llu\n",
		       (unsigned long long)trans->transid,
		       (unsigned long long)root->fs_info->generation);
		WARN_ON(1);
	}

	if (!should_cow_block(trans, root, buf)) {
		*cow_ret = buf;
		return 0;
	}

	search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);

	if (parent)
		btrfs_set_lock_blocking(parent);
	btrfs_set_lock_blocking(buf);

	ret = __btrfs_cow_block(trans, root, buf, parent,
				 parent_slot, cow_ret, search_start, 0);
	return ret;
}

/*
 * helper function for defrag to decide if two blocks pointed to by a
 * node are actually close by
 */
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
{
	if (blocknr < other && other - (blocknr + blocksize) < 32768)
		return 1;
	if (blocknr > other && blocknr - (other + blocksize) < 32768)
		return 1;
	return 0;
}

/*
 * compare two keys in a memcmp fashion
 */
static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
{
	struct btrfs_key k1;

	btrfs_disk_key_to_cpu(&k1, disk);

	if (k1.objectid > k2->objectid)
		return 1;
	if (k1.objectid < k2->objectid)
		return -1;
	if (k1.type > k2->type)
		return 1;
	if (k1.type < k2->type)
		return -1;
	if (k1.offset > k2->offset)
		return 1;
	if (k1.offset < k2->offset)
		return -1;
	return 0;
}

/*
 * same as comp_keys only with two btrfs_key's
 */
int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
{
	if (k1->objectid > k2->objectid)
		return 1;
	if (k1->objectid < k2->objectid)
		return -1;
	if (k1->type > k2->type)
		return 1;
	if (k1->type < k2->type)
		return -1;
	if (k1->offset > k2->offset)
		return 1;
	if (k1->offset < k2->offset)
		return -1;
	return 0;
}
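/*
 * Worked example (illustrative key values): keys order by (objectid,
 * type, offset), so with BTRFS_INODE_ITEM_KEY == 1 and
 * BTRFS_DIR_ITEM_KEY == 84, the key (256, 1, 0) sorts before
 * (256, 84, 12345), and both sort before anything with objectid 257.
 */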
/*
 * this is used by the defrag code to go through all the
 * leaves pointed to by a node and reallocate them so that
 * disk order is close to key order
 */
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct extent_buffer *parent,
		       int start_slot, int cache_only, u64 *last_ret,
		       struct btrfs_key *progress)
{
	struct extent_buffer *cur;
	u64 blocknr;
	u64 gen;
	u64 search_start = *last_ret;
	u64 last_block = 0;
	u64 other;
	u32 parent_nritems;
	int end_slot;
	int i;
	int err = 0;
	int parent_level;
	int uptodate;
	u32 blocksize;
	int progress_passed = 0;
	struct btrfs_disk_key disk_key;

	parent_level = btrfs_header_level(parent);
	if (cache_only && parent_level != 1)
		return 0;

	if (trans->transaction != root->fs_info->running_transaction)
		WARN_ON(1);
	if (trans->transid != root->fs_info->generation)
		WARN_ON(1);

	parent_nritems = btrfs_header_nritems(parent);
	blocksize = btrfs_level_size(root, parent_level - 1);
	end_slot = parent_nritems;

	if (parent_nritems == 1)
		return 0;

	btrfs_set_lock_blocking(parent);

	for (i = start_slot; i < end_slot; i++) {
		int close = 1;

		if (!parent->map_token) {
			map_extent_buffer(parent,
					btrfs_node_key_ptr_offset(i),
					sizeof(struct btrfs_key_ptr),
					&parent->map_token, &parent->kaddr,
					&parent->map_start, &parent->map_len,
					KM_USER1);
		}
		btrfs_node_key(parent, &disk_key, i);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
		blocknr = btrfs_node_blockptr(parent, i);
		gen = btrfs_node_ptr_generation(parent, i);
		if (last_block == 0)
			last_block = blocknr;

		if (i > 0) {
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (!close && i < end_slot - 2) {
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (close) {
			last_block = blocknr;
			continue;
		}
		if (parent->map_token) {
			unmap_extent_buffer(parent, parent->map_token,
					    KM_USER1);
			parent->map_token = NULL;
		}

		cur = btrfs_find_tree_block(root, blocknr, blocksize);
		if (cur)
			uptodate = btrfs_buffer_uptodate(cur, gen);
		else
			uptodate = 0;
		if (!cur || !uptodate) {
			if (cache_only) {
				free_extent_buffer(cur);
				continue;
			}
			if (!cur) {
				cur = read_tree_block(root, blocknr,
							 blocksize, gen);
			} else if (!uptodate) {
				btrfs_read_buffer(cur, gen);
			}
		}
		if (search_start == 0)
			search_start = last_block;

		btrfs_tree_lock(cur);
		btrfs_set_lock_blocking(cur);
		err = __btrfs_cow_block(trans, root, cur, parent, i,
					&cur, search_start,
					min(16 * blocksize,
					    (end_slot - i) * blocksize));
		if (err) {
			btrfs_tree_unlock(cur);
			free_extent_buffer(cur);
			break;
		}
		search_start = cur->start;
		last_block = cur->start;
		*last_ret = search_start;
		btrfs_tree_unlock(cur);
		free_extent_buffer(cur);
	}
	if (parent->map_token) {
		unmap_extent_buffer(parent, parent->map_token,
				    KM_USER1);
		parent->map_token = NULL;
	}
	return err;
}
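/*
 * A rough picture of a leaf (illustrative, simplified):
 *
 *	[ header | item 0 | item 1 | ... free space ... | data 1 | data 0 ]
 *
 * item headers grow forward from the front of the block while their
 * data grows backward from the end, so the data of the last item has
 * the lowest offset in the block.
 */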
/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
static inline unsigned int leaf_data_end(struct btrfs_root *root,
					 struct extent_buffer *leaf)
{
	u32 nr = btrfs_header_nritems(leaf);
	if (nr == 0)
		return BTRFS_LEAF_DATA_SIZE(root);
	return btrfs_item_offset_nr(leaf, nr - 1);
}

/*
 * extra debugging checks to make sure all the items in a node are
 * well formed and in the proper order
 */
static int check_node(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
{
	struct extent_buffer *parent = NULL;
	struct extent_buffer *node = path->nodes[level];
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key node_key;
	int parent_slot;
	int slot;
	struct btrfs_key cpukey;
	u32 nritems = btrfs_header_nritems(node);

	if (path->nodes[level + 1])
		parent = path->nodes[level + 1];

	slot = path->slots[level];
	BUG_ON(nritems == 0);
	if (parent) {
		parent_slot = path->slots[level + 1];
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_node_key(node, &node_key, 0);
		BUG_ON(memcmp(&parent_key, &node_key,
			      sizeof(struct btrfs_disk_key)));
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
		       btrfs_header_bytenr(node));
	}
	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
	if (slot != 0) {
		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
	}
	if (slot < nritems - 1) {
		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
	}
	return 0;
}

/*
 * extra checking to make sure all the items in a leaf are
 * well formed and in the proper order
 */
static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
{
	struct extent_buffer *leaf = path->nodes[level];
	struct extent_buffer *parent = NULL;
	int parent_slot;
	struct btrfs_key cpukey;
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key leaf_key;
	int slot = path->slots[0];

	u32 nritems = btrfs_header_nritems(leaf);

	if (path->nodes[level + 1])
		parent = path->nodes[level + 1];

	if (nritems == 0)
		return 0;

	if (parent) {
		parent_slot = path->slots[level + 1];
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_item_key(leaf, &leaf_key, 0);

		BUG_ON(memcmp(&parent_key, &leaf_key,
		       sizeof(struct btrfs_disk_key)));
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
		       btrfs_header_bytenr(leaf));
	}
	if (slot != 0 && slot < nritems - 1) {
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
		if (comp_keys(&leaf_key, &cpukey) <= 0) {
			btrfs_print_leaf(root, leaf);
			printk(KERN_CRIT "slot %d offset bad key\n", slot);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, slot - 1) !=
		       btrfs_item_end_nr(leaf, slot)) {
			btrfs_print_leaf(root, leaf);
			printk(KERN_CRIT "slot %d offset bad\n", slot);
			BUG_ON(1);
		}
	}
	if (slot < nritems - 1) {
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
		if (btrfs_item_offset_nr(leaf, slot) !=
			btrfs_item_end_nr(leaf, slot + 1)) {
			btrfs_print_leaf(root, leaf);
			printk(KERN_CRIT "slot %d offset bad\n", slot);
			BUG_ON(1);
		}
	}
	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
	return 0;
}
bad\n", slot); 837 BUG_ON(1); 838 } 839 } 840 BUG_ON(btrfs_item_offset_nr(leaf, 0) + 841 btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); 842 return 0; 843 } 844 845 static noinline int check_block(struct btrfs_root *root, 846 struct btrfs_path *path, int level) 847 { 848 return 0; 849 if (level == 0) 850 return check_leaf(root, path, level); 851 return check_node(root, path, level); 852 } 853 854 /* 855 * search for key in the extent_buffer. The items start at offset p, 856 * and they are item_size apart. There are 'max' items in p. 857 * 858 * the slot in the array is returned via slot, and it points to 859 * the place where you would insert key if it is not found in 860 * the array. 861 * 862 * slot may point to max if the key is bigger than all of the keys 863 */ 864 static noinline int generic_bin_search(struct extent_buffer *eb, 865 unsigned long p, 866 int item_size, struct btrfs_key *key, 867 int max, int *slot) 868 { 869 int low = 0; 870 int high = max; 871 int mid; 872 int ret; 873 struct btrfs_disk_key *tmp = NULL; 874 struct btrfs_disk_key unaligned; 875 unsigned long offset; 876 char *map_token = NULL; 877 char *kaddr = NULL; 878 unsigned long map_start = 0; 879 unsigned long map_len = 0; 880 int err; 881 882 while (low < high) { 883 mid = (low + high) / 2; 884 offset = p + mid * item_size; 885 886 if (!map_token || offset < map_start || 887 (offset + sizeof(struct btrfs_disk_key)) > 888 map_start + map_len) { 889 if (map_token) { 890 unmap_extent_buffer(eb, map_token, KM_USER0); 891 map_token = NULL; 892 } 893 894 err = map_private_extent_buffer(eb, offset, 895 sizeof(struct btrfs_disk_key), 896 &map_token, &kaddr, 897 &map_start, &map_len, KM_USER0); 898 899 if (!err) { 900 tmp = (struct btrfs_disk_key *)(kaddr + offset - 901 map_start); 902 } else { 903 read_extent_buffer(eb, &unaligned, 904 offset, sizeof(unaligned)); 905 tmp = &unaligned; 906 } 907 908 } else { 909 tmp = (struct btrfs_disk_key *)(kaddr + offset - 910 map_start); 911 } 912 ret = comp_keys(tmp, key); 913 914 if (ret < 0) 915 low = mid + 1; 916 else if (ret > 0) 917 high = mid; 918 else { 919 *slot = mid; 920 if (map_token) 921 unmap_extent_buffer(eb, map_token, KM_USER0); 922 return 0; 923 } 924 } 925 *slot = low; 926 if (map_token) 927 unmap_extent_buffer(eb, map_token, KM_USER0); 928 return 1; 929 } 930 931 /* 932 * simple bin_search frontend that does the right thing for 933 * leaves vs nodes 934 */ 935 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, 936 int level, int *slot) 937 { 938 if (level == 0) { 939 return generic_bin_search(eb, 940 offsetof(struct btrfs_leaf, items), 941 sizeof(struct btrfs_item), 942 key, btrfs_header_nritems(eb), 943 slot); 944 } else { 945 return generic_bin_search(eb, 946 offsetof(struct btrfs_node, ptrs), 947 sizeof(struct btrfs_key_ptr), 948 key, btrfs_header_nritems(eb), 949 slot); 950 } 951 return -1; 952 } 953 954 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 955 int level, int *slot) 956 { 957 return bin_search(eb, key, level, slot); 958 } 959 960 /* given a node and slot number, this reads the blocks it points to. The 961 * extent buffer is returned with a reference taken (but unlocked). 962 * NULL is returned on error. 
/* given a node and slot number, this reads the block it points to.
 * The extent buffer is returned with a reference taken (but unlocked).
 * NULL is returned on error.
 */
static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
				   struct extent_buffer *parent, int slot)
{
	int level = btrfs_header_level(parent);
	if (slot < 0)
		return NULL;
	if (slot >= btrfs_header_nritems(parent))
		return NULL;

	BUG_ON(level == 0);

	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
		       btrfs_level_size(root, level - 1),
		       btrfs_node_ptr_generation(parent, slot));
}

/*
 * node level balancing, used to make sure nodes are in proper order for
 * item deletion.  We balance from the top down, so we have to make sure
 * that a deletion won't leave a node completely empty later on.
 */
static noinline int balance_level(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path, int level)
{
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	int err_on_enospc = 0;
	u64 orig_ptr;

	if (level == 0)
		return 0;

	mid = path->nodes[level];

	WARN_ON(!path->locks[level]);
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1)
		parent = path->nodes[level + 1];
	pslot = path->slots[level + 1];

	/*
	 * deal with the case where there is only one pointer in the root
	 * by promoting the node below to a root
	 */
	if (!parent) {
		struct extent_buffer *child;

		if (btrfs_header_nritems(mid) != 1)
			return 0;

		/* promote the child to a root */
		child = read_node_slot(root, mid, 0);
		BUG_ON(!child);
		btrfs_tree_lock(child);
		btrfs_set_lock_blocking(child);
		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
		BUG_ON(ret);

		spin_lock(&root->node_lock);
		root->node = child;
		spin_unlock(&root->node_lock);

		add_root_to_dirty_list(root);
		btrfs_tree_unlock(child);

		path->locks[level] = 0;
		path->nodes[level] = NULL;
		clean_tree_block(trans, root, mid);
		btrfs_tree_unlock(mid);
		/* once for the path */
		free_extent_buffer(mid);
		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
					0, root->root_key.objectid, level, 1);
		/* once for the root ptr */
		free_extent_buffer(mid);
		return ret;
	}
	if (btrfs_header_nritems(mid) >
	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
		return 0;

	if (btrfs_header_nritems(mid) > 2)
		return 0;

	if (btrfs_header_nritems(mid) < 2)
		err_on_enospc = 1;

	left = read_node_slot(root, parent, pslot - 1);
	if (left) {
		btrfs_tree_lock(left);
		btrfs_set_lock_blocking(left);
		wret = btrfs_cow_block(trans, root, left,
				       parent, pslot - 1, &left);
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}
	right = read_node_slot(root, parent, pslot + 1);
	if (right) {
		btrfs_tree_lock(right);
		btrfs_set_lock_blocking(right);
		wret = btrfs_cow_block(trans, root, right,
				       parent, pslot + 1, &right);
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}

	/* first, try to make some room in the middle buffer */
	if (left) {
		orig_slot += btrfs_header_nritems(left);
		wret = push_node_left(trans, root, left, mid, 1);
		if (wret < 0)
			ret = wret;
		if (btrfs_header_nritems(mid) < 2)
			err_on_enospc = 1;
	}

	/*
	 * then try to empty the right most buffer into the middle
	 */
	if (right) {
		wret = push_node_left(trans, root, mid, right, 1);
		if (wret < 0 && wret != -ENOSPC)
			ret = wret;
		if (btrfs_header_nritems(right) == 0) {
			u64 bytenr = right->start;
			u32 blocksize = right->len;

			clean_tree_block(trans, root, right);
			btrfs_tree_unlock(right);
			free_extent_buffer(right);
			right = NULL;
			wret = del_ptr(trans, root, path, level + 1, pslot +
				       1);
			if (wret)
				ret = wret;
			wret = btrfs_free_extent(trans, root, bytenr,
						 blocksize, 0,
						 root->root_key.objectid,
						 level, 0);
			if (wret)
				ret = wret;
		} else {
			struct btrfs_disk_key right_key;
			btrfs_node_key(right, &right_key, 0);
			btrfs_set_node_key(parent, &right_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);
		}
	}
	if (btrfs_header_nritems(mid) == 1) {
		/*
		 * we're not allowed to leave a node with one item in the
		 * tree during a delete.  A deletion from lower in the tree
		 * could try to delete the only pointer in this node.
		 * So, pull some keys from the left.
		 * There has to be a left pointer at this point because
		 * otherwise we would have pulled some pointers from the
		 * right
		 */
		BUG_ON(!left);
		wret = balance_node_right(trans, root, mid, left);
		if (wret < 0) {
			ret = wret;
			goto enospc;
		}
		if (wret == 1) {
			wret = push_node_left(trans, root, left, mid, 1);
			if (wret < 0)
				ret = wret;
		}
		BUG_ON(wret == 1);
	}
	if (btrfs_header_nritems(mid) == 0) {
		/* we've managed to empty the middle node, drop it */
		u64 bytenr = mid->start;
		u32 blocksize = mid->len;

		clean_tree_block(trans, root, mid);
		btrfs_tree_unlock(mid);
		free_extent_buffer(mid);
		mid = NULL;
		wret = del_ptr(trans, root, path, level + 1, pslot);
		if (wret)
			ret = wret;
		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
					 0, root->root_key.objectid,
					 level, 0);
		if (wret)
			ret = wret;
	} else {
		/* update the parent key to reflect our changes */
		struct btrfs_disk_key mid_key;
		btrfs_node_key(mid, &mid_key, 0);
		btrfs_set_node_key(parent, &mid_key, pslot);
		btrfs_mark_buffer_dirty(parent);
	}

	/* update the path */
	if (left) {
		if (btrfs_header_nritems(left) > orig_slot) {
			extent_buffer_get(left);
			/* left was locked after cow */
			path->nodes[level] = left;
			path->slots[level + 1] -= 1;
			path->slots[level] = orig_slot;
			if (mid) {
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			}
		} else {
			orig_slot -= btrfs_header_nritems(left);
			path->slots[level] = orig_slot;
		}
	}
	/* double check we haven't messed things up */
	check_block(root, path, level);
	if (orig_ptr !=
	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
		BUG();
enospc:
	if (right) {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	if (left) {
		if (path->nodes[level] != left)
			btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	return ret;
}
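/*
 * Informal invariant (a summary of the code above, not verbatim from
 * any design doc): when balance_level() returns successfully, the node
 * at this path level has either been merged away (with the path
 * updated to a neighbor) or left holding at least two pointers, so a
 * later single deletion lower down cannot leave it empty.
 */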
/* Node balancing for insertion.  Here we only split or push nodes
 * around when they are completely full.  This is also done top down,
 * so we have to be pessimistic.
 */
static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, int level)
{
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	u64 orig_ptr;

	if (level == 0)
		return 1;

	mid = path->nodes[level];
	WARN_ON(btrfs_header_generation(mid) != trans->transid);
	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1)
		parent = path->nodes[level + 1];
	pslot = path->slots[level + 1];

	if (!parent)
		return 1;

	left = read_node_slot(root, parent, pslot - 1);

	/* first, try to make some room in the middle buffer */
	if (left) {
		u32 left_nr;

		btrfs_tree_lock(left);
		btrfs_set_lock_blocking(left);

		left_nr = btrfs_header_nritems(left);
		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, left, parent,
					      pslot - 1, &left);
			if (ret)
				wret = 1;
			else {
				wret = push_node_left(trans, root,
						      left, mid, 0);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			struct btrfs_disk_key disk_key;
			orig_slot += left_nr;
			btrfs_node_key(mid, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot);
			btrfs_mark_buffer_dirty(parent);
			if (btrfs_header_nritems(left) > orig_slot) {
				path->nodes[level] = left;
				path->slots[level + 1] -= 1;
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				orig_slot -=
					btrfs_header_nritems(left);
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(left);
				free_extent_buffer(left);
			}
			return 0;
		}
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	right = read_node_slot(root, parent, pslot + 1);

	/*
	 * then try to empty the right most buffer into the middle
	 */
	if (right) {
		u32 right_nr;

		btrfs_tree_lock(right);
		btrfs_set_lock_blocking(right);

		right_nr = btrfs_header_nritems(right);
		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
					      &right);
			if (ret)
				wret = 1;
			else {
				wret = balance_node_right(trans, root,
							  right, mid);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			struct btrfs_disk_key disk_key;

			btrfs_node_key(right, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);

			if (btrfs_header_nritems(mid) <= orig_slot) {
				path->nodes[level] = right;
				path->slots[level + 1] += 1;
				path->slots[level] = orig_slot -
					btrfs_header_nritems(mid);
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				btrfs_tree_unlock(right);
				free_extent_buffer(right);
			}
			return 0;
		}
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 1;
}
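/*
 * A note on the readahead below (descriptive, derived from the code):
 * it only fires when walking level 1 nodes, and it queues reads for
 * neighboring leaves within 64k of the target block, stopping after
 * roughly 64k of readahead or 32 scanned slots.
 */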
/*
 * readahead one full node of leaves, finding things that are close
 * to the block in 'slot', and triggering ra on them.
 */
static void reada_for_search(struct btrfs_root *root,
			     struct btrfs_path *path,
			     int level, int slot, u64 objectid)
{
	struct extent_buffer *node;
	struct btrfs_disk_key disk_key;
	u32 nritems;
	u64 search;
	u64 target;
	u64 nread = 0;
	int direction = path->reada;
	struct extent_buffer *eb;
	u32 nr;
	u32 blocksize;
	u32 nscan = 0;

	if (level != 1)
		return;

	if (!path->nodes[level])
		return;

	node = path->nodes[level];

	search = btrfs_node_blockptr(node, slot);
	blocksize = btrfs_level_size(root, level - 1);
	eb = btrfs_find_tree_block(root, search, blocksize);
	if (eb) {
		free_extent_buffer(eb);
		return;
	}

	target = search;

	nritems = btrfs_header_nritems(node);
	nr = slot;
	while (1) {
		if (direction < 0) {
			if (nr == 0)
				break;
			nr--;
		} else if (direction > 0) {
			nr++;
			if (nr >= nritems)
				break;
		}
		if (path->reada < 0 && objectid) {
			btrfs_node_key(node, &disk_key, nr);
			if (btrfs_disk_key_objectid(&disk_key) != objectid)
				break;
		}
		search = btrfs_node_blockptr(node, nr);
		if ((search <= target && target - search <= 65536) ||
		    (search > target && search - target <= 65536)) {
			readahead_tree_block(root, search, blocksize,
				     btrfs_node_ptr_generation(node, nr));
			nread += blocksize;
		}
		nscan++;
		if ((nread > 65536 || nscan > 32))
			break;
	}
}

/*
 * returns -EAGAIN if it had to drop the path, or zero if everything
 * was in cache
 */
static noinline int reada_for_balance(struct btrfs_root *root,
				      struct btrfs_path *path, int level)
{
	int slot;
	int nritems;
	struct extent_buffer *parent;
	struct extent_buffer *eb;
	u64 gen;
	u64 block1 = 0;
	u64 block2 = 0;
	int ret = 0;
	int blocksize;

	parent = path->nodes[level + 1];
	if (!parent)
		return 0;

	nritems = btrfs_header_nritems(parent);
	slot = path->slots[level + 1];
	blocksize = btrfs_level_size(root, level);

	if (slot > 0) {
		block1 = btrfs_node_blockptr(parent, slot - 1);
		gen = btrfs_node_ptr_generation(parent, slot - 1);
		eb = btrfs_find_tree_block(root, block1, blocksize);
		if (eb && btrfs_buffer_uptodate(eb, gen))
			block1 = 0;
		free_extent_buffer(eb);
	}
	if (slot + 1 < nritems) {
		block2 = btrfs_node_blockptr(parent, slot + 1);
		gen = btrfs_node_ptr_generation(parent, slot + 1);
		eb = btrfs_find_tree_block(root, block2, blocksize);
		if (eb && btrfs_buffer_uptodate(eb, gen))
			block2 = 0;
		free_extent_buffer(eb);
	}
	if (block1 || block2) {
		ret = -EAGAIN;

		/* release the whole path */
		btrfs_release_path(root, path);

		/* read the blocks */
		if (block1)
			readahead_tree_block(root, block1, blocksize, 0);
		if (block2)
			readahead_tree_block(root, block2, blocksize, 0);

		if (block1) {
			eb = read_tree_block(root, block1, blocksize, 0);
			free_extent_buffer(eb);
		}
		if (block2) {
			eb = read_tree_block(root, block2, blocksize, 0);
			free_extent_buffer(eb);
		}
	}
	return ret;
}
/*
 * when we walk down the tree, it is usually safe to unlock the higher
 * layers in the tree.  The exceptions are when our path goes through
 * slot 0, because operations on the tree might require changing key
 * pointers higher up in the tree.
 *
 * callers might also have set path->keep_locks, which tells this code
 * to keep the lock if the path points to the last slot in the block.
 * This is part of walking through the tree, and selecting the next
 * slot in the higher block.
 *
 * lowest_unlock sets the lowest level in the tree we're allowed to
 * unlock.  so if lowest_unlock is 1, level 0 won't be unlocked
 */
static noinline void unlock_up(struct btrfs_path *path, int level,
			       int lowest_unlock)
{
	int i;
	int skip_level = level;
	int no_skips = 0;
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			break;
		if (!path->locks[i])
			break;
		if (!no_skips && path->slots[i] == 0) {
			skip_level = i + 1;
			continue;
		}
		if (!no_skips && path->keep_locks) {
			u32 nritems;
			t = path->nodes[i];
			nritems = btrfs_header_nritems(t);
			if (nritems < 1 || path->slots[i] >= nritems - 1) {
				skip_level = i + 1;
				continue;
			}
		}
		if (skip_level < i && i >= lowest_unlock)
			no_skips = 1;

		t = path->nodes[i];
		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
			btrfs_tree_unlock(t);
			path->locks[i] = 0;
		}
	}
}

/*
 * This releases any locks held in the path starting at level and
 * going all the way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few
 * corner cases, such as COW of the block at slot zero in the node.
 * This ignores those rules, and it should only be called when there
 * are no more updates to be done higher up in the tree.
 */
noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock(path->nodes[i]);
		path->locks[i] = 0;
	}
}

/*
 * helper function for btrfs_search_slot.  The goal is to find a block
 * in cache without setting the path to blocking.  If we find the block
 * we return zero and the path is unchanged.
 *
 * If we can't find the block, we set the path blocking and do some
 * reada.  -EAGAIN is returned and the search must be repeated.
 */
static int
read_block_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer **eb_ret, int level, int slot,
		       struct btrfs_key *key)
{
	u64 blocknr;
	u64 gen;
	u32 blocksize;
	struct extent_buffer *b = *eb_ret;
	struct extent_buffer *tmp;
	int ret;

	blocknr = btrfs_node_blockptr(b, slot);
	gen = btrfs_node_ptr_generation(b, slot);
	blocksize = btrfs_level_size(root, level - 1);

	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
	if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
		/*
		 * we found an up to date block without sleeping, return
		 * right away
		 */
		*eb_ret = tmp;
		return 0;
	}
	/*
	 * reduce lock contention at high levels
	 * of the btree by dropping locks before
	 * we read.  Don't release the lock on the current
	 * level because we need to walk this node to figure
	 * out which blocks to read.
	 */
	btrfs_unlock_up_safe(p, level + 1);
	btrfs_set_path_blocking(p);

	if (tmp)
		free_extent_buffer(tmp);
	if (p->reada)
		reada_for_search(root, p, level, slot, key->objectid);

	btrfs_release_path(NULL, p);

	ret = -EAGAIN;
	tmp = read_tree_block(root, blocknr, blocksize, gen);
	if (tmp) {
		/*
		 * If the read above didn't mark this buffer up to date,
		 * it will never end up being up to date.  Set ret to -EIO
		 * now and give up so that our caller doesn't loop forever
		 * on our EAGAINs.
		 */
		if (!btrfs_buffer_uptodate(tmp, 0))
			ret = -EIO;
		free_extent_buffer(tmp);
	}
	return ret;
}

/*
 * helper function for btrfs_search_slot.  This does all of the checks
 * for node-level blocks and does any balancing required based on
 * the ins_len.
 *
 * If no extra work was required, zero is returned.  If we had to
 * drop the path, -EAGAIN is returned and btrfs_search_slot must
 * start over
 */
static int
setup_nodes_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer *b, int level, int ins_len)
{
	int ret;
	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
		int sret;

		sret = reada_for_balance(root, p, level);
		if (sret)
			goto again;

		btrfs_set_path_blocking(p);
		sret = split_node(trans, root, p, level);
		btrfs_clear_path_blocking(p, NULL);

		BUG_ON(sret > 0);
		if (sret) {
			ret = sret;
			goto done;
		}
		b = p->nodes[level];
	} else if (ins_len < 0 && btrfs_header_nritems(b) <
		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
		int sret;

		sret = reada_for_balance(root, p, level);
		if (sret)
			goto again;

		btrfs_set_path_blocking(p);
		sret = balance_level(trans, root, p, level);
		btrfs_clear_path_blocking(p, NULL);

		if (sret) {
			ret = sret;
			goto done;
		}
		b = p->nodes[level];
		if (!b) {
			btrfs_release_path(NULL, p);
			goto again;
		}
		BUG_ON(btrfs_header_nritems(b) == 1);
	}
	return 0;

again:
	ret = -EAGAIN;
done:
	return ret;
}
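/*
 * Example of the ins_len convention (illustrative; exact callers may
 * vary): an insert of data_size bytes typically passes
 * ins_len = data_size + sizeof(struct btrfs_item) so leaves get split
 * early enough on the way down, while deletions pass a negative
 * ins_len so underfull nodes get merged instead.
 */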
/*
 * look for key in the tree.  path is filled in with nodes along
 * the way.  if key is found, we return zero and you can find the item
 * in the leaf level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted, and 1 is returned.  If there are other errors during
 * the search a negative error number is returned.
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree
 * (if possible)
 */
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_path *p, int
		      ins_len, int cow)
{
	struct extent_buffer *b;
	int slot;
	int ret;
	int level;
	int lowest_unlock = 1;
	u8 lowest_level = 0;

	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len > 0);
	WARN_ON(p->nodes[0] != NULL);

	if (ins_len < 0)
		lowest_unlock = 2;

again:
	if (p->search_commit_root) {
		b = root->commit_root;
		extent_buffer_get(b);
		if (!p->skip_locking)
			btrfs_tree_lock(b);
	} else {
		if (p->skip_locking)
			b = btrfs_root_node(root);
		else
			b = btrfs_lock_root_node(root);
	}

	while (b) {
		level = btrfs_header_level(b);

		/*
		 * setup the path here so we can release it under lock
		 * contention with the cow code
		 */
		p->nodes[level] = b;
		if (!p->skip_locking)
			p->locks[level] = 1;

		if (cow) {
			int wret;

			/*
			 * if we don't really need to cow this block
			 * then we don't want to set the path blocking,
			 * so we test it here
			 */
			if (!should_cow_block(trans, root, b))
				goto cow_done;

			btrfs_set_path_blocking(p);

			wret = btrfs_cow_block(trans, root, b,
					       p->nodes[level + 1],
					       p->slots[level + 1], &b);
			if (wret) {
				free_extent_buffer(b);
				ret = wret;
				goto done;
			}
		}
cow_done:
		BUG_ON(!cow && ins_len);
		if (level != btrfs_header_level(b))
			WARN_ON(1);
		level = btrfs_header_level(b);

		p->nodes[level] = b;
		if (!p->skip_locking)
			p->locks[level] = 1;

		btrfs_clear_path_blocking(p, NULL);

		/*
		 * we have a lock on b and as long as we aren't changing
		 * the tree, there is no way for the items in b to change.
		 * It is safe to drop the lock on our parent before we
		 * go through the expensive btree search on b.
		 *
		 * If cow is true, then we might be changing slot zero,
		 * which may require changing the parent.  So, we can't
		 * drop the lock until after we know which slot we're
		 * operating on.
		 */
		if (!cow)
			btrfs_unlock_up_safe(p, level + 1);

		ret = check_block(root, p, level);
		if (ret) {
			ret = -1;
			goto done;
		}

		ret = bin_search(b, key, level, &slot);

		if (level != 0) {
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
			ret = setup_nodes_for_search(trans, root, p, b, level,
						     ins_len);
			if (ret == -EAGAIN)
				goto again;
			else if (ret)
				goto done;
			b = p->nodes[level];
			slot = p->slots[level];

			unlock_up(p, level, lowest_unlock);

			/* this is only true while dropping a snapshot */
			if (level == lowest_level) {
				ret = 0;
				goto done;
			}

			ret = read_block_for_search(trans, root, p,
						    &b, level, slot, key);
			if (ret == -EAGAIN)
				goto again;

			if (ret == -EIO)
				goto done;

			if (!p->skip_locking) {
				int lret;

				btrfs_clear_path_blocking(p, NULL);
				lret = btrfs_try_spin_lock(b);

				if (!lret) {
					btrfs_set_path_blocking(p);
					btrfs_tree_lock(b);
					btrfs_clear_path_blocking(p, b);
				}
			}
		} else {
			p->slots[level] = slot;
			if (ins_len > 0 &&
			    btrfs_leaf_free_space(root, b) < ins_len) {
				int sret;

				btrfs_set_path_blocking(p);
				sret = split_leaf(trans, root, key,
						  p, ins_len, ret == 0);
				btrfs_clear_path_blocking(p, NULL);

				BUG_ON(sret > 0);
				if (sret) {
					ret = sret;
					goto done;
				}
			}
			if (!p->search_for_split)
				unlock_up(p, level, lowest_unlock);
			goto done;
		}
	}
	ret = 1;
done:
	/*
	 * we don't really know what they plan on doing with the path
	 * from here on, so for now just mark it as blocking
	 */
	if (!p->leave_spinning)
		btrfs_set_path_blocking(p);
	if (ret < 0)
		btrfs_release_path(root, p);
	return ret;
}

/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 *
 * If this fails to write a tree block, it returns -1, but continues
 * fixing up the blocks in ram so the tree is consistent.
 */
static int fixup_low_keys(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct btrfs_path *path,
			  struct btrfs_disk_key *key, int level)
{
	int i;
	int ret = 0;
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		int tslot = path->slots[i];
		if (!path->nodes[i])
			break;
		t = path->nodes[i];
		btrfs_set_node_key(t, key, tslot);
		btrfs_mark_buffer_dirty(path->nodes[i]);
		if (tslot != 0)
			break;
	}
	return ret;
}
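/*
 * Example (illustrative): if the first key of a leaf changes and that
 * leaf sits in slot 0 of its parent, the parent's key for it must be
 * rewritten as well; fixup_low_keys() walks upward doing this,
 * stopping at the first level where the path is not in slot 0.
 */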
/*
 * update item key.
 *
 * This function isn't completely safe.  It's the caller's
 * responsibility that the new key won't break the order
 */
int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, struct btrfs_path *path,
			    struct btrfs_key *new_key)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *eb;
	int slot;

	eb = path->nodes[0];
	slot = path->slots[0];
	if (slot > 0) {
		btrfs_item_key(eb, &disk_key, slot - 1);
		if (comp_keys(&disk_key, new_key) >= 0)
			return -1;
	}
	if (slot < btrfs_header_nritems(eb) - 1) {
		btrfs_item_key(eb, &disk_key, slot + 1);
		if (comp_keys(&disk_key, new_key) <= 0)
			return -1;
	}

	btrfs_cpu_key_to_disk(&disk_key, new_key);
	btrfs_set_item_key(eb, &disk_key, slot);
	btrfs_mark_buffer_dirty(eb);
	if (slot == 0)
		fixup_low_keys(trans, root, path, &disk_key, 1);
	return 0;
}

/*
 * try to push data from one node into the next node left in the
 * tree.
 *
 * returns 0 if some ptrs were pushed left, < 0 if there was some
 * horrible error, and > 0 if there was no room in the left hand block.
 */
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src, int empty)
{
	int push_items = 0;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

	if (!empty && src_nritems <= 8)
		return 1;

	if (push_items <= 0)
		return 1;

	if (empty) {
		push_items = min(src_nritems, push_items);
		if (push_items < src_nritems) {
			/* leave at least 8 pointers in the node if
			 * we aren't going to empty it
			 */
			if (src_nritems - push_items < 8) {
				if (push_items <= 8)
					return 1;
				push_items -= 8;
			}
		}
	} else
		push_items = min(src_nritems - 8, push_items);

	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(dst_nritems),
			   btrfs_node_key_ptr_offset(0),
			   push_items * sizeof(struct btrfs_key_ptr));

	if (push_items < src_nritems) {
		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
				      btrfs_node_key_ptr_offset(push_items),
				      (src_nritems - push_items) *
				      sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);

	return ret;
}
2005 * 2006 * this will only push up to 1/2 the contents of the left node over 2007 */ 2008 static int balance_node_right(struct btrfs_trans_handle *trans, 2009 struct btrfs_root *root, 2010 struct extent_buffer *dst, 2011 struct extent_buffer *src) 2012 { 2013 int push_items = 0; 2014 int max_push; 2015 int src_nritems; 2016 int dst_nritems; 2017 int ret = 0; 2018 2019 WARN_ON(btrfs_header_generation(src) != trans->transid); 2020 WARN_ON(btrfs_header_generation(dst) != trans->transid); 2021 2022 src_nritems = btrfs_header_nritems(src); 2023 dst_nritems = btrfs_header_nritems(dst); 2024 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 2025 if (push_items <= 0) 2026 return 1; 2027 2028 if (src_nritems < 4) 2029 return 1; 2030 2031 max_push = src_nritems / 2 + 1; 2032 /* don't try to empty the node */ 2033 if (max_push >= src_nritems) 2034 return 1; 2035 2036 if (max_push < push_items) 2037 push_items = max_push; 2038 2039 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), 2040 btrfs_node_key_ptr_offset(0), 2041 (dst_nritems) * 2042 sizeof(struct btrfs_key_ptr)); 2043 2044 copy_extent_buffer(dst, src, 2045 btrfs_node_key_ptr_offset(0), 2046 btrfs_node_key_ptr_offset(src_nritems - push_items), 2047 push_items * sizeof(struct btrfs_key_ptr)); 2048 2049 btrfs_set_header_nritems(src, src_nritems - push_items); 2050 btrfs_set_header_nritems(dst, dst_nritems + push_items); 2051 2052 btrfs_mark_buffer_dirty(src); 2053 btrfs_mark_buffer_dirty(dst); 2054 2055 return ret; 2056 } 2057 2058 /* 2059 * helper function to insert a new root level in the tree. 2060 * A new node is allocated, and a single item is inserted to 2061 * point to the existing root 2062 * 2063 * returns zero on success or < 0 on failure. 2064 */ 2065 static noinline int insert_new_root(struct btrfs_trans_handle *trans, 2066 struct btrfs_root *root, 2067 struct btrfs_path *path, int level) 2068 { 2069 u64 lower_gen; 2070 struct extent_buffer *lower; 2071 struct extent_buffer *c; 2072 struct extent_buffer *old; 2073 struct btrfs_disk_key lower_key; 2074 2075 BUG_ON(path->nodes[level]); 2076 BUG_ON(path->nodes[level-1] != root->node); 2077 2078 lower = path->nodes[level-1]; 2079 if (level == 1) 2080 btrfs_item_key(lower, &lower_key, 0); 2081 else 2082 btrfs_node_key(lower, &lower_key, 0); 2083 2084 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2085 root->root_key.objectid, &lower_key, 2086 level, root->node->start, 0); 2087 if (IS_ERR(c)) 2088 return PTR_ERR(c); 2089 2090 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); 2091 btrfs_set_header_nritems(c, 1); 2092 btrfs_set_header_level(c, level); 2093 btrfs_set_header_bytenr(c, c->start); 2094 btrfs_set_header_generation(c, trans->transid); 2095 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); 2096 btrfs_set_header_owner(c, root->root_key.objectid); 2097 2098 write_extent_buffer(c, root->fs_info->fsid, 2099 (unsigned long)btrfs_header_fsid(c), 2100 BTRFS_FSID_SIZE); 2101 2102 write_extent_buffer(c, root->fs_info->chunk_tree_uuid, 2103 (unsigned long)btrfs_header_chunk_tree_uuid(c), 2104 BTRFS_UUID_SIZE); 2105 2106 btrfs_set_node_key(c, &lower_key, 0); 2107 btrfs_set_node_blockptr(c, 0, lower->start); 2108 lower_gen = btrfs_header_generation(lower); 2109 WARN_ON(lower_gen != trans->transid); 2110 2111 btrfs_set_node_ptr_generation(c, 0, lower_gen); 2112 2113 btrfs_mark_buffer_dirty(c); 2114 2115 spin_lock(&root->node_lock); 2116 old = root->node; 2117 root->node = c; 2118 spin_unlock(&root->node_lock); 2119 2120 /* the super has an 
extra ref to root->node */
2121 	free_extent_buffer(old);
2122 
2123 	add_root_to_dirty_list(root);
2124 	extent_buffer_get(c);
2125 	path->nodes[level] = c;
2126 	path->locks[level] = 1;
2127 	path->slots[level] = 0;
2128 	return 0;
2129 }
2130 
2131 /*
2132  * worker function to insert a single pointer in a node.
2133  * the node should have enough room for the pointer already
2134  *
2135  * slot and level indicate where you want the key to go, and
2136  * bytenr is the block the key points to.
2137  *
2138  * returns zero on success and < 0 on any error
2139  */
2140 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
2141 		      *root, struct btrfs_path *path, struct btrfs_disk_key
2142 		      *key, u64 bytenr, int slot, int level)
2143 {
2144 	struct extent_buffer *lower;
2145 	int nritems;
2146 
2147 	BUG_ON(!path->nodes[level]);
2148 	lower = path->nodes[level];
2149 	nritems = btrfs_header_nritems(lower);
2150 	BUG_ON(slot > nritems);
2151 	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
2152 		BUG();
2153 	if (slot != nritems) {
2154 		memmove_extent_buffer(lower,
2155 			      btrfs_node_key_ptr_offset(slot + 1),
2156 			      btrfs_node_key_ptr_offset(slot),
2157 			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
2158 	}
2159 	btrfs_set_node_key(lower, key, slot);
2160 	btrfs_set_node_blockptr(lower, slot, bytenr);
2161 	WARN_ON(trans->transid == 0);
2162 	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
2163 	btrfs_set_header_nritems(lower, nritems + 1);
2164 	btrfs_mark_buffer_dirty(lower);
2165 	return 0;
2166 }
2167 
2168 /*
2169  * split the node at the specified level in path in two.
2170  * The path is corrected to point to the appropriate node after the split
2171  *
2172  * Before splitting this tries to make some room in the node by pushing
2173  * left and right, if either one works, it returns right away.
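 * An editorial sketch of the split itself: with c_nritems pointers in
 * the node, mid = (c_nritems + 1) / 2; the pointers in slots
 * [mid, c_nritems) are copied into the new block, and the parent gets
 * one extra pointer inserted just after the original node's slot.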
2174 * 2175 * returns 0 on success and < 0 on failure 2176 */ 2177 static noinline int split_node(struct btrfs_trans_handle *trans, 2178 struct btrfs_root *root, 2179 struct btrfs_path *path, int level) 2180 { 2181 struct extent_buffer *c; 2182 struct extent_buffer *split; 2183 struct btrfs_disk_key disk_key; 2184 int mid; 2185 int ret; 2186 int wret; 2187 u32 c_nritems; 2188 2189 c = path->nodes[level]; 2190 WARN_ON(btrfs_header_generation(c) != trans->transid); 2191 if (c == root->node) { 2192 /* trying to split the root, lets make a new one */ 2193 ret = insert_new_root(trans, root, path, level + 1); 2194 if (ret) 2195 return ret; 2196 } else { 2197 ret = push_nodes_for_insert(trans, root, path, level); 2198 c = path->nodes[level]; 2199 if (!ret && btrfs_header_nritems(c) < 2200 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) 2201 return 0; 2202 if (ret < 0) 2203 return ret; 2204 } 2205 2206 c_nritems = btrfs_header_nritems(c); 2207 mid = (c_nritems + 1) / 2; 2208 btrfs_node_key(c, &disk_key, mid); 2209 2210 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2211 root->root_key.objectid, 2212 &disk_key, level, c->start, 0); 2213 if (IS_ERR(split)) 2214 return PTR_ERR(split); 2215 2216 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); 2217 btrfs_set_header_level(split, btrfs_header_level(c)); 2218 btrfs_set_header_bytenr(split, split->start); 2219 btrfs_set_header_generation(split, trans->transid); 2220 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); 2221 btrfs_set_header_owner(split, root->root_key.objectid); 2222 write_extent_buffer(split, root->fs_info->fsid, 2223 (unsigned long)btrfs_header_fsid(split), 2224 BTRFS_FSID_SIZE); 2225 write_extent_buffer(split, root->fs_info->chunk_tree_uuid, 2226 (unsigned long)btrfs_header_chunk_tree_uuid(split), 2227 BTRFS_UUID_SIZE); 2228 2229 2230 copy_extent_buffer(split, c, 2231 btrfs_node_key_ptr_offset(0), 2232 btrfs_node_key_ptr_offset(mid), 2233 (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); 2234 btrfs_set_header_nritems(split, c_nritems - mid); 2235 btrfs_set_header_nritems(c, mid); 2236 ret = 0; 2237 2238 btrfs_mark_buffer_dirty(c); 2239 btrfs_mark_buffer_dirty(split); 2240 2241 wret = insert_ptr(trans, root, path, &disk_key, split->start, 2242 path->slots[level + 1] + 1, 2243 level + 1); 2244 if (wret) 2245 ret = wret; 2246 2247 if (path->slots[level] >= mid) { 2248 path->slots[level] -= mid; 2249 btrfs_tree_unlock(c); 2250 free_extent_buffer(c); 2251 path->nodes[level] = split; 2252 path->slots[level + 1] += 1; 2253 } else { 2254 btrfs_tree_unlock(split); 2255 free_extent_buffer(split); 2256 } 2257 return ret; 2258 } 2259 2260 /* 2261 * how many bytes are required to store the items in a leaf. start 2262 * and nr indicate which items in the leaf to check. This totals up the 2263 * space used both by the item structs and the item data 2264 */ 2265 static int leaf_space_used(struct extent_buffer *l, int start, int nr) 2266 { 2267 int data_len; 2268 int nritems = btrfs_header_nritems(l); 2269 int end = min(nritems, start + nr) - 1; 2270 2271 if (!nr) 2272 return 0; 2273 data_len = btrfs_item_end_nr(l, start); 2274 data_len = data_len - btrfs_item_offset_nr(l, end); 2275 data_len += sizeof(struct btrfs_item) * nr; 2276 WARN_ON(data_len < 0); 2277 return data_len; 2278 } 2279 2280 /* 2281 * The space between the end of the leaf items and 2282 * the start of the leaf data. 
IOW, how much room 2283 * the leaf has left for both items and data 2284 */ 2285 noinline int btrfs_leaf_free_space(struct btrfs_root *root, 2286 struct extent_buffer *leaf) 2287 { 2288 int nritems = btrfs_header_nritems(leaf); 2289 int ret; 2290 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); 2291 if (ret < 0) { 2292 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, " 2293 "used %d nritems %d\n", 2294 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), 2295 leaf_space_used(leaf, 0, nritems), nritems); 2296 } 2297 return ret; 2298 } 2299 2300 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, 2301 struct btrfs_root *root, 2302 struct btrfs_path *path, 2303 int data_size, int empty, 2304 struct extent_buffer *right, 2305 int free_space, u32 left_nritems) 2306 { 2307 struct extent_buffer *left = path->nodes[0]; 2308 struct extent_buffer *upper = path->nodes[1]; 2309 struct btrfs_disk_key disk_key; 2310 int slot; 2311 u32 i; 2312 int push_space = 0; 2313 int push_items = 0; 2314 struct btrfs_item *item; 2315 u32 nr; 2316 u32 right_nritems; 2317 u32 data_end; 2318 u32 this_item_size; 2319 2320 if (empty) 2321 nr = 0; 2322 else 2323 nr = 1; 2324 2325 if (path->slots[0] >= left_nritems) 2326 push_space += data_size; 2327 2328 slot = path->slots[1]; 2329 i = left_nritems - 1; 2330 while (i >= nr) { 2331 item = btrfs_item_nr(left, i); 2332 2333 if (!empty && push_items > 0) { 2334 if (path->slots[0] > i) 2335 break; 2336 if (path->slots[0] == i) { 2337 int space = btrfs_leaf_free_space(root, left); 2338 if (space + push_space * 2 > free_space) 2339 break; 2340 } 2341 } 2342 2343 if (path->slots[0] == i) 2344 push_space += data_size; 2345 2346 if (!left->map_token) { 2347 map_extent_buffer(left, (unsigned long)item, 2348 sizeof(struct btrfs_item), 2349 &left->map_token, &left->kaddr, 2350 &left->map_start, &left->map_len, 2351 KM_USER1); 2352 } 2353 2354 this_item_size = btrfs_item_size(left, item); 2355 if (this_item_size + sizeof(*item) + push_space > free_space) 2356 break; 2357 2358 push_items++; 2359 push_space += this_item_size + sizeof(*item); 2360 if (i == 0) 2361 break; 2362 i--; 2363 } 2364 if (left->map_token) { 2365 unmap_extent_buffer(left, left->map_token, KM_USER1); 2366 left->map_token = NULL; 2367 } 2368 2369 if (push_items == 0) 2370 goto out_unlock; 2371 2372 if (!empty && push_items == left_nritems) 2373 WARN_ON(1); 2374 2375 /* push left to right */ 2376 right_nritems = btrfs_header_nritems(right); 2377 2378 push_space = btrfs_item_end_nr(left, left_nritems - push_items); 2379 push_space -= leaf_data_end(root, left); 2380 2381 /* make room in the right data area */ 2382 data_end = leaf_data_end(root, right); 2383 memmove_extent_buffer(right, 2384 btrfs_leaf_data(right) + data_end - push_space, 2385 btrfs_leaf_data(right) + data_end, 2386 BTRFS_LEAF_DATA_SIZE(root) - data_end); 2387 2388 /* copy from the left data area */ 2389 copy_extent_buffer(right, left, btrfs_leaf_data(right) + 2390 BTRFS_LEAF_DATA_SIZE(root) - push_space, 2391 btrfs_leaf_data(left) + leaf_data_end(root, left), 2392 push_space); 2393 2394 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), 2395 btrfs_item_nr_offset(0), 2396 right_nritems * sizeof(struct btrfs_item)); 2397 2398 /* copy the items from left to right */ 2399 copy_extent_buffer(right, left, btrfs_item_nr_offset(0), 2400 btrfs_item_nr_offset(left_nritems - push_items), 2401 push_items * sizeof(struct btrfs_item)); 2402 2403 /* update the item pointers */ 2404 right_nritems += 
push_items; 2405 btrfs_set_header_nritems(right, right_nritems); 2406 push_space = BTRFS_LEAF_DATA_SIZE(root); 2407 for (i = 0; i < right_nritems; i++) { 2408 item = btrfs_item_nr(right, i); 2409 if (!right->map_token) { 2410 map_extent_buffer(right, (unsigned long)item, 2411 sizeof(struct btrfs_item), 2412 &right->map_token, &right->kaddr, 2413 &right->map_start, &right->map_len, 2414 KM_USER1); 2415 } 2416 push_space -= btrfs_item_size(right, item); 2417 btrfs_set_item_offset(right, item, push_space); 2418 } 2419 2420 if (right->map_token) { 2421 unmap_extent_buffer(right, right->map_token, KM_USER1); 2422 right->map_token = NULL; 2423 } 2424 left_nritems -= push_items; 2425 btrfs_set_header_nritems(left, left_nritems); 2426 2427 if (left_nritems) 2428 btrfs_mark_buffer_dirty(left); 2429 btrfs_mark_buffer_dirty(right); 2430 2431 btrfs_item_key(right, &disk_key, 0); 2432 btrfs_set_node_key(upper, &disk_key, slot + 1); 2433 btrfs_mark_buffer_dirty(upper); 2434 2435 /* then fixup the leaf pointer in the path */ 2436 if (path->slots[0] >= left_nritems) { 2437 path->slots[0] -= left_nritems; 2438 if (btrfs_header_nritems(path->nodes[0]) == 0) 2439 clean_tree_block(trans, root, path->nodes[0]); 2440 btrfs_tree_unlock(path->nodes[0]); 2441 free_extent_buffer(path->nodes[0]); 2442 path->nodes[0] = right; 2443 path->slots[1] += 1; 2444 } else { 2445 btrfs_tree_unlock(right); 2446 free_extent_buffer(right); 2447 } 2448 return 0; 2449 2450 out_unlock: 2451 btrfs_tree_unlock(right); 2452 free_extent_buffer(right); 2453 return 1; 2454 } 2455 2456 /* 2457 * push some data in the path leaf to the right, trying to free up at 2458 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2459 * 2460 * returns 1 if the push failed because the other node didn't have enough 2461 * room, 0 if everything worked out and < 0 if there were major errors. 2462 */ 2463 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 2464 *root, struct btrfs_path *path, int data_size, 2465 int empty) 2466 { 2467 struct extent_buffer *left = path->nodes[0]; 2468 struct extent_buffer *right; 2469 struct extent_buffer *upper; 2470 int slot; 2471 int free_space; 2472 u32 left_nritems; 2473 int ret; 2474 2475 if (!path->nodes[1]) 2476 return 1; 2477 2478 slot = path->slots[1]; 2479 upper = path->nodes[1]; 2480 if (slot >= btrfs_header_nritems(upper) - 1) 2481 return 1; 2482 2483 btrfs_assert_tree_locked(path->nodes[1]); 2484 2485 right = read_node_slot(root, upper, slot + 1); 2486 btrfs_tree_lock(right); 2487 btrfs_set_lock_blocking(right); 2488 2489 free_space = btrfs_leaf_free_space(root, right); 2490 if (free_space < data_size) 2491 goto out_unlock; 2492 2493 /* cow and double check */ 2494 ret = btrfs_cow_block(trans, root, right, upper, 2495 slot + 1, &right); 2496 if (ret) 2497 goto out_unlock; 2498 2499 free_space = btrfs_leaf_free_space(root, right); 2500 if (free_space < data_size) 2501 goto out_unlock; 2502 2503 left_nritems = btrfs_header_nritems(left); 2504 if (left_nritems == 0) 2505 goto out_unlock; 2506 2507 return __push_leaf_right(trans, root, path, data_size, empty, 2508 right, free_space, left_nritems); 2509 out_unlock: 2510 btrfs_tree_unlock(right); 2511 free_extent_buffer(right); 2512 return 1; 2513 } 2514 2515 /* 2516 * push some data in the path leaf to the left, trying to free up at 2517 * least data_size bytes. 
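 * (editorial note: 'empty' selects the aggressive mode used while
 * deleting; btrfs_del_items() pushes with empty == 1 to try to drain a
 * mostly-empty leaf completely)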
returns zero if the push worked, nonzero otherwise 2518 */ 2519 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, 2520 struct btrfs_root *root, 2521 struct btrfs_path *path, int data_size, 2522 int empty, struct extent_buffer *left, 2523 int free_space, int right_nritems) 2524 { 2525 struct btrfs_disk_key disk_key; 2526 struct extent_buffer *right = path->nodes[0]; 2527 int slot; 2528 int i; 2529 int push_space = 0; 2530 int push_items = 0; 2531 struct btrfs_item *item; 2532 u32 old_left_nritems; 2533 u32 nr; 2534 int ret = 0; 2535 int wret; 2536 u32 this_item_size; 2537 u32 old_left_item_size; 2538 2539 slot = path->slots[1]; 2540 2541 if (empty) 2542 nr = right_nritems; 2543 else 2544 nr = right_nritems - 1; 2545 2546 for (i = 0; i < nr; i++) { 2547 item = btrfs_item_nr(right, i); 2548 if (!right->map_token) { 2549 map_extent_buffer(right, (unsigned long)item, 2550 sizeof(struct btrfs_item), 2551 &right->map_token, &right->kaddr, 2552 &right->map_start, &right->map_len, 2553 KM_USER1); 2554 } 2555 2556 if (!empty && push_items > 0) { 2557 if (path->slots[0] < i) 2558 break; 2559 if (path->slots[0] == i) { 2560 int space = btrfs_leaf_free_space(root, right); 2561 if (space + push_space * 2 > free_space) 2562 break; 2563 } 2564 } 2565 2566 if (path->slots[0] == i) 2567 push_space += data_size; 2568 2569 this_item_size = btrfs_item_size(right, item); 2570 if (this_item_size + sizeof(*item) + push_space > free_space) 2571 break; 2572 2573 push_items++; 2574 push_space += this_item_size + sizeof(*item); 2575 } 2576 2577 if (right->map_token) { 2578 unmap_extent_buffer(right, right->map_token, KM_USER1); 2579 right->map_token = NULL; 2580 } 2581 2582 if (push_items == 0) { 2583 ret = 1; 2584 goto out; 2585 } 2586 if (!empty && push_items == btrfs_header_nritems(right)) 2587 WARN_ON(1); 2588 2589 /* push data from right to left */ 2590 copy_extent_buffer(left, right, 2591 btrfs_item_nr_offset(btrfs_header_nritems(left)), 2592 btrfs_item_nr_offset(0), 2593 push_items * sizeof(struct btrfs_item)); 2594 2595 push_space = BTRFS_LEAF_DATA_SIZE(root) - 2596 btrfs_item_offset_nr(right, push_items - 1); 2597 2598 copy_extent_buffer(left, right, btrfs_leaf_data(left) + 2599 leaf_data_end(root, left) - push_space, 2600 btrfs_leaf_data(right) + 2601 btrfs_item_offset_nr(right, push_items - 1), 2602 push_space); 2603 old_left_nritems = btrfs_header_nritems(left); 2604 BUG_ON(old_left_nritems <= 0); 2605 2606 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); 2607 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 2608 u32 ioff; 2609 2610 item = btrfs_item_nr(left, i); 2611 if (!left->map_token) { 2612 map_extent_buffer(left, (unsigned long)item, 2613 sizeof(struct btrfs_item), 2614 &left->map_token, &left->kaddr, 2615 &left->map_start, &left->map_len, 2616 KM_USER1); 2617 } 2618 2619 ioff = btrfs_item_offset(left, item); 2620 btrfs_set_item_offset(left, item, 2621 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); 2622 } 2623 btrfs_set_header_nritems(left, old_left_nritems + push_items); 2624 if (left->map_token) { 2625 unmap_extent_buffer(left, left->map_token, KM_USER1); 2626 left->map_token = NULL; 2627 } 2628 2629 /* fixup right node */ 2630 if (push_items > right_nritems) { 2631 printk(KERN_CRIT "push items %d nr %u\n", push_items, 2632 right_nritems); 2633 WARN_ON(1); 2634 } 2635 2636 if (push_items < right_nritems) { 2637 push_space = btrfs_item_offset_nr(right, push_items - 1) - 2638 leaf_data_end(root, right); 2639 
memmove_extent_buffer(right, btrfs_leaf_data(right) + 2640 BTRFS_LEAF_DATA_SIZE(root) - push_space, 2641 btrfs_leaf_data(right) + 2642 leaf_data_end(root, right), push_space); 2643 2644 memmove_extent_buffer(right, btrfs_item_nr_offset(0), 2645 btrfs_item_nr_offset(push_items), 2646 (btrfs_header_nritems(right) - push_items) * 2647 sizeof(struct btrfs_item)); 2648 } 2649 right_nritems -= push_items; 2650 btrfs_set_header_nritems(right, right_nritems); 2651 push_space = BTRFS_LEAF_DATA_SIZE(root); 2652 for (i = 0; i < right_nritems; i++) { 2653 item = btrfs_item_nr(right, i); 2654 2655 if (!right->map_token) { 2656 map_extent_buffer(right, (unsigned long)item, 2657 sizeof(struct btrfs_item), 2658 &right->map_token, &right->kaddr, 2659 &right->map_start, &right->map_len, 2660 KM_USER1); 2661 } 2662 2663 push_space = push_space - btrfs_item_size(right, item); 2664 btrfs_set_item_offset(right, item, push_space); 2665 } 2666 if (right->map_token) { 2667 unmap_extent_buffer(right, right->map_token, KM_USER1); 2668 right->map_token = NULL; 2669 } 2670 2671 btrfs_mark_buffer_dirty(left); 2672 if (right_nritems) 2673 btrfs_mark_buffer_dirty(right); 2674 2675 btrfs_item_key(right, &disk_key, 0); 2676 wret = fixup_low_keys(trans, root, path, &disk_key, 1); 2677 if (wret) 2678 ret = wret; 2679 2680 /* then fixup the leaf pointer in the path */ 2681 if (path->slots[0] < push_items) { 2682 path->slots[0] += old_left_nritems; 2683 if (btrfs_header_nritems(path->nodes[0]) == 0) 2684 clean_tree_block(trans, root, path->nodes[0]); 2685 btrfs_tree_unlock(path->nodes[0]); 2686 free_extent_buffer(path->nodes[0]); 2687 path->nodes[0] = left; 2688 path->slots[1] -= 1; 2689 } else { 2690 btrfs_tree_unlock(left); 2691 free_extent_buffer(left); 2692 path->slots[0] -= push_items; 2693 } 2694 BUG_ON(path->slots[0] < 0); 2695 return ret; 2696 out: 2697 btrfs_tree_unlock(left); 2698 free_extent_buffer(left); 2699 return ret; 2700 } 2701 2702 /* 2703 * push some data in the path leaf to the left, trying to free up at 2704 * least data_size bytes. 
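 * (editorial note: a return of 1 simply means there was no room; e.g.
 * split_leaf() calls push_leaf_left(trans, root, path, data_size, 0)
 * and falls through to a real split when it gets 1 back)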
returns zero if the push worked, nonzero otherwise 2705 */ 2706 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 2707 *root, struct btrfs_path *path, int data_size, 2708 int empty) 2709 { 2710 struct extent_buffer *right = path->nodes[0]; 2711 struct extent_buffer *left; 2712 int slot; 2713 int free_space; 2714 u32 right_nritems; 2715 int ret = 0; 2716 2717 slot = path->slots[1]; 2718 if (slot == 0) 2719 return 1; 2720 if (!path->nodes[1]) 2721 return 1; 2722 2723 right_nritems = btrfs_header_nritems(right); 2724 if (right_nritems == 0) 2725 return 1; 2726 2727 btrfs_assert_tree_locked(path->nodes[1]); 2728 2729 left = read_node_slot(root, path->nodes[1], slot - 1); 2730 btrfs_tree_lock(left); 2731 btrfs_set_lock_blocking(left); 2732 2733 free_space = btrfs_leaf_free_space(root, left); 2734 if (free_space < data_size) { 2735 ret = 1; 2736 goto out; 2737 } 2738 2739 /* cow and double check */ 2740 ret = btrfs_cow_block(trans, root, left, 2741 path->nodes[1], slot - 1, &left); 2742 if (ret) { 2743 /* we hit -ENOSPC, but it isn't fatal here */ 2744 ret = 1; 2745 goto out; 2746 } 2747 2748 free_space = btrfs_leaf_free_space(root, left); 2749 if (free_space < data_size) { 2750 ret = 1; 2751 goto out; 2752 } 2753 2754 return __push_leaf_left(trans, root, path, data_size, 2755 empty, left, free_space, right_nritems); 2756 out: 2757 btrfs_tree_unlock(left); 2758 free_extent_buffer(left); 2759 return ret; 2760 } 2761 2762 /* 2763 * split the path's leaf in two, making sure there is at least data_size 2764 * available for the resulting leaf level of the path. 2765 * 2766 * returns 0 if all went well and < 0 on failure. 2767 */ 2768 static noinline int copy_for_split(struct btrfs_trans_handle *trans, 2769 struct btrfs_root *root, 2770 struct btrfs_path *path, 2771 struct extent_buffer *l, 2772 struct extent_buffer *right, 2773 int slot, int mid, int nritems) 2774 { 2775 int data_copy_size; 2776 int rt_data_off; 2777 int i; 2778 int ret = 0; 2779 int wret; 2780 struct btrfs_disk_key disk_key; 2781 2782 nritems = nritems - mid; 2783 btrfs_set_header_nritems(right, nritems); 2784 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); 2785 2786 copy_extent_buffer(right, l, btrfs_item_nr_offset(0), 2787 btrfs_item_nr_offset(mid), 2788 nritems * sizeof(struct btrfs_item)); 2789 2790 copy_extent_buffer(right, l, 2791 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - 2792 data_copy_size, btrfs_leaf_data(l) + 2793 leaf_data_end(root, l), data_copy_size); 2794 2795 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - 2796 btrfs_item_end_nr(l, mid); 2797 2798 for (i = 0; i < nritems; i++) { 2799 struct btrfs_item *item = btrfs_item_nr(right, i); 2800 u32 ioff; 2801 2802 if (!right->map_token) { 2803 map_extent_buffer(right, (unsigned long)item, 2804 sizeof(struct btrfs_item), 2805 &right->map_token, &right->kaddr, 2806 &right->map_start, &right->map_len, 2807 KM_USER1); 2808 } 2809 2810 ioff = btrfs_item_offset(right, item); 2811 btrfs_set_item_offset(right, item, ioff + rt_data_off); 2812 } 2813 2814 if (right->map_token) { 2815 unmap_extent_buffer(right, right->map_token, KM_USER1); 2816 right->map_token = NULL; 2817 } 2818 2819 btrfs_set_header_nritems(l, mid); 2820 ret = 0; 2821 btrfs_item_key(right, &disk_key, 0); 2822 wret = insert_ptr(trans, root, path, &disk_key, right->start, 2823 path->slots[1] + 1, 1); 2824 if (wret) 2825 ret = wret; 2826 2827 btrfs_mark_buffer_dirty(right); 2828 btrfs_mark_buffer_dirty(l); 2829 BUG_ON(path->slots[0] != slot); 2830 2831 if (mid <= 
slot) { 2832 btrfs_tree_unlock(path->nodes[0]); 2833 free_extent_buffer(path->nodes[0]); 2834 path->nodes[0] = right; 2835 path->slots[0] -= mid; 2836 path->slots[1] += 1; 2837 } else { 2838 btrfs_tree_unlock(right); 2839 free_extent_buffer(right); 2840 } 2841 2842 BUG_ON(path->slots[0] < 0); 2843 2844 return ret; 2845 } 2846 2847 /* 2848 * split the path's leaf in two, making sure there is at least data_size 2849 * available for the resulting leaf level of the path. 2850 * 2851 * returns 0 if all went well and < 0 on failure. 2852 */ 2853 static noinline int split_leaf(struct btrfs_trans_handle *trans, 2854 struct btrfs_root *root, 2855 struct btrfs_key *ins_key, 2856 struct btrfs_path *path, int data_size, 2857 int extend) 2858 { 2859 struct btrfs_disk_key disk_key; 2860 struct extent_buffer *l; 2861 u32 nritems; 2862 int mid; 2863 int slot; 2864 struct extent_buffer *right; 2865 int ret = 0; 2866 int wret; 2867 int split; 2868 int num_doubles = 0; 2869 2870 /* first try to make some room by pushing left and right */ 2871 if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { 2872 wret = push_leaf_right(trans, root, path, data_size, 0); 2873 if (wret < 0) 2874 return wret; 2875 if (wret) { 2876 wret = push_leaf_left(trans, root, path, data_size, 0); 2877 if (wret < 0) 2878 return wret; 2879 } 2880 l = path->nodes[0]; 2881 2882 /* did the pushes work? */ 2883 if (btrfs_leaf_free_space(root, l) >= data_size) 2884 return 0; 2885 } 2886 2887 if (!path->nodes[1]) { 2888 ret = insert_new_root(trans, root, path, 1); 2889 if (ret) 2890 return ret; 2891 } 2892 again: 2893 split = 1; 2894 l = path->nodes[0]; 2895 slot = path->slots[0]; 2896 nritems = btrfs_header_nritems(l); 2897 mid = (nritems + 1) / 2; 2898 2899 if (mid <= slot) { 2900 if (nritems == 1 || 2901 leaf_space_used(l, mid, nritems - mid) + data_size > 2902 BTRFS_LEAF_DATA_SIZE(root)) { 2903 if (slot >= nritems) { 2904 split = 0; 2905 } else { 2906 mid = slot; 2907 if (mid != nritems && 2908 leaf_space_used(l, mid, nritems - mid) + 2909 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2910 split = 2; 2911 } 2912 } 2913 } 2914 } else { 2915 if (leaf_space_used(l, 0, mid) + data_size > 2916 BTRFS_LEAF_DATA_SIZE(root)) { 2917 if (!extend && data_size && slot == 0) { 2918 split = 0; 2919 } else if ((extend || !data_size) && slot == 0) { 2920 mid = 1; 2921 } else { 2922 mid = slot; 2923 if (mid != nritems && 2924 leaf_space_used(l, mid, nritems - mid) + 2925 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2926 split = 2 ; 2927 } 2928 } 2929 } 2930 } 2931 2932 if (split == 0) 2933 btrfs_cpu_key_to_disk(&disk_key, ins_key); 2934 else 2935 btrfs_item_key(l, &disk_key, mid); 2936 2937 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 2938 root->root_key.objectid, 2939 &disk_key, 0, l->start, 0); 2940 if (IS_ERR(right)) { 2941 BUG_ON(1); 2942 return PTR_ERR(right); 2943 } 2944 2945 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); 2946 btrfs_set_header_bytenr(right, right->start); 2947 btrfs_set_header_generation(right, trans->transid); 2948 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); 2949 btrfs_set_header_owner(right, root->root_key.objectid); 2950 btrfs_set_header_level(right, 0); 2951 write_extent_buffer(right, root->fs_info->fsid, 2952 (unsigned long)btrfs_header_fsid(right), 2953 BTRFS_FSID_SIZE); 2954 2955 write_extent_buffer(right, root->fs_info->chunk_tree_uuid, 2956 (unsigned long)btrfs_header_chunk_tree_uuid(right), 2957 BTRFS_UUID_SIZE); 2958 2959 if (split == 0) { 2960 if (mid <= slot) { 2961 
btrfs_set_header_nritems(right, 0);
2962 			wret = insert_ptr(trans, root, path,
2963 					  &disk_key, right->start,
2964 					  path->slots[1] + 1, 1);
2965 			if (wret)
2966 				ret = wret;
2967 
2968 			btrfs_tree_unlock(path->nodes[0]);
2969 			free_extent_buffer(path->nodes[0]);
2970 			path->nodes[0] = right;
2971 			path->slots[0] = 0;
2972 			path->slots[1] += 1;
2973 		} else {
2974 			btrfs_set_header_nritems(right, 0);
2975 			wret = insert_ptr(trans, root, path,
2976 					  &disk_key,
2977 					  right->start,
2978 					  path->slots[1], 1);
2979 			if (wret)
2980 				ret = wret;
2981 			btrfs_tree_unlock(path->nodes[0]);
2982 			free_extent_buffer(path->nodes[0]);
2983 			path->nodes[0] = right;
2984 			path->slots[0] = 0;
2985 			if (path->slots[1] == 0) {
2986 				wret = fixup_low_keys(trans, root,
2987 						      path, &disk_key, 1);
2988 				if (wret)
2989 					ret = wret;
2990 			}
2991 		}
2992 		btrfs_mark_buffer_dirty(right);
2993 		return ret;
2994 	}
2995 
2996 	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
2997 	BUG_ON(ret);
2998 
2999 	if (split == 2) {
3000 		BUG_ON(num_doubles != 0);
3001 		num_doubles++;
3002 		goto again;
3003 	}
3004 
3005 	return ret;
3006 }
3007 
3008 /*
3009  * This function splits a single item into two items,
3010  * giving 'new_key' to the new item and splitting the
3011  * old one at split_offset (from the start of the item).
3012  *
3013  * The path may be released by this operation. After
3014  * the split, the path is pointing to the old item. The
3015  * new item is going to be in the same node as the old one.
3016  *
3017  * Note, the item being split must be small enough to live alone on
3018  * a tree block with room for one extra struct btrfs_item
3019  *
3020  * This allows us to split the item in place, keeping a lock on the
3021  * leaf the entire time.
3022  */
3023 int btrfs_split_item(struct btrfs_trans_handle *trans,
3024 		     struct btrfs_root *root,
3025 		     struct btrfs_path *path,
3026 		     struct btrfs_key *new_key,
3027 		     unsigned long split_offset)
3028 {
3029 	u32 item_size;
3030 	struct extent_buffer *leaf;
3031 	struct btrfs_key orig_key;
3032 	struct btrfs_item *item;
3033 	struct btrfs_item *new_item;
3034 	int ret = 0;
3035 	int slot;
3036 	u32 nritems;
3037 	u32 orig_offset;
3038 	struct btrfs_disk_key disk_key;
3039 	char *buf;
3040 
3041 	leaf = path->nodes[0];
3042 	btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
3043 	if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
3044 		goto split;
3045 
3046 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3047 	btrfs_release_path(root, path);
3048 
3049 	path->search_for_split = 1;
3050 	path->keep_locks = 1;
3051 
3052 	ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
3053 	path->search_for_split = 0;
3054 
3055 	/* if our item isn't there or got smaller, return now */
3056 	if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
3057 							path->slots[0])) {
3058 		path->keep_locks = 0;
3059 		return -EAGAIN;
3060 	}
3061 
3062 	btrfs_set_path_blocking(path);
3063 	ret = split_leaf(trans, root, &orig_key, path,
3064 			 sizeof(struct btrfs_item), 1);
3065 	path->keep_locks = 0;
3066 	BUG_ON(ret);
3067 
3068 	btrfs_unlock_up_safe(path, 1);
3069 	leaf = path->nodes[0];
3070 	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3071 
3072 split:
3073 	/*
3074 	 * make sure any changes to the path from split_leaf leave it
3075 	 * in a blocking state
3076 	 */
3077 	btrfs_set_path_blocking(path);
3078 
3079 	item = btrfs_item_nr(leaf, path->slots[0]);
3080 	orig_offset = btrfs_item_offset(leaf, item);
3081 	item_size = btrfs_item_size(leaf, item);
3082 
3083 	buf = kmalloc(item_size, GFP_NOFS);
	if (!buf)
		return -ENOMEM;
3084 
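/*
 * (editorial note) what follows is the in-place split: the item body is
 * saved into buf, the item array is shifted to open up slot + 1, the
 * tail [split_offset, item_size) of the data becomes the new item under
 * new_key, and the original item is trimmed down to split_offset bytes.
 */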
read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3085 path->slots[0]), item_size); 3086 slot = path->slots[0] + 1; 3087 leaf = path->nodes[0]; 3088 3089 nritems = btrfs_header_nritems(leaf); 3090 3091 if (slot != nritems) { 3092 /* shift the items */ 3093 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3094 btrfs_item_nr_offset(slot), 3095 (nritems - slot) * sizeof(struct btrfs_item)); 3096 3097 } 3098 3099 btrfs_cpu_key_to_disk(&disk_key, new_key); 3100 btrfs_set_item_key(leaf, &disk_key, slot); 3101 3102 new_item = btrfs_item_nr(leaf, slot); 3103 3104 btrfs_set_item_offset(leaf, new_item, orig_offset); 3105 btrfs_set_item_size(leaf, new_item, item_size - split_offset); 3106 3107 btrfs_set_item_offset(leaf, item, 3108 orig_offset + item_size - split_offset); 3109 btrfs_set_item_size(leaf, item, split_offset); 3110 3111 btrfs_set_header_nritems(leaf, nritems + 1); 3112 3113 /* write the data for the start of the original item */ 3114 write_extent_buffer(leaf, buf, 3115 btrfs_item_ptr_offset(leaf, path->slots[0]), 3116 split_offset); 3117 3118 /* write the data for the new item */ 3119 write_extent_buffer(leaf, buf + split_offset, 3120 btrfs_item_ptr_offset(leaf, slot), 3121 item_size - split_offset); 3122 btrfs_mark_buffer_dirty(leaf); 3123 3124 ret = 0; 3125 if (btrfs_leaf_free_space(root, leaf) < 0) { 3126 btrfs_print_leaf(root, leaf); 3127 BUG(); 3128 } 3129 kfree(buf); 3130 return ret; 3131 } 3132 3133 /* 3134 * make the item pointed to by the path smaller. new_size indicates 3135 * how small to make it, and from_end tells us if we just chop bytes 3136 * off the end of the item or if we shift the item to chop bytes off 3137 * the front. 3138 */ 3139 int btrfs_truncate_item(struct btrfs_trans_handle *trans, 3140 struct btrfs_root *root, 3141 struct btrfs_path *path, 3142 u32 new_size, int from_end) 3143 { 3144 int ret = 0; 3145 int slot; 3146 int slot_orig; 3147 struct extent_buffer *leaf; 3148 struct btrfs_item *item; 3149 u32 nritems; 3150 unsigned int data_end; 3151 unsigned int old_data_start; 3152 unsigned int old_size; 3153 unsigned int size_diff; 3154 int i; 3155 3156 slot_orig = path->slots[0]; 3157 leaf = path->nodes[0]; 3158 slot = path->slots[0]; 3159 3160 old_size = btrfs_item_size_nr(leaf, slot); 3161 if (old_size == new_size) 3162 return 0; 3163 3164 nritems = btrfs_header_nritems(leaf); 3165 data_end = leaf_data_end(root, leaf); 3166 3167 old_data_start = btrfs_item_offset_nr(leaf, slot); 3168 3169 size_diff = old_size - new_size; 3170 3171 BUG_ON(slot < 0); 3172 BUG_ON(slot >= nritems); 3173 3174 /* 3175 * item0..itemN ... dataN.offset..dataN.size .. 
data0.size 3176 */ 3177 /* first correct the data pointers */ 3178 for (i = slot; i < nritems; i++) { 3179 u32 ioff; 3180 item = btrfs_item_nr(leaf, i); 3181 3182 if (!leaf->map_token) { 3183 map_extent_buffer(leaf, (unsigned long)item, 3184 sizeof(struct btrfs_item), 3185 &leaf->map_token, &leaf->kaddr, 3186 &leaf->map_start, &leaf->map_len, 3187 KM_USER1); 3188 } 3189 3190 ioff = btrfs_item_offset(leaf, item); 3191 btrfs_set_item_offset(leaf, item, ioff + size_diff); 3192 } 3193 3194 if (leaf->map_token) { 3195 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3196 leaf->map_token = NULL; 3197 } 3198 3199 /* shift the data */ 3200 if (from_end) { 3201 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3202 data_end + size_diff, btrfs_leaf_data(leaf) + 3203 data_end, old_data_start + new_size - data_end); 3204 } else { 3205 struct btrfs_disk_key disk_key; 3206 u64 offset; 3207 3208 btrfs_item_key(leaf, &disk_key, slot); 3209 3210 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { 3211 unsigned long ptr; 3212 struct btrfs_file_extent_item *fi; 3213 3214 fi = btrfs_item_ptr(leaf, slot, 3215 struct btrfs_file_extent_item); 3216 fi = (struct btrfs_file_extent_item *)( 3217 (unsigned long)fi - size_diff); 3218 3219 if (btrfs_file_extent_type(leaf, fi) == 3220 BTRFS_FILE_EXTENT_INLINE) { 3221 ptr = btrfs_item_ptr_offset(leaf, slot); 3222 memmove_extent_buffer(leaf, ptr, 3223 (unsigned long)fi, 3224 offsetof(struct btrfs_file_extent_item, 3225 disk_bytenr)); 3226 } 3227 } 3228 3229 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3230 data_end + size_diff, btrfs_leaf_data(leaf) + 3231 data_end, old_data_start - data_end); 3232 3233 offset = btrfs_disk_key_offset(&disk_key); 3234 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 3235 btrfs_set_item_key(leaf, &disk_key, slot); 3236 if (slot == 0) 3237 fixup_low_keys(trans, root, path, &disk_key, 1); 3238 } 3239 3240 item = btrfs_item_nr(leaf, slot); 3241 btrfs_set_item_size(leaf, item, new_size); 3242 btrfs_mark_buffer_dirty(leaf); 3243 3244 ret = 0; 3245 if (btrfs_leaf_free_space(root, leaf) < 0) { 3246 btrfs_print_leaf(root, leaf); 3247 BUG(); 3248 } 3249 return ret; 3250 } 3251 3252 /* 3253 * make the item pointed to by the path bigger, data_size is the new size. 3254 */ 3255 int btrfs_extend_item(struct btrfs_trans_handle *trans, 3256 struct btrfs_root *root, struct btrfs_path *path, 3257 u32 data_size) 3258 { 3259 int ret = 0; 3260 int slot; 3261 int slot_orig; 3262 struct extent_buffer *leaf; 3263 struct btrfs_item *item; 3264 u32 nritems; 3265 unsigned int data_end; 3266 unsigned int old_data; 3267 unsigned int old_size; 3268 int i; 3269 3270 slot_orig = path->slots[0]; 3271 leaf = path->nodes[0]; 3272 3273 nritems = btrfs_header_nritems(leaf); 3274 data_end = leaf_data_end(root, leaf); 3275 3276 if (btrfs_leaf_free_space(root, leaf) < data_size) { 3277 btrfs_print_leaf(root, leaf); 3278 BUG(); 3279 } 3280 slot = path->slots[0]; 3281 old_data = btrfs_item_end_nr(leaf, slot); 3282 3283 BUG_ON(slot < 0); 3284 if (slot >= nritems) { 3285 btrfs_print_leaf(root, leaf); 3286 printk(KERN_CRIT "slot %d too large, nritems %d\n", 3287 slot, nritems); 3288 BUG_ON(1); 3289 } 3290 3291 /* 3292 * item0..itemN ... dataN.offset..dataN.size .. 
data0.size
3293 	 */
3294 	/* first correct the data pointers */
3295 	for (i = slot; i < nritems; i++) {
3296 		u32 ioff;
3297 		item = btrfs_item_nr(leaf, i);
3298 
3299 		if (!leaf->map_token) {
3300 			map_extent_buffer(leaf, (unsigned long)item,
3301 					sizeof(struct btrfs_item),
3302 					&leaf->map_token, &leaf->kaddr,
3303 					&leaf->map_start, &leaf->map_len,
3304 					KM_USER1);
3305 		}
3306 		ioff = btrfs_item_offset(leaf, item);
3307 		btrfs_set_item_offset(leaf, item, ioff - data_size);
3308 	}
3309 
3310 	if (leaf->map_token) {
3311 		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3312 		leaf->map_token = NULL;
3313 	}
3314 
3315 	/* shift the data */
3316 	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3317 		      data_end - data_size, btrfs_leaf_data(leaf) +
3318 		      data_end, old_data - data_end);
3319 
3320 	data_end = old_data;
3321 	old_size = btrfs_item_size_nr(leaf, slot);
3322 	item = btrfs_item_nr(leaf, slot);
3323 	btrfs_set_item_size(leaf, item, old_size + data_size);
3324 	btrfs_mark_buffer_dirty(leaf);
3325 
3326 	ret = 0;
3327 	if (btrfs_leaf_free_space(root, leaf) < 0) {
3328 		btrfs_print_leaf(root, leaf);
3329 		BUG();
3330 	}
3331 	return ret;
3332 }
3333 
3334 /*
3335  * Given a key and some data, insert items into the tree.
3336  * This does all the path init required, making room in the tree if needed.
3337  * Returns the number of keys that were inserted.
3338  */
3339 int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3340 			    struct btrfs_root *root,
3341 			    struct btrfs_path *path,
3342 			    struct btrfs_key *cpu_key, u32 *data_size,
3343 			    int nr)
3344 {
3345 	struct extent_buffer *leaf;
3346 	struct btrfs_item *item;
3347 	int ret = 0;
3348 	int slot;
3349 	int i;
3350 	u32 nritems;
3351 	u32 total_data = 0;
3352 	u32 total_size = 0;
3353 	unsigned int data_end;
3354 	struct btrfs_disk_key disk_key;
3355 	struct btrfs_key found_key;
3356 
3357 	for (i = 0; i < nr; i++) {
3358 		if (total_size + data_size[i] + sizeof(struct btrfs_item) >
3359 		    BTRFS_LEAF_DATA_SIZE(root)) {
3360 			nr = i;
3361 			break;
3362 		}
3363 		total_data += data_size[i];
3364 		total_size += data_size[i] + sizeof(struct btrfs_item);
3365 	}
3366 	BUG_ON(nr == 0);
3367 
3368 	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3369 	if (ret == 0)
3370 		return -EEXIST;
3371 	if (ret < 0)
3372 		goto out;
3373 
3374 	leaf = path->nodes[0];
3375 
3376 	nritems = btrfs_header_nritems(leaf);
3377 	data_end = leaf_data_end(root, leaf);
3378 
3379 	if (btrfs_leaf_free_space(root, leaf) < total_size) {
3380 		for (i = nr - 1; i >= 0; i--) {
3381 			total_data -= data_size[i];
3382 			total_size -= data_size[i] + sizeof(struct btrfs_item);
3383 			if (total_size < btrfs_leaf_free_space(root, leaf))
3384 				break;
3385 		}
3386 		nr = i;
3387 	}
3388 
3389 	slot = path->slots[0];
3390 	BUG_ON(slot < 0);
3391 
3392 	if (slot != nritems) {
3393 		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3394 
3395 		item = btrfs_item_nr(leaf, slot);
3396 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
3397 
3398 		/* figure out how many keys we can insert in here */
3399 		total_data = data_size[0];
3400 		for (i = 1; i < nr; i++) {
3401 			if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3402 				break;
3403 			total_data += data_size[i];
3404 		}
3405 		nr = i;
3406 
3407 		if (old_data < data_end) {
3408 			btrfs_print_leaf(root, leaf);
3409 			printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3410 			       slot, old_data, data_end);
3411 			BUG_ON(1);
3412 		}
3413 		/*
3414 		 * item0..itemN ... dataN.offset..dataN.size ..
data0.size 3415 */ 3416 /* first correct the data pointers */ 3417 WARN_ON(leaf->map_token); 3418 for (i = slot; i < nritems; i++) { 3419 u32 ioff; 3420 3421 item = btrfs_item_nr(leaf, i); 3422 if (!leaf->map_token) { 3423 map_extent_buffer(leaf, (unsigned long)item, 3424 sizeof(struct btrfs_item), 3425 &leaf->map_token, &leaf->kaddr, 3426 &leaf->map_start, &leaf->map_len, 3427 KM_USER1); 3428 } 3429 3430 ioff = btrfs_item_offset(leaf, item); 3431 btrfs_set_item_offset(leaf, item, ioff - total_data); 3432 } 3433 if (leaf->map_token) { 3434 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3435 leaf->map_token = NULL; 3436 } 3437 3438 /* shift the items */ 3439 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3440 btrfs_item_nr_offset(slot), 3441 (nritems - slot) * sizeof(struct btrfs_item)); 3442 3443 /* shift the data */ 3444 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3445 data_end - total_data, btrfs_leaf_data(leaf) + 3446 data_end, old_data - data_end); 3447 data_end = old_data; 3448 } else { 3449 /* 3450 * this sucks but it has to be done, if we are inserting at 3451 * the end of the leaf only insert 1 of the items, since we 3452 * have no way of knowing whats on the next leaf and we'd have 3453 * to drop our current locks to figure it out 3454 */ 3455 nr = 1; 3456 } 3457 3458 /* setup the item for the new data */ 3459 for (i = 0; i < nr; i++) { 3460 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3461 btrfs_set_item_key(leaf, &disk_key, slot + i); 3462 item = btrfs_item_nr(leaf, slot + i); 3463 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3464 data_end -= data_size[i]; 3465 btrfs_set_item_size(leaf, item, data_size[i]); 3466 } 3467 btrfs_set_header_nritems(leaf, nritems + nr); 3468 btrfs_mark_buffer_dirty(leaf); 3469 3470 ret = 0; 3471 if (slot == 0) { 3472 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3473 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3474 } 3475 3476 if (btrfs_leaf_free_space(root, leaf) < 0) { 3477 btrfs_print_leaf(root, leaf); 3478 BUG(); 3479 } 3480 out: 3481 if (!ret) 3482 ret = nr; 3483 return ret; 3484 } 3485 3486 /* 3487 * this is a helper for btrfs_insert_empty_items, the main goal here is 3488 * to save stack depth by doing the bulk of the work in a function 3489 * that doesn't call btrfs_search_slot 3490 */ 3491 static noinline_for_stack int 3492 setup_items_for_insert(struct btrfs_trans_handle *trans, 3493 struct btrfs_root *root, struct btrfs_path *path, 3494 struct btrfs_key *cpu_key, u32 *data_size, 3495 u32 total_data, u32 total_size, int nr) 3496 { 3497 struct btrfs_item *item; 3498 int i; 3499 u32 nritems; 3500 unsigned int data_end; 3501 struct btrfs_disk_key disk_key; 3502 int ret; 3503 struct extent_buffer *leaf; 3504 int slot; 3505 3506 leaf = path->nodes[0]; 3507 slot = path->slots[0]; 3508 3509 nritems = btrfs_header_nritems(leaf); 3510 data_end = leaf_data_end(root, leaf); 3511 3512 if (btrfs_leaf_free_space(root, leaf) < total_size) { 3513 btrfs_print_leaf(root, leaf); 3514 printk(KERN_CRIT "not enough freespace need %u have %d\n", 3515 total_size, btrfs_leaf_free_space(root, leaf)); 3516 BUG(); 3517 } 3518 3519 if (slot != nritems) { 3520 unsigned int old_data = btrfs_item_end_nr(leaf, slot); 3521 3522 if (old_data < data_end) { 3523 btrfs_print_leaf(root, leaf); 3524 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 3525 slot, old_data, data_end); 3526 BUG_ON(1); 3527 } 3528 /* 3529 * item0..itemN ... dataN.offset..dataN.size .. 
data0.size 3530 */ 3531 /* first correct the data pointers */ 3532 WARN_ON(leaf->map_token); 3533 for (i = slot; i < nritems; i++) { 3534 u32 ioff; 3535 3536 item = btrfs_item_nr(leaf, i); 3537 if (!leaf->map_token) { 3538 map_extent_buffer(leaf, (unsigned long)item, 3539 sizeof(struct btrfs_item), 3540 &leaf->map_token, &leaf->kaddr, 3541 &leaf->map_start, &leaf->map_len, 3542 KM_USER1); 3543 } 3544 3545 ioff = btrfs_item_offset(leaf, item); 3546 btrfs_set_item_offset(leaf, item, ioff - total_data); 3547 } 3548 if (leaf->map_token) { 3549 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3550 leaf->map_token = NULL; 3551 } 3552 3553 /* shift the items */ 3554 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3555 btrfs_item_nr_offset(slot), 3556 (nritems - slot) * sizeof(struct btrfs_item)); 3557 3558 /* shift the data */ 3559 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3560 data_end - total_data, btrfs_leaf_data(leaf) + 3561 data_end, old_data - data_end); 3562 data_end = old_data; 3563 } 3564 3565 /* setup the item for the new data */ 3566 for (i = 0; i < nr; i++) { 3567 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3568 btrfs_set_item_key(leaf, &disk_key, slot + i); 3569 item = btrfs_item_nr(leaf, slot + i); 3570 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3571 data_end -= data_size[i]; 3572 btrfs_set_item_size(leaf, item, data_size[i]); 3573 } 3574 3575 btrfs_set_header_nritems(leaf, nritems + nr); 3576 3577 ret = 0; 3578 if (slot == 0) { 3579 struct btrfs_disk_key disk_key; 3580 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3581 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3582 } 3583 btrfs_unlock_up_safe(path, 1); 3584 btrfs_mark_buffer_dirty(leaf); 3585 3586 if (btrfs_leaf_free_space(root, leaf) < 0) { 3587 btrfs_print_leaf(root, leaf); 3588 BUG(); 3589 } 3590 return ret; 3591 } 3592 3593 /* 3594 * Given a key and some data, insert items into the tree. 3595 * This does all the path init required, making room in the tree if needed. 3596 */ 3597 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 3598 struct btrfs_root *root, 3599 struct btrfs_path *path, 3600 struct btrfs_key *cpu_key, u32 *data_size, 3601 int nr) 3602 { 3603 struct extent_buffer *leaf; 3604 int ret = 0; 3605 int slot; 3606 int i; 3607 u32 total_size = 0; 3608 u32 total_data = 0; 3609 3610 for (i = 0; i < nr; i++) 3611 total_data += data_size[i]; 3612 3613 total_size = total_data + (nr * sizeof(struct btrfs_item)); 3614 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 3615 if (ret == 0) 3616 return -EEXIST; 3617 if (ret < 0) 3618 goto out; 3619 3620 leaf = path->nodes[0]; 3621 slot = path->slots[0]; 3622 BUG_ON(slot < 0); 3623 3624 ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, 3625 total_data, total_size, nr); 3626 3627 out: 3628 return ret; 3629 } 3630 3631 /* 3632 * Given a key and some data, insert an item into the tree. 3633 * This does all the path init required, making room in the tree if needed. 
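 *
 * A minimal usage sketch (editorial addition; the key values are
 * invented and a transaction must already be running):
 *
 *	struct btrfs_key key;
 *
 *	key.objectid = objectid;
 *	key.type = BTRFS_XATTR_ITEM_KEY;
 *	key.offset = 0;
 *	ret = btrfs_insert_item(trans, root, &key, data, data_len);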
3634 */ 3635 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 3636 *root, struct btrfs_key *cpu_key, void *data, u32 3637 data_size) 3638 { 3639 int ret = 0; 3640 struct btrfs_path *path; 3641 struct extent_buffer *leaf; 3642 unsigned long ptr; 3643 3644 path = btrfs_alloc_path(); 3645 BUG_ON(!path); 3646 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 3647 if (!ret) { 3648 leaf = path->nodes[0]; 3649 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 3650 write_extent_buffer(leaf, data, ptr, data_size); 3651 btrfs_mark_buffer_dirty(leaf); 3652 } 3653 btrfs_free_path(path); 3654 return ret; 3655 } 3656 3657 /* 3658 * delete the pointer from a given node. 3659 * 3660 * the tree should have been previously balanced so the deletion does not 3661 * empty a node. 3662 */ 3663 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3664 struct btrfs_path *path, int level, int slot) 3665 { 3666 struct extent_buffer *parent = path->nodes[level]; 3667 u32 nritems; 3668 int ret = 0; 3669 int wret; 3670 3671 nritems = btrfs_header_nritems(parent); 3672 if (slot != nritems - 1) { 3673 memmove_extent_buffer(parent, 3674 btrfs_node_key_ptr_offset(slot), 3675 btrfs_node_key_ptr_offset(slot + 1), 3676 sizeof(struct btrfs_key_ptr) * 3677 (nritems - slot - 1)); 3678 } 3679 nritems--; 3680 btrfs_set_header_nritems(parent, nritems); 3681 if (nritems == 0 && parent == root->node) { 3682 BUG_ON(btrfs_header_level(root->node) != 1); 3683 /* just turn the root into a leaf and break */ 3684 btrfs_set_header_level(root->node, 0); 3685 } else if (slot == 0) { 3686 struct btrfs_disk_key disk_key; 3687 3688 btrfs_node_key(parent, &disk_key, 0); 3689 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); 3690 if (wret) 3691 ret = wret; 3692 } 3693 btrfs_mark_buffer_dirty(parent); 3694 return ret; 3695 } 3696 3697 /* 3698 * a helper function to delete the leaf pointed to by path->slots[1] and 3699 * path->nodes[1]. 3700 * 3701 * This deletes the pointer in path->nodes[1] and frees the leaf 3702 * block extent. zero is returned if it all worked out, < 0 otherwise. 3703 * 3704 * The path must have already been setup for deleting the leaf, including 3705 * all the proper balancing. path->nodes[1] must be locked. 3706 */ 3707 static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3708 struct btrfs_root *root, 3709 struct btrfs_path *path, 3710 struct extent_buffer *leaf) 3711 { 3712 int ret; 3713 3714 WARN_ON(btrfs_header_generation(leaf) != trans->transid); 3715 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3716 if (ret) 3717 return ret; 3718 3719 /* 3720 * btrfs_free_extent is expensive, we want to make sure we 3721 * aren't holding any locks when we call it 3722 */ 3723 btrfs_unlock_up_safe(path, 0); 3724 3725 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len, 3726 0, root->root_key.objectid, 0, 0); 3727 return ret; 3728 } 3729 /* 3730 * delete the item at the leaf level in path. 
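 * (editorial note: the single-item helper btrfs_del_item() in ctree.h
 * simply wraps this as btrfs_del_items(trans, root, path,
 * path->slots[0], 1))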
If that empties 3731 * the leaf, remove it from the tree 3732 */ 3733 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3734 struct btrfs_path *path, int slot, int nr) 3735 { 3736 struct extent_buffer *leaf; 3737 struct btrfs_item *item; 3738 int last_off; 3739 int dsize = 0; 3740 int ret = 0; 3741 int wret; 3742 int i; 3743 u32 nritems; 3744 3745 leaf = path->nodes[0]; 3746 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); 3747 3748 for (i = 0; i < nr; i++) 3749 dsize += btrfs_item_size_nr(leaf, slot + i); 3750 3751 nritems = btrfs_header_nritems(leaf); 3752 3753 if (slot + nr != nritems) { 3754 int data_end = leaf_data_end(root, leaf); 3755 3756 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3757 data_end + dsize, 3758 btrfs_leaf_data(leaf) + data_end, 3759 last_off - data_end); 3760 3761 for (i = slot + nr; i < nritems; i++) { 3762 u32 ioff; 3763 3764 item = btrfs_item_nr(leaf, i); 3765 if (!leaf->map_token) { 3766 map_extent_buffer(leaf, (unsigned long)item, 3767 sizeof(struct btrfs_item), 3768 &leaf->map_token, &leaf->kaddr, 3769 &leaf->map_start, &leaf->map_len, 3770 KM_USER1); 3771 } 3772 ioff = btrfs_item_offset(leaf, item); 3773 btrfs_set_item_offset(leaf, item, ioff + dsize); 3774 } 3775 3776 if (leaf->map_token) { 3777 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3778 leaf->map_token = NULL; 3779 } 3780 3781 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), 3782 btrfs_item_nr_offset(slot + nr), 3783 sizeof(struct btrfs_item) * 3784 (nritems - slot - nr)); 3785 } 3786 btrfs_set_header_nritems(leaf, nritems - nr); 3787 nritems -= nr; 3788 3789 /* delete the leaf if we've emptied it */ 3790 if (nritems == 0) { 3791 if (leaf == root->node) { 3792 btrfs_set_header_level(leaf, 0); 3793 } else { 3794 ret = btrfs_del_leaf(trans, root, path, leaf); 3795 BUG_ON(ret); 3796 } 3797 } else { 3798 int used = leaf_space_used(leaf, 0, nritems); 3799 if (slot == 0) { 3800 struct btrfs_disk_key disk_key; 3801 3802 btrfs_item_key(leaf, &disk_key, 0); 3803 wret = fixup_low_keys(trans, root, path, 3804 &disk_key, 1); 3805 if (wret) 3806 ret = wret; 3807 } 3808 3809 /* delete the leaf if it is mostly empty */ 3810 if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) { 3811 /* push_leaf_left fixes the path. 3812 * make sure the path still points to our leaf 3813 * for possible call to del_ptr below 3814 */ 3815 slot = path->slots[1]; 3816 extent_buffer_get(leaf); 3817 3818 btrfs_set_path_blocking(path); 3819 wret = push_leaf_left(trans, root, path, 1, 1); 3820 if (wret < 0 && wret != -ENOSPC) 3821 ret = wret; 3822 3823 if (path->nodes[0] == leaf && 3824 btrfs_header_nritems(leaf)) { 3825 wret = push_leaf_right(trans, root, path, 1, 1); 3826 if (wret < 0 && wret != -ENOSPC) 3827 ret = wret; 3828 } 3829 3830 if (btrfs_header_nritems(leaf) == 0) { 3831 path->slots[1] = slot; 3832 ret = btrfs_del_leaf(trans, root, path, leaf); 3833 BUG_ON(ret); 3834 free_extent_buffer(leaf); 3835 } else { 3836 /* if we're still in the path, make sure 3837 * we're dirty. Otherwise, one of the 3838 * push_leaf functions must have already 3839 * dirtied this buffer 3840 */ 3841 if (path->nodes[0] == leaf) 3842 btrfs_mark_buffer_dirty(leaf); 3843 free_extent_buffer(leaf); 3844 } 3845 } else { 3846 btrfs_mark_buffer_dirty(leaf); 3847 } 3848 } 3849 return ret; 3850 } 3851 3852 /* 3853 * search the tree again to find a leaf with lesser keys 3854 * returns 0 if it found something or 1 if there are no lesser leaves. 3855 * returns < 0 on io errors. 
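 * (editorial note: the key is stepped back one field at a time, offset
 * first, then type, then objectid, before re-searching; e.g. a key of
 * (X Y 0) becomes (X Y-1 0) rather than underflowing the offset)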
3856 * 3857 * This may release the path, and so you may lose any locks held at the 3858 * time you call it. 3859 */ 3860 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 3861 { 3862 struct btrfs_key key; 3863 struct btrfs_disk_key found_key; 3864 int ret; 3865 3866 btrfs_item_key_to_cpu(path->nodes[0], &key, 0); 3867 3868 if (key.offset > 0) 3869 key.offset--; 3870 else if (key.type > 0) 3871 key.type--; 3872 else if (key.objectid > 0) 3873 key.objectid--; 3874 else 3875 return 1; 3876 3877 btrfs_release_path(root, path); 3878 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3879 if (ret < 0) 3880 return ret; 3881 btrfs_item_key(path->nodes[0], &found_key, 0); 3882 ret = comp_keys(&found_key, &key); 3883 if (ret < 0) 3884 return 0; 3885 return 1; 3886 } 3887 3888 /* 3889 * A helper function to walk down the tree starting at min_key, and looking 3890 * for nodes or leaves that are either in cache or have a minimum 3891 * transaction id. This is used by the btree defrag code, and tree logging 3892 * 3893 * This does not cow, but it does stuff the starting key it finds back 3894 * into min_key, so you can call btrfs_search_slot with cow=1 on the 3895 * key and get a writable path. 3896 * 3897 * This does lock as it descends, and path->keep_locks should be set 3898 * to 1 by the caller. 3899 * 3900 * This honors path->lowest_level to prevent descent past a given level 3901 * of the tree. 3902 * 3903 * min_trans indicates the oldest transaction that you are interested 3904 * in walking through. Any nodes or leaves older than min_trans are 3905 * skipped over (without reading them). 3906 * 3907 * returns zero if something useful was found, < 0 on error and 1 if there 3908 * was nothing in the tree that matched the search criteria. 3909 */ 3910 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, 3911 struct btrfs_key *max_key, 3912 struct btrfs_path *path, int cache_only, 3913 u64 min_trans) 3914 { 3915 struct extent_buffer *cur; 3916 struct btrfs_key found_key; 3917 int slot; 3918 int sret; 3919 u32 nritems; 3920 int level; 3921 int ret = 1; 3922 3923 WARN_ON(!path->keep_locks); 3924 again: 3925 cur = btrfs_lock_root_node(root); 3926 level = btrfs_header_level(cur); 3927 WARN_ON(path->nodes[level]); 3928 path->nodes[level] = cur; 3929 path->locks[level] = 1; 3930 3931 if (btrfs_header_generation(cur) < min_trans) { 3932 ret = 1; 3933 goto out; 3934 } 3935 while (1) { 3936 nritems = btrfs_header_nritems(cur); 3937 level = btrfs_header_level(cur); 3938 sret = bin_search(cur, min_key, level, &slot); 3939 3940 /* at the lowest level, we're done, setup the path and exit */ 3941 if (level == path->lowest_level) { 3942 if (slot >= nritems) 3943 goto find_next_key; 3944 ret = 0; 3945 path->slots[level] = slot; 3946 btrfs_item_key_to_cpu(cur, &found_key, slot); 3947 goto out; 3948 } 3949 if (sret && slot > 0) 3950 slot--; 3951 /* 3952 * check this node pointer against the cache_only and 3953 * min_trans parameters. If it isn't in cache or is too 3954 * old, skip to the next one. 
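 * For instance, with min_trans == 100 a pointer whose generation is 99
 * is stepped over without ever being read from disk.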
/*
 * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that are either in cache or have a minimum
 * transaction id. This is used by the btree defrag code and by tree logging.
 *
 * This does not cow, but it does stuff the starting key it finds back
 * into min_key, so you can call btrfs_search_slot with cow=1 on the
 * key and get a writable path.
 *
 * This does lock as it descends, and path->keep_locks should be set
 * to 1 by the caller.
 *
 * This honors path->lowest_level to prevent descent past a given level
 * of the tree.
 *
 * min_trans indicates the oldest transaction that you are interested
 * in walking through. Any nodes or leaves older than min_trans are
 * skipped over (without reading them).
 *
 * returns zero if something useful was found, < 0 on error and 1 if there
 * was nothing in the tree that matched the search criteria.
 */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
                         struct btrfs_key *max_key,
                         struct btrfs_path *path, int cache_only,
                         u64 min_trans)
{
        struct extent_buffer *cur;
        struct btrfs_key found_key;
        int slot;
        int sret;
        u32 nritems;
        int level;
        int ret = 1;

        WARN_ON(!path->keep_locks);
again:
        cur = btrfs_lock_root_node(root);
        level = btrfs_header_level(cur);
        WARN_ON(path->nodes[level]);
        path->nodes[level] = cur;
        path->locks[level] = 1;

        if (btrfs_header_generation(cur) < min_trans) {
                ret = 1;
                goto out;
        }
        while (1) {
                nritems = btrfs_header_nritems(cur);
                level = btrfs_header_level(cur);
                sret = bin_search(cur, min_key, level, &slot);

                /* at the lowest level, we're done, set up the path and exit */
                if (level == path->lowest_level) {
                        if (slot >= nritems)
                                goto find_next_key;
                        ret = 0;
                        path->slots[level] = slot;
                        btrfs_item_key_to_cpu(cur, &found_key, slot);
                        goto out;
                }
                if (sret && slot > 0)
                        slot--;
                /*
                 * check this node pointer against the cache_only and
                 * min_trans parameters. If it isn't in cache or is too
                 * old, skip to the next one.
                 */
                while (slot < nritems) {
                        u64 blockptr;
                        u64 gen;
                        struct extent_buffer *tmp;
                        struct btrfs_disk_key disk_key;

                        blockptr = btrfs_node_blockptr(cur, slot);
                        gen = btrfs_node_ptr_generation(cur, slot);
                        if (gen < min_trans) {
                                slot++;
                                continue;
                        }
                        if (!cache_only)
                                break;

                        if (max_key) {
                                btrfs_node_key(cur, &disk_key, slot);
                                if (comp_keys(&disk_key, max_key) >= 0) {
                                        ret = 1;
                                        goto out;
                                }
                        }

                        tmp = btrfs_find_tree_block(root, blockptr,
                                            btrfs_level_size(root, level - 1));

                        if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
                                free_extent_buffer(tmp);
                                break;
                        }
                        if (tmp)
                                free_extent_buffer(tmp);
                        slot++;
                }
find_next_key:
                /*
                 * we didn't find a candidate key in this node, walk forward
                 * and find another one
                 */
                if (slot >= nritems) {
                        path->slots[level] = slot;
                        btrfs_set_path_blocking(path);
                        sret = btrfs_find_next_key(root, path, min_key, level,
                                                  cache_only, min_trans);
                        if (sret == 0) {
                                btrfs_release_path(root, path);
                                goto again;
                        } else {
                                goto out;
                        }
                }
                /* save our key so we can return it to the caller */
                btrfs_node_key_to_cpu(cur, &found_key, slot);
                path->slots[level] = slot;
                if (level == path->lowest_level) {
                        ret = 0;
                        unlock_up(path, level, 1);
                        goto out;
                }
                btrfs_set_path_blocking(path);
                cur = read_node_slot(root, cur, slot);

                btrfs_tree_lock(cur);

                path->locks[level - 1] = 1;
                path->nodes[level - 1] = cur;
                unlock_up(path, level, 1);
                btrfs_clear_path_blocking(path, NULL);
        }
out:
        if (ret == 0)
                memcpy(min_key, &found_key, sizeof(found_key));
        btrfs_set_path_blocking(path);
        return ret;
}

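/*
 * Illustrative sketch only (hypothetical caller): scanning everything
 * modified since min_trans, the way the defrag and logging code drive
 * this function.  The caller owns advancing min_key past the returned
 * key before searching again, or the loop would spin on one key; a
 * complete version would also carry the increment into key.type and
 * key.objectid when key.offset overflows.
 *
 *      min_key.objectid = 0;
 *      min_key.type = 0;
 *      min_key.offset = 0;
 *      path->keep_locks = 1;
 *      while (btrfs_search_forward(root, &min_key, NULL, path,
 *                                  0, min_trans) == 0) {
 *              (use the buffer at path->nodes[path->lowest_level])
 *              btrfs_release_path(root, path);
 *              if (min_key.offset == (u64)-1)
 *                      break;
 *              min_key.offset++;
 *      }
 */
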
/*
 * this is similar to btrfs_next_leaf, but does not try to preserve
 * and fix up the path. It looks for and returns the next key in the
 * tree based on the current path and the cache_only and min_trans
 * parameters.
 *
 * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
 */
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
                        struct btrfs_key *key, int lowest_level,
                        int cache_only, u64 min_trans)
{
        int level = lowest_level;
        int slot;
        struct extent_buffer *c;

        WARN_ON(!path->keep_locks);
        while (level < BTRFS_MAX_LEVEL) {
                if (!path->nodes[level])
                        return 1;

                slot = path->slots[level] + 1;
                c = path->nodes[level];
next:
                if (slot >= btrfs_header_nritems(c)) {
                        level++;
                        if (level == BTRFS_MAX_LEVEL)
                                return 1;
                        continue;
                }
                if (level == 0)
                        btrfs_item_key_to_cpu(c, key, slot);
                else {
                        u64 blockptr = btrfs_node_blockptr(c, slot);
                        u64 gen = btrfs_node_ptr_generation(c, slot);

                        if (cache_only) {
                                struct extent_buffer *cur;
                                cur = btrfs_find_tree_block(root, blockptr,
                                            btrfs_level_size(root, level - 1));
                                if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
                                        slot++;
                                        if (cur)
                                                free_extent_buffer(cur);
                                        goto next;
                                }
                                free_extent_buffer(cur);
                        }
                        if (gen < min_trans) {
                                slot++;
                                goto next;
                        }
                        btrfs_node_key_to_cpu(c, key, slot);
                }
                return 0;
        }
        return 1;
}

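/*
 * Illustrative sketch only: peeking at the key that follows the
 * current path position without moving the path, for example to
 * decide whether the next item still belongs to the same objectid.
 * The search that built the path must have had keep_locks set, and
 * cache_only/min_trans of 0 disable the filtering.
 *
 *      struct btrfs_key next_key;
 *
 *      path->keep_locks = 1;
 *      ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 *      if (ret >= 0 && btrfs_find_next_key(root, path, &next_key,
 *                                          0, 0, 0) == 0) {
 *              (next_key now holds the key after path->slots[0])
 *      }
 */
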
/*
 * search the tree again to find a leaf with greater keys.
 * returns 0 if it found something or 1 if there are no greater leaves.
 * returns < 0 on io errors.
 */
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
        int slot;
        int level;
        struct extent_buffer *c;
        struct extent_buffer *next;
        struct btrfs_key key;
        u32 nritems;
        int ret;
        int old_spinning = path->leave_spinning;
        int force_blocking = 0;

        nritems = btrfs_header_nritems(path->nodes[0]);
        if (nritems == 0)
                return 1;

        /*
         * we take the blocks in an order that upsets lockdep. Using
         * blocking mode is the only way around it.
         */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        force_blocking = 1;
#endif

        btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
again:
        level = 1;
        next = NULL;
        btrfs_release_path(root, path);

        path->keep_locks = 1;

        if (!force_blocking)
                path->leave_spinning = 1;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        path->keep_locks = 0;

        if (ret < 0)
                return ret;

        nritems = btrfs_header_nritems(path->nodes[0]);
        /*
         * by releasing the path above we dropped all our locks. A balance
         * could have added more items next to the key that used to be
         * at the very end of the block. So, check again here and
         * advance the path if there are now more items available.
         */
        if (nritems > 0 && path->slots[0] < nritems - 1) {
                path->slots[0]++;
                ret = 0;
                goto done;
        }

        while (level < BTRFS_MAX_LEVEL) {
                if (!path->nodes[level]) {
                        ret = 1;
                        goto done;
                }

                slot = path->slots[level] + 1;
                c = path->nodes[level];
                if (slot >= btrfs_header_nritems(c)) {
                        level++;
                        if (level == BTRFS_MAX_LEVEL) {
                                ret = 1;
                                goto done;
                        }
                        continue;
                }

                if (next) {
                        btrfs_tree_unlock(next);
                        free_extent_buffer(next);
                }

                next = c;
                ret = read_block_for_search(NULL, root, path, &next, level,
                                            slot, &key);
                if (ret == -EAGAIN)
                        goto again;

                if (ret < 0) {
                        btrfs_release_path(root, path);
                        goto done;
                }

                if (!path->skip_locking) {
                        ret = btrfs_try_spin_lock(next);
                        if (!ret) {
                                btrfs_set_path_blocking(path);
                                btrfs_tree_lock(next);
                                if (!force_blocking)
                                        btrfs_clear_path_blocking(path, next);
                        }
                        if (force_blocking)
                                btrfs_set_lock_blocking(next);
                }
                break;
        }
        path->slots[level] = slot;
        while (1) {
                level--;
                c = path->nodes[level];
                if (path->locks[level])
                        btrfs_tree_unlock(c);

                free_extent_buffer(c);
                path->nodes[level] = next;
                path->slots[level] = 0;
                if (!path->skip_locking)
                        path->locks[level] = 1;

                if (!level)
                        break;

                ret = read_block_for_search(NULL, root, path, &next, level,
                                            0, &key);
                if (ret == -EAGAIN)
                        goto again;

                if (ret < 0) {
                        btrfs_release_path(root, path);
                        goto done;
                }

                if (!path->skip_locking) {
                        btrfs_assert_tree_locked(path->nodes[level]);
                        ret = btrfs_try_spin_lock(next);
                        if (!ret) {
                                btrfs_set_path_blocking(path);
                                btrfs_tree_lock(next);
                                if (!force_blocking)
                                        btrfs_clear_path_blocking(path, next);
                        }
                        if (force_blocking)
                                btrfs_set_lock_blocking(next);
                }
        }
        ret = 0;
done:
        unlock_up(path, 0, 1);
        path->leave_spinning = old_spinning;
        if (!old_spinning)
                btrfs_set_path_blocking(path);

        return ret;
}

/*
 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
 * searching until it gets past min_objectid or finds an item of 'type'.
 *
 * returns 0 if something is found, 1 if nothing was found and < 0 on error
 */
int btrfs_previous_item(struct btrfs_root *root,
                        struct btrfs_path *path, u64 min_objectid,
                        int type)
{
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
        u32 nritems;
        int ret;

        while (1) {
                if (path->slots[0] == 0) {
                        btrfs_set_path_blocking(path);
                        ret = btrfs_prev_leaf(root, path);
                        if (ret != 0)
                                return ret;
                } else {
                        path->slots[0]--;
                }
                leaf = path->nodes[0];
                nritems = btrfs_header_nritems(leaf);
                if (nritems == 0)
                        return 1;
                if (path->slots[0] == nritems)
                        path->slots[0]--;

                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                if (found_key.type == type)
                        return 0;
                if (found_key.objectid < min_objectid)
                        break;
                if (found_key.objectid == min_objectid &&
                    found_key.type < type)
                        break;
        }
        return 1;
}

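/*
 * Illustrative sketch only (hypothetical caller): a common use of the
 * backwards helpers is to search past all items of an objectid by
 * using offset (u64)-1 and then step back to the last item of the
 * wanted type.  BTRFS_EXTENT_DATA_KEY and ino are just example values.
 *
 *      key.objectid = ino;
 *      key.type = BTRFS_EXTENT_DATA_KEY;
 *      key.offset = (u64)-1;
 *      ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 *      if (ret > 0)
 *              ret = btrfs_previous_item(root, path, ino,
 *                                        BTRFS_EXTENT_DATA_KEY);
 *      if (ret == 0)
 *              (path->slots[0] is the last EXTENT_DATA item of ino)
 */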