/*
 * Copyright (C) 2007,2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "locking.h"

static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_key *ins_key,
                      struct btrfs_path *path, int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct extent_buffer *dst,
                          struct extent_buffer *src, int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *dst_buf,
                              struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_path *path, int level, int slot);

struct btrfs_path *btrfs_alloc_path(void)
{
        struct btrfs_path *path;
        path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
        return path;
}

/*
 * set all locked nodes in the path to blocking locks.  This should
 * be done before scheduling
 */
noinline void btrfs_set_path_blocking(struct btrfs_path *p)
{
        int i;
        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
                if (!p->nodes[i] || !p->locks[i])
                        continue;
                btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
                if (p->locks[i] == BTRFS_READ_LOCK)
                        p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
                else if (p->locks[i] == BTRFS_WRITE_LOCK)
                        p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
        }
}

/*
 * reset all the locked nodes in the path to spinning locks.
 *
 * held is used to keep lockdep happy, when lockdep is enabled
 * we set held to a blocking lock before we go around and
 * retake all the spinlocks in the path.  You can safely use NULL
 * for held
 */
noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
                                        struct extent_buffer *held, int held_rw)
{
        int i;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /* lockdep really cares that we take all of these spinlocks
         * in the right order.  If any of the locks in the path are not
         * currently blocking, it is going to complain.  So, make really
         * really sure by forcing the path to blocking before we clear
         * the path blocking.
         */
        if (held) {
                btrfs_set_lock_blocking_rw(held, held_rw);
                if (held_rw == BTRFS_WRITE_LOCK)
                        held_rw = BTRFS_WRITE_LOCK_BLOCKING;
                else if (held_rw == BTRFS_READ_LOCK)
                        held_rw = BTRFS_READ_LOCK_BLOCKING;
        }
        btrfs_set_path_blocking(p);
#endif

        for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
                if (p->nodes[i] && p->locks[i]) {
                        btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
                        if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
                                p->locks[i] = BTRFS_WRITE_LOCK;
                        else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
                                p->locks[i] = BTRFS_READ_LOCK;
                }
        }

#ifdef CONFIG_DEBUG_LOCK_ALLOC
        if (held)
                btrfs_clear_lock_blocking_rw(held, held_rw);
#endif
}

/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
        if (!p)
                return;
        btrfs_release_path(p);
        kmem_cache_free(btrfs_path_cachep, p);
}

/*
 * path release drops references on the extent buffers in the path
 * and it drops any locks held by this path
 *
 * It is safe to call this on paths that hold no locks or extent buffers.
 */
noinline void btrfs_release_path(struct btrfs_path *p)
{
        int i;

        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
                p->slots[i] = 0;
                if (!p->nodes[i])
                        continue;
                if (p->locks[i]) {
                        btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
                        p->locks[i] = 0;
                }
                free_extent_buffer(p->nodes[i]);
                p->nodes[i] = NULL;
        }
}
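/*
 * Example usage (an illustrative sketch, not code from this file): the
 * typical lifecycle of a path.  The caller owns the path and must free
 * it; btrfs_free_path() drops whatever locks and extent buffer
 * references the search left behind.  The key values are made up.
 *
 *      struct btrfs_path *path;
 *      struct btrfs_key key = { .objectid = 257,
 *                               .type = BTRFS_INODE_ITEM_KEY,
 *                               .offset = 0 };
 *      int ret;
 *
 *      path = btrfs_alloc_path();
 *      if (!path)
 *              return -ENOMEM;
 *      ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 *      ...use path->nodes[0] and path->slots[0]...
 *      btrfs_free_path(path);
 */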
/*
 * safely gets a reference on the root node of a tree.  A lock
 * is not taken, so a concurrent writer may put a different node
 * at the root of the tree.  See btrfs_lock_root_node for the
 * looping required.
 *
 * The extent buffer returned by this has a reference taken, so
 * it won't disappear.  It may stop being the root of the tree
 * at any time because there are no locks held.
 */
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
        struct extent_buffer *eb;

        rcu_read_lock();
        eb = rcu_dereference(root->node);
        extent_buffer_get(eb);
        rcu_read_unlock();
        return eb;
}

/* loop around taking references on and locking the root node of the
 * tree until you end up with a lock on the root.  A locked buffer
 * is returned, with a reference held.
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
        struct extent_buffer *eb;

        while (1) {
                eb = btrfs_root_node(root);
                btrfs_tree_lock(eb);
                if (eb == root->node)
                        break;
                btrfs_tree_unlock(eb);
                free_extent_buffer(eb);
        }
        return eb;
}

/* loop around taking references on and read locking the root node of the
 * tree until you end up with a lock on the root.  A locked buffer
 * is returned, with a reference held.
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
        struct extent_buffer *eb;

        while (1) {
                eb = btrfs_root_node(root);
                btrfs_tree_read_lock(eb);
                if (eb == root->node)
                        break;
                btrfs_tree_read_unlock(eb);
                free_extent_buffer(eb);
        }
        return eb;
}

/* cowonly root (everything not a reference counted cow subvolume), just get
 * put onto a simple dirty list.  transaction.c walks this to make sure they
 * get properly updated on disk.
 */
static void add_root_to_dirty_list(struct btrfs_root *root)
{
        if (root->track_dirty && list_empty(&root->dirty_list)) {
                list_add(&root->dirty_list,
                         &root->fs_info->dirty_cowonly_roots);
        }
}

/*
 * used by snapshot creation to make a copy of a root for a tree with
 * a given objectid.  The buffer with the new root node is returned in
 * cow_ret, and this func returns zero on success or a negative error code.
 */
int btrfs_copy_root(struct btrfs_trans_handle *trans,
                    struct btrfs_root *root,
                    struct extent_buffer *buf,
                    struct extent_buffer **cow_ret, u64 new_root_objectid)
{
        struct extent_buffer *cow;
        int ret = 0;
        int level;
        struct btrfs_disk_key disk_key;

        WARN_ON(root->ref_cows && trans->transid !=
                root->fs_info->running_transaction->transid);
        WARN_ON(root->ref_cows && trans->transid != root->last_trans);

        level = btrfs_header_level(buf);
        if (level == 0)
                btrfs_item_key(buf, &disk_key, 0);
        else
                btrfs_node_key(buf, &disk_key, 0);

        cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
                                     new_root_objectid, &disk_key, level,
                                     buf->start, 0);
        if (IS_ERR(cow))
                return PTR_ERR(cow);

        copy_extent_buffer(cow, buf, 0, 0, cow->len);
        btrfs_set_header_bytenr(cow, cow->start);
        btrfs_set_header_generation(cow, trans->transid);
        btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
        btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
                                     BTRFS_HEADER_FLAG_RELOC);
        if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
                btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
        else
                btrfs_set_header_owner(cow, new_root_objectid);

        write_extent_buffer(cow, root->fs_info->fsid,
                            (unsigned long)btrfs_header_fsid(cow),
                            BTRFS_FSID_SIZE);

        WARN_ON(btrfs_header_generation(buf) > trans->transid);
        if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
                ret = btrfs_inc_ref(trans, root, cow, 1);
        else
                ret = btrfs_inc_ref(trans, root, cow, 0);

        if (ret)
                return ret;

        btrfs_mark_buffer_dirty(cow);
        *cow_ret = cow;
        return 0;
}

/*
 * check if the tree block can be shared by multiple trees
 */
int btrfs_block_can_be_shared(struct btrfs_root *root,
                              struct extent_buffer *buf)
{
        /*
         * Tree blocks not in reference counted trees and tree roots
         * are never shared.  If a block was allocated after the last
         * snapshot and the block was not allocated by tree relocation,
         * we know the block is not shared.
         */
        if (root->ref_cows &&
            buf != root->node && buf != root->commit_root &&
            (btrfs_header_generation(buf) <=
             btrfs_root_last_snapshot(&root->root_item) ||
             btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
                return 1;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
        if (root->ref_cows &&
            btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
                return 1;
#endif
        return 0;
}
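/*
 * Example (illustrative, with made-up generation numbers): if the last
 * snapshot of a reference counted root was taken in generation 100, a
 * block with btrfs_header_generation() == 90 existed before that
 * snapshot, so btrfs_block_can_be_shared() returns 1 and the cow code
 * must look up the extent's reference count.  A block written in
 * generation 120 (and not flagged BTRFS_HEADER_FLAG_RELOC) cannot be
 * shared, so no backref lookup is needed before it is cow'd.
 */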
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root,
                                       struct extent_buffer *buf,
                                       struct extent_buffer *cow,
                                       int *last_ref)
{
        u64 refs;
        u64 owner;
        u64 flags;
        u64 new_flags = 0;
        int ret;

        /*
         * Backrefs update rules:
         *
         * Always use full backrefs for extent pointers in tree blocks
         * allocated by tree relocation.
         *
         * If a shared tree block is no longer referenced by its owner
         * tree (btrfs_header_owner(buf) == root->root_key.objectid),
         * use full backrefs for extent pointers in tree block.
         *
         * If a tree block is being relocated
         * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
         * use full backrefs for extent pointers in tree block.
         * The reason for this is that some operations (such as drop tree)
         * are only allowed for blocks that use full backrefs.
         */

        if (btrfs_block_can_be_shared(root, buf)) {
                ret = btrfs_lookup_extent_info(trans, root, buf->start,
                                               buf->len, &refs, &flags);
                BUG_ON(ret);
                BUG_ON(refs == 0);
        } else {
                refs = 1;
                if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
                    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
                        flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
                else
                        flags = 0;
        }

        owner = btrfs_header_owner(buf);
        BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
               !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));

        if (refs > 1) {
                if ((owner == root->root_key.objectid ||
                     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
                    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
                        ret = btrfs_inc_ref(trans, root, buf, 1);
                        BUG_ON(ret);

                        if (root->root_key.objectid ==
                            BTRFS_TREE_RELOC_OBJECTID) {
                                ret = btrfs_dec_ref(trans, root, buf, 0);
                                BUG_ON(ret);
                                ret = btrfs_inc_ref(trans, root, cow, 1);
                                BUG_ON(ret);
                        }
                        new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
                } else {

                        if (root->root_key.objectid ==
                            BTRFS_TREE_RELOC_OBJECTID)
                                ret = btrfs_inc_ref(trans, root, cow, 1);
                        else
                                ret = btrfs_inc_ref(trans, root, cow, 0);
                        BUG_ON(ret);
                }
                if (new_flags != 0) {
                        ret = btrfs_set_disk_extent_flags(trans, root,
                                                          buf->start,
                                                          buf->len,
                                                          new_flags, 0);
                        BUG_ON(ret);
                }
        } else {
                if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
                        if (root->root_key.objectid ==
                            BTRFS_TREE_RELOC_OBJECTID)
                                ret = btrfs_inc_ref(trans, root, cow, 1);
                        else
                                ret = btrfs_inc_ref(trans, root, cow, 0);
                        BUG_ON(ret);
                        ret = btrfs_dec_ref(trans, root, buf, 1);
                        BUG_ON(ret);
                }
                clean_tree_block(trans, root, buf);
                *last_ref = 1;
        }
        return 0;
}

/*
 * does the dirty work in cow of a single block.  The parent block (if
 * supplied) is updated to point to the new cow copy.  The new buffer is marked
 * dirty and returned locked.  If you modify the block it needs to be marked
 * dirty again.
 *
 * search_start -- an allocation hint for the new block
 *
 * empty_size -- a hint that you plan on doing more cow.  This is the size in
 * bytes the allocator should try to find free next to the block it returns.
 * This is just a hint and may be ignored by the allocator.
 */
static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root,
                                      struct extent_buffer *buf,
                                      struct extent_buffer *parent, int parent_slot,
                                      struct extent_buffer **cow_ret,
                                      u64 search_start, u64 empty_size)
{
        struct btrfs_disk_key disk_key;
        struct extent_buffer *cow;
        int level;
        int last_ref = 0;
        int unlock_orig = 0;
        u64 parent_start;

        if (*cow_ret == buf)
                unlock_orig = 1;

        btrfs_assert_tree_locked(buf);

        WARN_ON(root->ref_cows && trans->transid !=
                root->fs_info->running_transaction->transid);
        WARN_ON(root->ref_cows && trans->transid != root->last_trans);

        level = btrfs_header_level(buf);

        if (level == 0)
                btrfs_item_key(buf, &disk_key, 0);
        else
                btrfs_node_key(buf, &disk_key, 0);

        if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
                if (parent)
                        parent_start = parent->start;
                else
                        parent_start = 0;
        } else
                parent_start = 0;

        cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
                                     root->root_key.objectid, &disk_key,
                                     level, search_start, empty_size);
        if (IS_ERR(cow))
                return PTR_ERR(cow);

        /* cow is set to blocking by btrfs_init_new_buffer */

        copy_extent_buffer(cow, buf, 0, 0, cow->len);
        btrfs_set_header_bytenr(cow, cow->start);
        btrfs_set_header_generation(cow, trans->transid);
        btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
        btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
                                     BTRFS_HEADER_FLAG_RELOC);
        if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
        else
                btrfs_set_header_owner(cow, root->root_key.objectid);

        write_extent_buffer(cow, root->fs_info->fsid,
                            (unsigned long)btrfs_header_fsid(cow),
                            BTRFS_FSID_SIZE);

        update_ref_for_cow(trans, root, buf, cow, &last_ref);

        if (root->ref_cows)
                btrfs_reloc_cow_block(trans, root, buf, cow);

        if (buf == root->node) {
                WARN_ON(parent && parent != buf);
                if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
                    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
                        parent_start = buf->start;
                else
                        parent_start = 0;

                extent_buffer_get(cow);
                rcu_assign_pointer(root->node, cow);

                btrfs_free_tree_block(trans, root, buf, parent_start,
                                      last_ref);
                free_extent_buffer(buf);
                add_root_to_dirty_list(root);
        } else {
                if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                        parent_start = parent->start;
                else
                        parent_start = 0;

                WARN_ON(trans->transid != btrfs_header_generation(parent));
                btrfs_set_node_blockptr(parent, parent_slot,
                                        cow->start);
                btrfs_set_node_ptr_generation(parent, parent_slot,
                                              trans->transid);
                btrfs_mark_buffer_dirty(parent);
                btrfs_free_tree_block(trans, root, buf, parent_start,
                                      last_ref);
        }
        if (unlock_orig)
                btrfs_tree_unlock(buf);
        free_extent_buffer(buf);
        btrfs_mark_buffer_dirty(cow);
        *cow_ret = cow;
        return 0;
}

static inline int should_cow_block(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct extent_buffer *buf)
{
        if (btrfs_header_generation(buf) == trans->transid &&
            !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
            !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
              btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
                return 0;
        return 1;
}
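/*
 * Illustrative note (not from the original source): should_cow_block()
 * is what makes cow idempotent within a transaction.  A block that was
 * already cow'd in transaction N has generation == N and does not have
 * BTRFS_HEADER_FLAG_WRITTEN set yet, so a second btrfs_cow_block()
 * call in the same transaction returns 0 here and the caller reuses
 * the buffer instead of allocating another copy.
 */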
/*
 * cows a single block, see __btrfs_cow_block for the real work.
 * This version of it has extra checks so that a block isn't cow'd more than
 * once per transaction, as long as it hasn't been written yet
 */
noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, struct extent_buffer *buf,
                             struct extent_buffer *parent, int parent_slot,
                             struct extent_buffer **cow_ret)
{
        u64 search_start;
        int ret;

        if (trans->transaction != root->fs_info->running_transaction) {
                printk(KERN_CRIT "trans %llu running %llu\n",
                       (unsigned long long)trans->transid,
                       (unsigned long long)
                       root->fs_info->running_transaction->transid);
                WARN_ON(1);
        }
        if (trans->transid != root->fs_info->generation) {
                printk(KERN_CRIT "trans %llu running %llu\n",
                       (unsigned long long)trans->transid,
                       (unsigned long long)root->fs_info->generation);
                WARN_ON(1);
        }

        if (!should_cow_block(trans, root, buf)) {
                *cow_ret = buf;
                return 0;
        }

        search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);

        if (parent)
                btrfs_set_lock_blocking(parent);
        btrfs_set_lock_blocking(buf);

        ret = __btrfs_cow_block(trans, root, buf, parent,
                                parent_slot, cow_ret, search_start, 0);

        trace_btrfs_cow_block(root, buf, *cow_ret);

        return ret;
}

/*
 * helper function for defrag to decide if two blocks pointed to by a
 * node are actually close by
 */
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
{
        if (blocknr < other && other - (blocknr + blocksize) < 32768)
                return 1;
        if (blocknr > other && blocknr - (other + blocksize) < 32768)
                return 1;
        return 0;
}

/*
 * compare two keys in a memcmp fashion
 */
static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
{
        struct btrfs_key k1;

        btrfs_disk_key_to_cpu(&k1, disk);

        return btrfs_comp_cpu_keys(&k1, k2);
}

/*
 * same as comp_keys only with two btrfs_key's
 */
int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
{
        if (k1->objectid > k2->objectid)
                return 1;
        if (k1->objectid < k2->objectid)
                return -1;
        if (k1->type > k2->type)
                return 1;
        if (k1->type < k2->type)
                return -1;
        if (k1->offset > k2->offset)
                return 1;
        if (k1->offset < k2->offset)
                return -1;
        return 0;
}
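/*
 * Example (illustrative, with made-up keys): the ordering is
 * lexicographic on (objectid, type, offset), so
 *
 *      (257, BTRFS_INODE_ITEM_KEY, 0) < (257, BTRFS_DIR_ITEM_KEY, hash)
 *
 * because the objectids tie and BTRFS_INODE_ITEM_KEY (1) is numerically
 * smaller than BTRFS_DIR_ITEM_KEY (84); offset only breaks ties when
 * objectid and type are both equal.
 */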
/*
 * this is used by the defrag code to go through all the
 * leaves pointed to by a node and reallocate them so that
 * disk order is close to key order
 */
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct extent_buffer *parent,
                       int start_slot, int cache_only, u64 *last_ret,
                       struct btrfs_key *progress)
{
        struct extent_buffer *cur;
        u64 blocknr;
        u64 gen;
        u64 search_start = *last_ret;
        u64 last_block = 0;
        u64 other;
        u32 parent_nritems;
        int end_slot;
        int i;
        int err = 0;
        int parent_level;
        int uptodate;
        u32 blocksize;
        int progress_passed = 0;
        struct btrfs_disk_key disk_key;

        parent_level = btrfs_header_level(parent);
        if (cache_only && parent_level != 1)
                return 0;

        if (trans->transaction != root->fs_info->running_transaction)
                WARN_ON(1);
        if (trans->transid != root->fs_info->generation)
                WARN_ON(1);

        parent_nritems = btrfs_header_nritems(parent);
        blocksize = btrfs_level_size(root, parent_level - 1);
        end_slot = parent_nritems;

        if (parent_nritems == 1)
                return 0;

        btrfs_set_lock_blocking(parent);

        for (i = start_slot; i < end_slot; i++) {
                int close = 1;

                btrfs_node_key(parent, &disk_key, i);
                if (!progress_passed && comp_keys(&disk_key, progress) < 0)
                        continue;

                progress_passed = 1;
                blocknr = btrfs_node_blockptr(parent, i);
                gen = btrfs_node_ptr_generation(parent, i);
                if (last_block == 0)
                        last_block = blocknr;

                if (i > 0) {
                        other = btrfs_node_blockptr(parent, i - 1);
                        close = close_blocks(blocknr, other, blocksize);
                }
                if (!close && i < end_slot - 2) {
                        other = btrfs_node_blockptr(parent, i + 1);
                        close = close_blocks(blocknr, other, blocksize);
                }
                if (close) {
                        last_block = blocknr;
                        continue;
                }

                cur = btrfs_find_tree_block(root, blocknr, blocksize);
                if (cur)
                        uptodate = btrfs_buffer_uptodate(cur, gen);
                else
                        uptodate = 0;
                if (!cur || !uptodate) {
                        if (cache_only) {
                                free_extent_buffer(cur);
                                continue;
                        }
                        if (!cur) {
                                cur = read_tree_block(root, blocknr,
                                                      blocksize, gen);
                                if (!cur)
                                        return -EIO;
                        } else if (!uptodate) {
                                btrfs_read_buffer(cur, gen);
                        }
                }
                if (search_start == 0)
                        search_start = last_block;

                btrfs_tree_lock(cur);
                btrfs_set_lock_blocking(cur);
                err = __btrfs_cow_block(trans, root, cur, parent, i,
                                        &cur, search_start,
                                        min(16 * blocksize,
                                            (end_slot - i) * blocksize));
                if (err) {
                        btrfs_tree_unlock(cur);
                        free_extent_buffer(cur);
                        break;
                }
                search_start = cur->start;
                last_block = cur->start;
                *last_ret = search_start;
                btrfs_tree_unlock(cur);
                free_extent_buffer(cur);
        }
        return err;
}

/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
static inline unsigned int leaf_data_end(struct btrfs_root *root,
                                         struct extent_buffer *leaf)
{
        u32 nr = btrfs_header_nritems(leaf);
        if (nr == 0)
                return BTRFS_LEAF_DATA_SIZE(root);
        return btrfs_item_offset_nr(leaf, nr - 1);
}
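/*
 * Illustrative leaf layout (not from the original source): item
 * headers grow forward from the start of the data area while item
 * payloads grow backward from the end, leaving the free space in the
 * middle:
 *
 *      [hdr 0][hdr 1][hdr 2]...free space...[data 2][data 1][data 0]
 *
 * leaf_data_end() returns the offset of the last item's payload, the
 * low edge of that data stack.
 */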
/*
 * search for key in the extent_buffer.  The items start at offset p,
 * and they are item_size apart.  There are 'max' items in p.
 *
 * the slot in the array is returned via slot, and it points to
 * the place where you would insert key if it is not found in
 * the array.
 *
 * slot may point to max if the key is bigger than all of the keys
 */
static noinline int generic_bin_search(struct extent_buffer *eb,
                                       unsigned long p,
                                       int item_size, struct btrfs_key *key,
                                       int max, int *slot)
{
        int low = 0;
        int high = max;
        int mid;
        int ret;
        struct btrfs_disk_key *tmp = NULL;
        struct btrfs_disk_key unaligned;
        unsigned long offset;
        char *kaddr = NULL;
        unsigned long map_start = 0;
        unsigned long map_len = 0;
        int err;

        while (low < high) {
                mid = (low + high) / 2;
                offset = p + mid * item_size;

                if (!kaddr || offset < map_start ||
                    (offset + sizeof(struct btrfs_disk_key)) >
                    map_start + map_len) {

                        err = map_private_extent_buffer(eb, offset,
                                                sizeof(struct btrfs_disk_key),
                                                &kaddr, &map_start, &map_len);

                        if (!err) {
                                tmp = (struct btrfs_disk_key *)(kaddr + offset -
                                                                map_start);
                        } else {
                                read_extent_buffer(eb, &unaligned,
                                                   offset, sizeof(unaligned));
                                tmp = &unaligned;
                        }

                } else {
                        tmp = (struct btrfs_disk_key *)(kaddr + offset -
                                                        map_start);
                }
                ret = comp_keys(tmp, key);

                if (ret < 0)
                        low = mid + 1;
                else if (ret > 0)
                        high = mid;
                else {
                        *slot = mid;
                        return 0;
                }
        }
        *slot = low;
        return 1;
}

/*
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                      int level, int *slot)
{
        if (level == 0) {
                return generic_bin_search(eb,
                                          offsetof(struct btrfs_leaf, items),
                                          sizeof(struct btrfs_item),
                                          key, btrfs_header_nritems(eb),
                                          slot);
        } else {
                return generic_bin_search(eb,
                                          offsetof(struct btrfs_node, ptrs),
                                          sizeof(struct btrfs_key_ptr),
                                          key, btrfs_header_nritems(eb),
                                          slot);
        }
        return -1;
}

int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot)
{
        return bin_search(eb, key, level, slot);
}
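/*
 * Example (illustrative, with made-up keys): searching a block whose
 * keys are [10, 20, 30] (objectids, identical type and offset):
 *
 *      bin_search(eb, key 20, level, &slot) -> returns 0, slot == 1
 *      bin_search(eb, key 25, level, &slot) -> returns 1, slot == 2
 *      bin_search(eb, key 99, level, &slot) -> returns 1, slot == 3 (nritems)
 *
 * The non-zero return with slot at the insert position is what lets
 * btrfs_search_slot() step down through the pointer just before the
 * first key bigger than the one being searched for.
 */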
static void root_add_used(struct btrfs_root *root, u32 size)
{
        spin_lock(&root->accounting_lock);
        btrfs_set_root_used(&root->root_item,
                            btrfs_root_used(&root->root_item) + size);
        spin_unlock(&root->accounting_lock);
}

static void root_sub_used(struct btrfs_root *root, u32 size)
{
        spin_lock(&root->accounting_lock);
        btrfs_set_root_used(&root->root_item,
                            btrfs_root_used(&root->root_item) - size);
        spin_unlock(&root->accounting_lock);
}

/* given a node and slot number, this reads the block it points to.  The
 * extent buffer is returned with a reference taken (but unlocked).
 * NULL is returned on error.
 */
static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
                                   struct extent_buffer *parent, int slot)
{
        int level = btrfs_header_level(parent);
        if (slot < 0)
                return NULL;
        if (slot >= btrfs_header_nritems(parent))
                return NULL;

        BUG_ON(level == 0);

        return read_tree_block(root, btrfs_node_blockptr(parent, slot),
                               btrfs_level_size(root, level - 1),
                               btrfs_node_ptr_generation(parent, slot));
}

/*
 * node level balancing, used to make sure nodes are in proper order for
 * item deletion.  We balance from the top down, so we have to make sure
 * that a deletion won't leave a node completely empty later on.
 */
static noinline int balance_level(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
                                  struct btrfs_path *path, int level)
{
        struct extent_buffer *right = NULL;
        struct extent_buffer *mid;
        struct extent_buffer *left = NULL;
        struct extent_buffer *parent = NULL;
        int ret = 0;
        int wret;
        int pslot;
        int orig_slot = path->slots[level];
        u64 orig_ptr;

        if (level == 0)
                return 0;

        mid = path->nodes[level];

        WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
                path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
        WARN_ON(btrfs_header_generation(mid) != trans->transid);

        orig_ptr = btrfs_node_blockptr(mid, orig_slot);

        if (level < BTRFS_MAX_LEVEL - 1) {
                parent = path->nodes[level + 1];
                pslot = path->slots[level + 1];
        }

        /*
         * deal with the case where there is only one pointer in the root
         * by promoting the node below to a root
         */
        if (!parent) {
                struct extent_buffer *child;

                if (btrfs_header_nritems(mid) != 1)
                        return 0;

                /* promote the child to a root */
                child = read_node_slot(root, mid, 0);
                BUG_ON(!child);
                btrfs_tree_lock(child);
                btrfs_set_lock_blocking(child);
                ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
                if (ret) {
                        btrfs_tree_unlock(child);
                        free_extent_buffer(child);
                        goto enospc;
                }

                rcu_assign_pointer(root->node, child);

                add_root_to_dirty_list(root);
                btrfs_tree_unlock(child);

                path->locks[level] = 0;
                path->nodes[level] = NULL;
                clean_tree_block(trans, root, mid);
                btrfs_tree_unlock(mid);
                /* once for the path */
                free_extent_buffer(mid);

                root_sub_used(root, mid->len);
                btrfs_free_tree_block(trans, root, mid, 0, 1);
                /* once for the root ptr */
                free_extent_buffer(mid);
                return 0;
        }
        if (btrfs_header_nritems(mid) >
            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
                return 0;

        btrfs_header_nritems(mid);

        left = read_node_slot(root, parent, pslot - 1);
        if (left) {
                btrfs_tree_lock(left);
                btrfs_set_lock_blocking(left);
                wret = btrfs_cow_block(trans, root, left,
                                       parent, pslot - 1, &left);
                if (wret) {
                        ret = wret;
                        goto enospc;
                }
        }
        right = read_node_slot(root, parent, pslot + 1);
        if (right) {
                btrfs_tree_lock(right);
                btrfs_set_lock_blocking(right);
                wret = btrfs_cow_block(trans, root, right,
                                       parent, pslot + 1, &right);
                if (wret) {
                        ret = wret;
                        goto enospc;
                }
        }

        /* first, try to make some room in the middle buffer */
        if (left) {
                orig_slot += btrfs_header_nritems(left);
                wret = push_node_left(trans, root, left, mid, 1);
                if (wret < 0)
                        ret = wret;
                btrfs_header_nritems(mid);
        }

        /*
         * then try to empty the right most buffer into the middle
         */
        if (right) {
                wret = push_node_left(trans, root, mid, right, 1);
                if (wret < 0 && wret != -ENOSPC)
                        ret = wret;
                if (btrfs_header_nritems(right) == 0) {
                        clean_tree_block(trans, root, right);
                        btrfs_tree_unlock(right);
                        wret = del_ptr(trans, root, path, level + 1, pslot +
                                       1);
                        if (wret)
                                ret = wret;
                        root_sub_used(root, right->len);
                        btrfs_free_tree_block(trans, root, right, 0, 1);
                        free_extent_buffer(right);
                        right = NULL;
                } else {
                        struct btrfs_disk_key right_key;
                        btrfs_node_key(right, &right_key, 0);
                        btrfs_set_node_key(parent, &right_key, pslot + 1);
                        btrfs_mark_buffer_dirty(parent);
                }
        }
        if (btrfs_header_nritems(mid) == 1) {
                /*
                 * we're not allowed to leave a node with one item in the
                 * tree during a delete.  A deletion from lower in the tree
                 * could try to delete the only pointer in this node.
                 * So, pull some keys from the left.
                 * There has to be a left pointer at this point because
                 * otherwise we would have pulled some pointers from the
                 * right
                 */
                BUG_ON(!left);
                wret = balance_node_right(trans, root, mid, left);
                if (wret < 0) {
                        ret = wret;
                        goto enospc;
                }
                if (wret == 1) {
                        wret = push_node_left(trans, root, left, mid, 1);
                        if (wret < 0)
                                ret = wret;
                }
                BUG_ON(wret == 1);
        }
        if (btrfs_header_nritems(mid) == 0) {
                clean_tree_block(trans, root, mid);
                btrfs_tree_unlock(mid);
                wret = del_ptr(trans, root, path, level + 1, pslot);
                if (wret)
                        ret = wret;
                root_sub_used(root, mid->len);
                btrfs_free_tree_block(trans, root, mid, 0, 1);
                free_extent_buffer(mid);
                mid = NULL;
        } else {
                /* update the parent key to reflect our changes */
                struct btrfs_disk_key mid_key;
                btrfs_node_key(mid, &mid_key, 0);
                btrfs_set_node_key(parent, &mid_key, pslot);
                btrfs_mark_buffer_dirty(parent);
        }

        /* update the path */
        if (left) {
                if (btrfs_header_nritems(left) > orig_slot) {
                        extent_buffer_get(left);
                        /* left was locked after cow */
                        path->nodes[level] = left;
                        path->slots[level + 1] -= 1;
                        path->slots[level] = orig_slot;
                        if (mid) {
                                btrfs_tree_unlock(mid);
                                free_extent_buffer(mid);
                        }
                } else {
                        orig_slot -= btrfs_header_nritems(left);
                        path->slots[level] = orig_slot;
                }
        }
        /* double check we haven't messed things up */
        if (orig_ptr !=
            btrfs_node_blockptr(path->nodes[level], path->slots[level]))
                BUG();
enospc:
        if (right) {
                btrfs_tree_unlock(right);
                free_extent_buffer(right);
        }
        if (left) {
                if (path->nodes[level] != left)
                        btrfs_tree_unlock(left);
                free_extent_buffer(left);
        }
        return ret;
}

/* Node balancing for insertion.  Here we only split or push nodes around
 * when they are completely full.  This is also done top down, so we
 * have to be pessimistic.
 */
static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path, int level)
{
        struct extent_buffer *right = NULL;
        struct extent_buffer *mid;
        struct extent_buffer *left = NULL;
        struct extent_buffer *parent = NULL;
        int ret = 0;
        int wret;
        int pslot;
        int orig_slot = path->slots[level];

        if (level == 0)
                return 1;

        mid = path->nodes[level];
        WARN_ON(btrfs_header_generation(mid) != trans->transid);

        if (level < BTRFS_MAX_LEVEL - 1) {
                parent = path->nodes[level + 1];
                pslot = path->slots[level + 1];
        }

        if (!parent)
                return 1;

        left = read_node_slot(root, parent, pslot - 1);

        /* first, try to make some room in the middle buffer */
        if (left) {
                u32 left_nr;

                btrfs_tree_lock(left);
                btrfs_set_lock_blocking(left);

                left_nr = btrfs_header_nritems(left);
                if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
                        wret = 1;
                } else {
                        ret = btrfs_cow_block(trans, root, left, parent,
                                              pslot - 1, &left);
                        if (ret)
                                wret = 1;
                        else {
                                wret = push_node_left(trans, root,
                                                      left, mid, 0);
                        }
                }
                if (wret < 0)
                        ret = wret;
                if (wret == 0) {
                        struct btrfs_disk_key disk_key;
                        orig_slot += left_nr;
                        btrfs_node_key(mid, &disk_key, 0);
                        btrfs_set_node_key(parent, &disk_key, pslot);
                        btrfs_mark_buffer_dirty(parent);
                        if (btrfs_header_nritems(left) > orig_slot) {
                                path->nodes[level] = left;
                                path->slots[level + 1] -= 1;
                                path->slots[level] = orig_slot;
                                btrfs_tree_unlock(mid);
                                free_extent_buffer(mid);
                        } else {
                                orig_slot -=
                                        btrfs_header_nritems(left);
                                path->slots[level] = orig_slot;
                                btrfs_tree_unlock(left);
                                free_extent_buffer(left);
                        }
                        return 0;
                }
                btrfs_tree_unlock(left);
                free_extent_buffer(left);
        }
        right = read_node_slot(root, parent, pslot + 1);

        /*
         * then try to empty the right most buffer into the middle
         */
        if (right) {
                u32 right_nr;

                btrfs_tree_lock(right);
                btrfs_set_lock_blocking(right);

                right_nr = btrfs_header_nritems(right);
                if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
                        wret = 1;
                } else {
                        ret = btrfs_cow_block(trans, root, right,
                                              parent, pslot + 1,
                                              &right);
                        if (ret)
                                wret = 1;
                        else {
                                wret = balance_node_right(trans, root,
                                                          right, mid);
                        }
                }
                if (wret < 0)
                        ret = wret;
                if (wret == 0) {
                        struct btrfs_disk_key disk_key;

                        btrfs_node_key(right, &disk_key, 0);
                        btrfs_set_node_key(parent, &disk_key, pslot + 1);
                        btrfs_mark_buffer_dirty(parent);

                        if (btrfs_header_nritems(mid) <= orig_slot) {
                                path->nodes[level] = right;
                                path->slots[level + 1] += 1;
                                path->slots[level] = orig_slot -
                                        btrfs_header_nritems(mid);
                                btrfs_tree_unlock(mid);
                                free_extent_buffer(mid);
                        } else {
                                btrfs_tree_unlock(right);
                                free_extent_buffer(right);
                        }
                        return 0;
                }
                btrfs_tree_unlock(right);
                free_extent_buffer(right);
        }
        return 1;
}

/*
 * readahead one full node of leaves, finding things that are close
 * to the block in 'slot', and triggering ra on them.
 */
static void reada_for_search(struct btrfs_root *root,
                             struct btrfs_path *path,
                             int level, int slot, u64 objectid)
{
        struct extent_buffer *node;
        struct btrfs_disk_key disk_key;
        u32 nritems;
        u64 search;
        u64 target;
        u64 nread = 0;
        u64 gen;
        int direction = path->reada;
        struct extent_buffer *eb;
        u32 nr;
        u32 blocksize;
        u32 nscan = 0;

        if (level != 1)
                return;

        if (!path->nodes[level])
                return;

        node = path->nodes[level];

        search = btrfs_node_blockptr(node, slot);
        blocksize = btrfs_level_size(root, level - 1);
        eb = btrfs_find_tree_block(root, search, blocksize);
        if (eb) {
                free_extent_buffer(eb);
                return;
        }

        target = search;

        nritems = btrfs_header_nritems(node);
        nr = slot;

        while (1) {
                if (direction < 0) {
                        if (nr == 0)
                                break;
                        nr--;
                } else if (direction > 0) {
                        nr++;
                        if (nr >= nritems)
                                break;
                }
                if (path->reada < 0 && objectid) {
                        btrfs_node_key(node, &disk_key, nr);
                        if (btrfs_disk_key_objectid(&disk_key) != objectid)
                                break;
                }
                search = btrfs_node_blockptr(node, nr);
                if ((search <= target && target - search <= 65536) ||
                    (search > target && search - target <= 65536)) {
                        gen = btrfs_node_ptr_generation(node, nr);
                        readahead_tree_block(root, search, blocksize, gen);
                        nread += blocksize;
                }
                nscan++;
                if ((nread > 65536 || nscan > 32))
                        break;
        }
}

/*
 * returns -EAGAIN if it had to drop the path, or zero if everything was in
 * cache
 */
static noinline int reada_for_balance(struct btrfs_root *root,
                                      struct btrfs_path *path, int level)
{
        int slot;
        int nritems;
        struct extent_buffer *parent;
        struct extent_buffer *eb;
        u64 gen;
        u64 block1 = 0;
        u64 block2 = 0;
        int ret = 0;
        int blocksize;

        parent = path->nodes[level + 1];
        if (!parent)
                return 0;

        nritems = btrfs_header_nritems(parent);
        slot = path->slots[level + 1];
        blocksize = btrfs_level_size(root, level);

        if (slot > 0) {
                block1 = btrfs_node_blockptr(parent, slot - 1);
                gen = btrfs_node_ptr_generation(parent, slot - 1);
                eb = btrfs_find_tree_block(root, block1, blocksize);
                if (eb && btrfs_buffer_uptodate(eb, gen))
                        block1 = 0;
                free_extent_buffer(eb);
        }
        if (slot + 1 < nritems) {
                block2 = btrfs_node_blockptr(parent, slot + 1);
                gen = btrfs_node_ptr_generation(parent, slot + 1);
                eb = btrfs_find_tree_block(root, block2, blocksize);
                if (eb && btrfs_buffer_uptodate(eb, gen))
                        block2 = 0;
                free_extent_buffer(eb);
        }
        if (block1 || block2) {
                ret = -EAGAIN;

                /* release the whole path */
                btrfs_release_path(path);

                /* read the blocks */
                if (block1)
                        readahead_tree_block(root, block1, blocksize, 0);
                if (block2)
                        readahead_tree_block(root, block2, blocksize, 0);

                if (block1) {
                        eb = read_tree_block(root, block1, blocksize, 0);
                        free_extent_buffer(eb);
                }
                if (block2) {
                        eb = read_tree_block(root, block2, blocksize, 0);
                        free_extent_buffer(eb);
                }
        }
        return ret;
}
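/*
 * Illustrative caller pattern (a sketch, not code from this file):
 * the -EAGAIN contract means callers restart their search once the
 * sibling blocks have been read in:
 *
 *      again:
 *              ...walk down to 'level'...
 *              if (reada_for_balance(root, path, level))
 *                      goto again;     (the path was released)
 *
 * setup_nodes_for_search() below follows exactly this pattern.
 */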
/*
 * when we walk down the tree, it is usually safe to unlock the higher layers
 * in the tree.  The exceptions are when our path goes through slot 0, because
 * operations on the tree might require changing key pointers higher up in the
 * tree.
 *
 * callers might also have set path->keep_locks, which tells this code to keep
 * the lock if the path points to the last slot in the block.  This is part of
 * walking through the tree, and selecting the next slot in the higher block.
 *
 * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
 * if lowest_unlock is 1, level 0 won't be unlocked
 */
static noinline void unlock_up(struct btrfs_path *path, int level,
                               int lowest_unlock)
{
        int i;
        int skip_level = level;
        int no_skips = 0;
        struct extent_buffer *t;

        for (i = level; i < BTRFS_MAX_LEVEL; i++) {
                if (!path->nodes[i])
                        break;
                if (!path->locks[i])
                        break;
                if (!no_skips && path->slots[i] == 0) {
                        skip_level = i + 1;
                        continue;
                }
                if (!no_skips && path->keep_locks) {
                        u32 nritems;
                        t = path->nodes[i];
                        nritems = btrfs_header_nritems(t);
                        if (nritems < 1 || path->slots[i] >= nritems - 1) {
                                skip_level = i + 1;
                                continue;
                        }
                }
                if (skip_level < i && i >= lowest_unlock)
                        no_skips = 1;

                t = path->nodes[i];
                if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
                        btrfs_tree_unlock_rw(t, path->locks[i]);
                        path->locks[i] = 0;
                }
        }
}

/*
 * This releases any locks held in the path starting at level and
 * going all the way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few
 * corner cases, such as COW of the block at slot zero in the node.  This
 * ignores those rules, and it should only be called when there are no
 * more updates to be done higher up in the tree.
 */
noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
        int i;

        if (path->keep_locks)
                return;

        for (i = level; i < BTRFS_MAX_LEVEL; i++) {
                if (!path->nodes[i])
                        continue;
                if (!path->locks[i])
                        continue;
                btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
                path->locks[i] = 0;
        }
}

/*
 * helper function for btrfs_search_slot.  The goal is to find a block
 * in cache without setting the path to blocking.  If we find the block
 * we return zero and the path is unchanged.
 *
 * If we can't find the block, we set the path blocking and do some
 * reada.  -EAGAIN is returned and the search must be repeated.
 */
static int
read_block_for_search(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root, struct btrfs_path *p,
                      struct extent_buffer **eb_ret, int level, int slot,
                      struct btrfs_key *key)
{
        u64 blocknr;
        u64 gen;
        u32 blocksize;
        struct extent_buffer *b = *eb_ret;
        struct extent_buffer *tmp;
        int ret;

        blocknr = btrfs_node_blockptr(b, slot);
        gen = btrfs_node_ptr_generation(b, slot);
        blocksize = btrfs_level_size(root, level - 1);

        tmp = btrfs_find_tree_block(root, blocknr, blocksize);
        if (tmp) {
                if (btrfs_buffer_uptodate(tmp, 0)) {
                        if (btrfs_buffer_uptodate(tmp, gen)) {
                                /*
                                 * we found an up to date block without
                                 * sleeping, return
                                 * right away
                                 */
                                *eb_ret = tmp;
                                return 0;
                        }
                        /* the pages were up to date, but we failed
                         * the generation number check.  Do a full
                         * read for the generation number that is correct.
                         * We must do this without dropping locks so
                         * we can trust our generation number
                         */
                        free_extent_buffer(tmp);
                        btrfs_set_path_blocking(p);

                        tmp = read_tree_block(root, blocknr, blocksize, gen);
                        if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
                                *eb_ret = tmp;
                                return 0;
                        }
                        free_extent_buffer(tmp);
                        btrfs_release_path(p);
                        return -EIO;
                }
        }

        /*
         * reduce lock contention at high levels
         * of the btree by dropping locks before
         * we read.  Don't release the lock on the current
         * level because we need to walk this node to figure
         * out which blocks to read.
         */
        btrfs_unlock_up_safe(p, level + 1);
        btrfs_set_path_blocking(p);

        free_extent_buffer(tmp);
        if (p->reada)
                reada_for_search(root, p, level, slot, key->objectid);

        btrfs_release_path(p);

        ret = -EAGAIN;
        tmp = read_tree_block(root, blocknr, blocksize, 0);
        if (tmp) {
                /*
                 * If the read above didn't mark this buffer up to date,
                 * it will never end up being up to date.  Set ret to EIO now
                 * and give up so that our caller doesn't loop forever
                 * on our EAGAINs.
                 */
                if (!btrfs_buffer_uptodate(tmp, 0))
                        ret = -EIO;
                free_extent_buffer(tmp);
        }
        return ret;
}

/*
 * helper function for btrfs_search_slot.  This does all of the checks
 * for node-level blocks and does any balancing required based on
 * the ins_len.
 *
 * If no extra work was required, zero is returned.  If we had to
 * drop the path, -EAGAIN is returned and btrfs_search_slot must
 * start over
 */
static int
setup_nodes_for_search(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct btrfs_path *p,
                       struct extent_buffer *b, int level, int ins_len,
                       int *write_lock_level)
{
        int ret;
        if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
            BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
                int sret;

                if (*write_lock_level < level + 1) {
                        *write_lock_level = level + 1;
                        btrfs_release_path(p);
                        goto again;
                }

                sret = reada_for_balance(root, p, level);
                if (sret)
                        goto again;

                btrfs_set_path_blocking(p);
                sret = split_node(trans, root, p, level);
                btrfs_clear_path_blocking(p, NULL, 0);

                BUG_ON(sret > 0);
                if (sret) {
                        ret = sret;
                        goto done;
                }
                b = p->nodes[level];
        } else if (ins_len < 0 && btrfs_header_nritems(b) <
                   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
                int sret;

                if (*write_lock_level < level + 1) {
                        *write_lock_level = level + 1;
                        btrfs_release_path(p);
                        goto again;
                }

                sret = reada_for_balance(root, p, level);
                if (sret)
                        goto again;

                btrfs_set_path_blocking(p);
                sret = balance_level(trans, root, p, level);
                btrfs_clear_path_blocking(p, NULL, 0);

                if (sret) {
                        ret = sret;
                        goto done;
                }
                b = p->nodes[level];
                if (!b) {
                        btrfs_release_path(p);
                        goto again;
                }
                BUG_ON(btrfs_header_nritems(b) == 1);
        }
        return 0;

again:
        ret = -EAGAIN;
done:
        return ret;
}
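/*
 * Worked example (illustrative numbers, assuming a 4K nodesize):
 * BTRFS_NODEPTRS_PER_BLOCK() is roughly (4096 - sizeof(struct
 * btrfs_header)) / sizeof(struct btrfs_key_ptr), about 121 pointers.
 * On the way down for an insert (ins_len > 0), a node holding 118 or
 * more pointers is split pre-emptively; on the way down for a delete
 * (ins_len < 0), a node holding fewer than about 60 pointers is
 * rebalanced.  Being pessimistic top down is what makes a single pass
 * sufficient.
 */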
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted, and 1 is returned.  If there are other errors during the
 * search a negative error number is returned.
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
 */
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
                      *root, struct btrfs_key *key, struct btrfs_path *p, int
                      ins_len, int cow)
{
        struct extent_buffer *b;
        int slot;
        int ret;
        int err;
        int level;
        int lowest_unlock = 1;
        int root_lock;
        /* everything at write_lock_level or lower must be write locked */
        int write_lock_level = 0;
        u8 lowest_level = 0;

        lowest_level = p->lowest_level;
        WARN_ON(lowest_level && ins_len > 0);
        WARN_ON(p->nodes[0] != NULL);

        if (ins_len < 0) {
                lowest_unlock = 2;

                /* when we are removing items, we might have to go up to level
                 * two as we update tree pointers.  Make sure we keep write
                 * locks for those levels as well
                 */
                write_lock_level = 2;
        } else if (ins_len > 0) {
                /*
                 * for inserting items, make sure we have a write lock on
                 * level 1 so we can update keys
                 */
                write_lock_level = 1;
        }

        if (!cow)
                write_lock_level = -1;

        if (cow && (p->keep_locks || p->lowest_level))
                write_lock_level = BTRFS_MAX_LEVEL;

again:
        /*
         * we try very hard to do read locks on the root
         */
        root_lock = BTRFS_READ_LOCK;
        level = 0;
        if (p->search_commit_root) {
                /*
                 * the commit roots are read only
                 * so we always do read locks
                 */
                b = root->commit_root;
                extent_buffer_get(b);
                level = btrfs_header_level(b);
                if (!p->skip_locking)
                        btrfs_tree_read_lock(b);
        } else {
                if (p->skip_locking) {
                        b = btrfs_root_node(root);
                        level = btrfs_header_level(b);
                } else {
                        /* we don't know the level of the root node
                         * until we actually have it read locked
                         */
                        b = btrfs_read_lock_root_node(root);
                        level = btrfs_header_level(b);
                        if (level <= write_lock_level) {
                                /* whoops, must trade for write lock */
                                btrfs_tree_read_unlock(b);
                                free_extent_buffer(b);
                                b = btrfs_lock_root_node(root);
                                root_lock = BTRFS_WRITE_LOCK;

                                /* the level might have changed, check again */
                                level = btrfs_header_level(b);
                        }
                }
        }
        p->nodes[level] = b;
        if (!p->skip_locking)
                p->locks[level] = root_lock;

        while (b) {
                level = btrfs_header_level(b);

                /*
                 * setup the path here so we can release it under lock
                 * contention with the cow code
                 */
                if (cow) {
                        /*
                         * if we don't really need to cow this block
                         * then we don't want to set the path blocking,
                         * so we test it here
                         */
                        if (!should_cow_block(trans, root, b))
                                goto cow_done;

                        btrfs_set_path_blocking(p);

                        /*
                         * must have write locks on this node and the
                         * parent
                         */
                        if (level + 1 > write_lock_level) {
                                write_lock_level = level + 1;
                                btrfs_release_path(p);
                                goto again;
                        }

                        err = btrfs_cow_block(trans, root, b,
                                              p->nodes[level + 1],
                                              p->slots[level + 1], &b);
                        if (err) {
                                ret = err;
                                goto done;
                        }
                }
cow_done:
                BUG_ON(!cow && ins_len);

                p->nodes[level] = b;
                btrfs_clear_path_blocking(p, NULL, 0);

                /*
                 * we have a lock on b and as long as we aren't changing
                 * the tree, there is no way for the items in b to change.
                 * It is safe to drop the lock on our parent before we
                 * go through the expensive btree search on b.
                 *
                 * If cow is true, then we might be changing slot zero,
                 * which may require changing the parent.  So, we can't
                 * drop the lock until after we know which slot we're
                 * operating on.
                 */
                if (!cow)
                        btrfs_unlock_up_safe(p, level + 1);

                ret = bin_search(b, key, level, &slot);

                if (level != 0) {
                        int dec = 0;
                        if (ret && slot > 0) {
                                dec = 1;
                                slot -= 1;
                        }
                        p->slots[level] = slot;
                        err = setup_nodes_for_search(trans, root, p, b, level,
                                                     ins_len, &write_lock_level);
                        if (err == -EAGAIN)
                                goto again;
                        if (err) {
                                ret = err;
                                goto done;
                        }
                        b = p->nodes[level];
                        slot = p->slots[level];

                        /*
                         * slot 0 is special, if we change the key
                         * we have to update the parent pointer
                         * which means we must have a write lock
                         * on the parent
                         */
                        if (slot == 0 && cow &&
                            write_lock_level < level + 1) {
                                write_lock_level = level + 1;
                                btrfs_release_path(p);
                                goto again;
                        }

                        unlock_up(p, level, lowest_unlock);

                        if (level == lowest_level) {
                                if (dec)
                                        p->slots[level]++;
                                goto done;
                        }

                        err = read_block_for_search(trans, root, p,
                                                    &b, level, slot, key);
                        if (err == -EAGAIN)
                                goto again;
                        if (err) {
                                ret = err;
                                goto done;
                        }

                        if (!p->skip_locking) {
                                level = btrfs_header_level(b);
                                if (level <= write_lock_level) {
                                        err = btrfs_try_tree_write_lock(b);
                                        if (!err) {
                                                btrfs_set_path_blocking(p);
                                                btrfs_tree_lock(b);
                                                btrfs_clear_path_blocking(p, b,
                                                                  BTRFS_WRITE_LOCK);
                                        }
                                        p->locks[level] = BTRFS_WRITE_LOCK;
                                } else {
                                        err = btrfs_try_tree_read_lock(b);
                                        if (!err) {
                                                btrfs_set_path_blocking(p);
                                                btrfs_tree_read_lock(b);
                                                btrfs_clear_path_blocking(p, b,
                                                                  BTRFS_READ_LOCK);
                                        }
                                        p->locks[level] = BTRFS_READ_LOCK;
                                }
                                p->nodes[level] = b;
                        }
                } else {
                        p->slots[level] = slot;
                        if (ins_len > 0 &&
                            btrfs_leaf_free_space(root, b) < ins_len) {
                                if (write_lock_level < 1) {
                                        write_lock_level = 1;
                                        btrfs_release_path(p);
                                        goto again;
                                }

                                btrfs_set_path_blocking(p);
                                err = split_leaf(trans, root, key,
                                                 p, ins_len, ret == 0);
                                btrfs_clear_path_blocking(p, NULL, 0);

                                BUG_ON(err > 0);
                                if (err) {
                                        ret = err;
                                        goto done;
                                }
                        }
                        if (!p->search_for_split)
                                unlock_up(p, level, lowest_unlock);
                        goto done;
                }
        }
        ret = 1;
done:
        /*
         * we don't really know what they plan on doing with the path
         * from here on, so for now just mark it as blocking
         */
        if (!p->leave_spinning)
                btrfs_set_path_blocking(p);
        if (ret < 0)
                btrfs_release_path(p);
        return ret;
}
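/*
 * Example caller (an illustrative sketch; the key is made up).  A
 * read-only lookup passes trans == NULL, ins_len == 0 and cow == 0:
 *
 *      struct btrfs_key key = { .objectid = ino,
 *                               .type = BTRFS_INODE_ITEM_KEY,
 *                               .offset = 0 };
 *
 *      ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 *      if (ret < 0)
 *              return ret;     (search error)
 *      if (ret > 0)
 *              ...not found, path points at the insert position...
 *      else
 *              ...item is at path->nodes[0], path->slots[0]...
 */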
/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 *
 * If this fails to write a tree block, it returns -1, but continues
 * fixing up the blocks in ram so the tree is consistent.
 */
static int fixup_low_keys(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct btrfs_path *path,
                          struct btrfs_disk_key *key, int level)
{
        int i;
        int ret = 0;
        struct extent_buffer *t;

        for (i = level; i < BTRFS_MAX_LEVEL; i++) {
                int tslot = path->slots[i];
                if (!path->nodes[i])
                        break;
                t = path->nodes[i];
                btrfs_set_node_key(t, key, tslot);
                btrfs_mark_buffer_dirty(path->nodes[i]);
                if (tslot != 0)
                        break;
        }
        return ret;
}

/*
 * update item key.
 *
 * This function isn't completely safe.  It's the caller's responsibility
 * that the new key won't break the order
 */
int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, struct btrfs_path *path,
                            struct btrfs_key *new_key)
{
        struct btrfs_disk_key disk_key;
        struct extent_buffer *eb;
        int slot;

        eb = path->nodes[0];
        slot = path->slots[0];
        if (slot > 0) {
                btrfs_item_key(eb, &disk_key, slot - 1);
                if (comp_keys(&disk_key, new_key) >= 0)
                        return -1;
        }
        if (slot < btrfs_header_nritems(eb) - 1) {
                btrfs_item_key(eb, &disk_key, slot + 1);
                if (comp_keys(&disk_key, new_key) <= 0)
                        return -1;
        }

        btrfs_cpu_key_to_disk(&disk_key, new_key);
        btrfs_set_item_key(eb, &disk_key, slot);
        btrfs_mark_buffer_dirty(eb);
        if (slot == 0)
                fixup_low_keys(trans, root, path, &disk_key, 1);
        return 0;
}
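/*
 * Example (illustrative keys): with leaf keys
 * [(258, EXTENT_DATA, 0), (258, EXTENT_DATA, 4096), (258, EXTENT_DATA, 8192)]
 * and path->slots[0] == 1, btrfs_set_item_key_safe() accepts a new
 * offset of 5000, which stays strictly between the neighbours, but
 * returns -1 for 0 or 8192, since either would collide with or reorder
 * past a neighbouring item.
 */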
/*
 * try to push data from one node into the next node left in the
 * tree.
 *
 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
 * error, and > 0 if there was no room in the left hand block.
 */
static int push_node_left(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct extent_buffer *dst,
                          struct extent_buffer *src, int empty)
{
        int push_items = 0;
        int src_nritems;
        int dst_nritems;
        int ret = 0;

        src_nritems = btrfs_header_nritems(src);
        dst_nritems = btrfs_header_nritems(dst);
        push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
        WARN_ON(btrfs_header_generation(src) != trans->transid);
        WARN_ON(btrfs_header_generation(dst) != trans->transid);

        if (!empty && src_nritems <= 8)
                return 1;

        if (push_items <= 0)
                return 1;

        if (empty) {
                push_items = min(src_nritems, push_items);
                if (push_items < src_nritems) {
                        /* leave at least 8 pointers in the node if
                         * we aren't going to empty it
                         */
                        if (src_nritems - push_items < 8) {
                                if (push_items <= 8)
                                        return 1;
                                push_items -= 8;
                        }
                }
        } else
                push_items = min(src_nritems - 8, push_items);

        copy_extent_buffer(dst, src,
                           btrfs_node_key_ptr_offset(dst_nritems),
                           btrfs_node_key_ptr_offset(0),
                           push_items * sizeof(struct btrfs_key_ptr));

        if (push_items < src_nritems) {
                memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
                                      btrfs_node_key_ptr_offset(push_items),
                                      (src_nritems - push_items) *
                                      sizeof(struct btrfs_key_ptr));
        }
        btrfs_set_header_nritems(src, src_nritems - push_items);
        btrfs_set_header_nritems(dst, dst_nritems + push_items);
        btrfs_mark_buffer_dirty(src);
        btrfs_mark_buffer_dirty(dst);

        return ret;
}

/*
 * try to push data from one node into the next node right in the
 * tree.
 *
 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
 * error, and > 0 if there was no room in the right hand block.
 *
 * this will only push up to 1/2 the contents of the left node over
 */
static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *dst,
                              struct extent_buffer *src)
{
        int push_items = 0;
        int max_push;
        int src_nritems;
        int dst_nritems;
        int ret = 0;

        WARN_ON(btrfs_header_generation(src) != trans->transid);
        WARN_ON(btrfs_header_generation(dst) != trans->transid);

        src_nritems = btrfs_header_nritems(src);
        dst_nritems = btrfs_header_nritems(dst);
        push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
        if (push_items <= 0)
                return 1;

        if (src_nritems < 4)
                return 1;

        max_push = src_nritems / 2 + 1;
        /* don't try to empty the node */
        if (max_push >= src_nritems)
                return 1;

        if (max_push < push_items)
                push_items = max_push;

        memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
                              btrfs_node_key_ptr_offset(0),
                              (dst_nritems) *
                              sizeof(struct btrfs_key_ptr));

        copy_extent_buffer(dst, src,
                           btrfs_node_key_ptr_offset(0),
                           btrfs_node_key_ptr_offset(src_nritems - push_items),
                           push_items * sizeof(struct btrfs_key_ptr));

        btrfs_set_header_nritems(src, src_nritems - push_items);
        btrfs_set_header_nritems(dst, dst_nritems + push_items);

        btrfs_mark_buffer_dirty(src);
        btrfs_mark_buffer_dirty(dst);

        return ret;
}

/*
 * helper function to insert a new root level in the tree.
2051 * A new node is allocated, and a single item is inserted to 2052 * point to the existing root 2053 * 2054 * returns zero on success or < 0 on failure. 2055 */ 2056 static noinline int insert_new_root(struct btrfs_trans_handle *trans, 2057 struct btrfs_root *root, 2058 struct btrfs_path *path, int level) 2059 { 2060 u64 lower_gen; 2061 struct extent_buffer *lower; 2062 struct extent_buffer *c; 2063 struct extent_buffer *old; 2064 struct btrfs_disk_key lower_key; 2065 2066 BUG_ON(path->nodes[level]); 2067 BUG_ON(path->nodes[level-1] != root->node); 2068 2069 lower = path->nodes[level-1]; 2070 if (level == 1) 2071 btrfs_item_key(lower, &lower_key, 0); 2072 else 2073 btrfs_node_key(lower, &lower_key, 0); 2074 2075 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2076 root->root_key.objectid, &lower_key, 2077 level, root->node->start, 0); 2078 if (IS_ERR(c)) 2079 return PTR_ERR(c); 2080 2081 root_add_used(root, root->nodesize); 2082 2083 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); 2084 btrfs_set_header_nritems(c, 1); 2085 btrfs_set_header_level(c, level); 2086 btrfs_set_header_bytenr(c, c->start); 2087 btrfs_set_header_generation(c, trans->transid); 2088 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); 2089 btrfs_set_header_owner(c, root->root_key.objectid); 2090 2091 write_extent_buffer(c, root->fs_info->fsid, 2092 (unsigned long)btrfs_header_fsid(c), 2093 BTRFS_FSID_SIZE); 2094 2095 write_extent_buffer(c, root->fs_info->chunk_tree_uuid, 2096 (unsigned long)btrfs_header_chunk_tree_uuid(c), 2097 BTRFS_UUID_SIZE); 2098 2099 btrfs_set_node_key(c, &lower_key, 0); 2100 btrfs_set_node_blockptr(c, 0, lower->start); 2101 lower_gen = btrfs_header_generation(lower); 2102 WARN_ON(lower_gen != trans->transid); 2103 2104 btrfs_set_node_ptr_generation(c, 0, lower_gen); 2105 2106 btrfs_mark_buffer_dirty(c); 2107 2108 old = root->node; 2109 rcu_assign_pointer(root->node, c); 2110 2111 /* the super has an extra ref to root->node */ 2112 free_extent_buffer(old); 2113 2114 add_root_to_dirty_list(root); 2115 extent_buffer_get(c); 2116 path->nodes[level] = c; 2117 path->locks[level] = BTRFS_WRITE_LOCK; 2118 path->slots[level] = 0; 2119 return 0; 2120 } 2121 2122 /* 2123 * worker function to insert a single pointer in a node. 2124 * the node should have enough room for the pointer already 2125 * 2126 * slot and level indicate where you want the key to go, and 2127 * blocknr is the block the key points to. 
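 * (in the code below the parameter is named bytenr: it is the byte
 * offset of the child block, stored with btrfs_set_node_blockptr)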
2128 * 2129 * returns zero on success and < 0 on any error 2130 */ 2131 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root 2132 *root, struct btrfs_path *path, struct btrfs_disk_key 2133 *key, u64 bytenr, int slot, int level) 2134 { 2135 struct extent_buffer *lower; 2136 int nritems; 2137 2138 BUG_ON(!path->nodes[level]); 2139 btrfs_assert_tree_locked(path->nodes[level]); 2140 lower = path->nodes[level]; 2141 nritems = btrfs_header_nritems(lower); 2142 BUG_ON(slot > nritems); 2143 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) 2144 BUG(); 2145 if (slot != nritems) { 2146 memmove_extent_buffer(lower, 2147 btrfs_node_key_ptr_offset(slot + 1), 2148 btrfs_node_key_ptr_offset(slot), 2149 (nritems - slot) * sizeof(struct btrfs_key_ptr)); 2150 } 2151 btrfs_set_node_key(lower, key, slot); 2152 btrfs_set_node_blockptr(lower, slot, bytenr); 2153 WARN_ON(trans->transid == 0); 2154 btrfs_set_node_ptr_generation(lower, slot, trans->transid); 2155 btrfs_set_header_nritems(lower, nritems + 1); 2156 btrfs_mark_buffer_dirty(lower); 2157 return 0; 2158 } 2159 2160 /* 2161 * split the node at the specified level in path in two. 2162 * The path is corrected to point to the appropriate node after the split 2163 * 2164 * Before splitting this tries to make some room in the node by pushing 2165 * left and right, if either one works, it returns right away. 2166 * 2167 * returns 0 on success and < 0 on failure 2168 */ 2169 static noinline int split_node(struct btrfs_trans_handle *trans, 2170 struct btrfs_root *root, 2171 struct btrfs_path *path, int level) 2172 { 2173 struct extent_buffer *c; 2174 struct extent_buffer *split; 2175 struct btrfs_disk_key disk_key; 2176 int mid; 2177 int ret; 2178 int wret; 2179 u32 c_nritems; 2180 2181 c = path->nodes[level]; 2182 WARN_ON(btrfs_header_generation(c) != trans->transid); 2183 if (c == root->node) { 2184 /* trying to split the root, lets make a new one */ 2185 ret = insert_new_root(trans, root, path, level + 1); 2186 if (ret) 2187 return ret; 2188 } else { 2189 ret = push_nodes_for_insert(trans, root, path, level); 2190 c = path->nodes[level]; 2191 if (!ret && btrfs_header_nritems(c) < 2192 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) 2193 return 0; 2194 if (ret < 0) 2195 return ret; 2196 } 2197 2198 c_nritems = btrfs_header_nritems(c); 2199 mid = (c_nritems + 1) / 2; 2200 btrfs_node_key(c, &disk_key, mid); 2201 2202 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2203 root->root_key.objectid, 2204 &disk_key, level, c->start, 0); 2205 if (IS_ERR(split)) 2206 return PTR_ERR(split); 2207 2208 root_add_used(root, root->nodesize); 2209 2210 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); 2211 btrfs_set_header_level(split, btrfs_header_level(c)); 2212 btrfs_set_header_bytenr(split, split->start); 2213 btrfs_set_header_generation(split, trans->transid); 2214 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); 2215 btrfs_set_header_owner(split, root->root_key.objectid); 2216 write_extent_buffer(split, root->fs_info->fsid, 2217 (unsigned long)btrfs_header_fsid(split), 2218 BTRFS_FSID_SIZE); 2219 write_extent_buffer(split, root->fs_info->chunk_tree_uuid, 2220 (unsigned long)btrfs_header_chunk_tree_uuid(split), 2221 BTRFS_UUID_SIZE); 2222 2223 2224 copy_extent_buffer(split, c, 2225 btrfs_node_key_ptr_offset(0), 2226 btrfs_node_key_ptr_offset(mid), 2227 (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); 2228 btrfs_set_header_nritems(split, c_nritems - mid); 2229 btrfs_set_header_nritems(c, mid); 2230 ret = 0; 2231 2232 
btrfs_mark_buffer_dirty(c); 2233 btrfs_mark_buffer_dirty(split); 2234 2235 wret = insert_ptr(trans, root, path, &disk_key, split->start, 2236 path->slots[level + 1] + 1, 2237 level + 1); 2238 if (wret) 2239 ret = wret; 2240 2241 if (path->slots[level] >= mid) { 2242 path->slots[level] -= mid; 2243 btrfs_tree_unlock(c); 2244 free_extent_buffer(c); 2245 path->nodes[level] = split; 2246 path->slots[level + 1] += 1; 2247 } else { 2248 btrfs_tree_unlock(split); 2249 free_extent_buffer(split); 2250 } 2251 return ret; 2252 } 2253 2254 /* 2255 * how many bytes are required to store the items in a leaf. start 2256 * and nr indicate which items in the leaf to check. This totals up the 2257 * space used both by the item structs and the item data 2258 */ 2259 static int leaf_space_used(struct extent_buffer *l, int start, int nr) 2260 { 2261 int data_len; 2262 int nritems = btrfs_header_nritems(l); 2263 int end = min(nritems, start + nr) - 1; 2264 2265 if (!nr) 2266 return 0; 2267 data_len = btrfs_item_end_nr(l, start); 2268 data_len = data_len - btrfs_item_offset_nr(l, end); 2269 data_len += sizeof(struct btrfs_item) * nr; 2270 WARN_ON(data_len < 0); 2271 return data_len; 2272 } 2273 2274 /* 2275 * The space between the end of the leaf items and 2276 * the start of the leaf data. IOW, how much room 2277 * the leaf has left for both items and data 2278 */ 2279 noinline int btrfs_leaf_free_space(struct btrfs_root *root, 2280 struct extent_buffer *leaf) 2281 { 2282 int nritems = btrfs_header_nritems(leaf); 2283 int ret; 2284 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); 2285 if (ret < 0) { 2286 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, " 2287 "used %d nritems %d\n", 2288 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), 2289 leaf_space_used(leaf, 0, nritems), nritems); 2290 } 2291 return ret; 2292 } 2293 2294 /* 2295 * min slot controls the lowest index we're willing to push to the 2296 * right. 
We'll push up to and including min_slot, but no lower 2297 */ 2298 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, 2299 struct btrfs_root *root, 2300 struct btrfs_path *path, 2301 int data_size, int empty, 2302 struct extent_buffer *right, 2303 int free_space, u32 left_nritems, 2304 u32 min_slot) 2305 { 2306 struct extent_buffer *left = path->nodes[0]; 2307 struct extent_buffer *upper = path->nodes[1]; 2308 struct btrfs_disk_key disk_key; 2309 int slot; 2310 u32 i; 2311 int push_space = 0; 2312 int push_items = 0; 2313 struct btrfs_item *item; 2314 u32 nr; 2315 u32 right_nritems; 2316 u32 data_end; 2317 u32 this_item_size; 2318 2319 if (empty) 2320 nr = 0; 2321 else 2322 nr = max_t(u32, 1, min_slot); 2323 2324 if (path->slots[0] >= left_nritems) 2325 push_space += data_size; 2326 2327 slot = path->slots[1]; 2328 i = left_nritems - 1; 2329 while (i >= nr) { 2330 item = btrfs_item_nr(left, i); 2331 2332 if (!empty && push_items > 0) { 2333 if (path->slots[0] > i) 2334 break; 2335 if (path->slots[0] == i) { 2336 int space = btrfs_leaf_free_space(root, left); 2337 if (space + push_space * 2 > free_space) 2338 break; 2339 } 2340 } 2341 2342 if (path->slots[0] == i) 2343 push_space += data_size; 2344 2345 this_item_size = btrfs_item_size(left, item); 2346 if (this_item_size + sizeof(*item) + push_space > free_space) 2347 break; 2348 2349 push_items++; 2350 push_space += this_item_size + sizeof(*item); 2351 if (i == 0) 2352 break; 2353 i--; 2354 } 2355 2356 if (push_items == 0) 2357 goto out_unlock; 2358 2359 if (!empty && push_items == left_nritems) 2360 WARN_ON(1); 2361 2362 /* push left to right */ 2363 right_nritems = btrfs_header_nritems(right); 2364 2365 push_space = btrfs_item_end_nr(left, left_nritems - push_items); 2366 push_space -= leaf_data_end(root, left); 2367 2368 /* make room in the right data area */ 2369 data_end = leaf_data_end(root, right); 2370 memmove_extent_buffer(right, 2371 btrfs_leaf_data(right) + data_end - push_space, 2372 btrfs_leaf_data(right) + data_end, 2373 BTRFS_LEAF_DATA_SIZE(root) - data_end); 2374 2375 /* copy from the left data area */ 2376 copy_extent_buffer(right, left, btrfs_leaf_data(right) + 2377 BTRFS_LEAF_DATA_SIZE(root) - push_space, 2378 btrfs_leaf_data(left) + leaf_data_end(root, left), 2379 push_space); 2380 2381 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), 2382 btrfs_item_nr_offset(0), 2383 right_nritems * sizeof(struct btrfs_item)); 2384 2385 /* copy the items from left to right */ 2386 copy_extent_buffer(right, left, btrfs_item_nr_offset(0), 2387 btrfs_item_nr_offset(left_nritems - push_items), 2388 push_items * sizeof(struct btrfs_item)); 2389 2390 /* update the item pointers */ 2391 right_nritems += push_items; 2392 btrfs_set_header_nritems(right, right_nritems); 2393 push_space = BTRFS_LEAF_DATA_SIZE(root); 2394 for (i = 0; i < right_nritems; i++) { 2395 item = btrfs_item_nr(right, i); 2396 push_space -= btrfs_item_size(right, item); 2397 btrfs_set_item_offset(right, item, push_space); 2398 } 2399 2400 left_nritems -= push_items; 2401 btrfs_set_header_nritems(left, left_nritems); 2402 2403 if (left_nritems) 2404 btrfs_mark_buffer_dirty(left); 2405 else 2406 clean_tree_block(trans, root, left); 2407 2408 btrfs_mark_buffer_dirty(right); 2409 2410 btrfs_item_key(right, &disk_key, 0); 2411 btrfs_set_node_key(upper, &disk_key, slot + 1); 2412 btrfs_mark_buffer_dirty(upper); 2413 2414 /* then fixup the leaf pointer in the path */ 2415 if (path->slots[0] >= left_nritems) { 2416 path->slots[0] -= 
left_nritems; 2417 if (btrfs_header_nritems(path->nodes[0]) == 0) 2418 clean_tree_block(trans, root, path->nodes[0]); 2419 btrfs_tree_unlock(path->nodes[0]); 2420 free_extent_buffer(path->nodes[0]); 2421 path->nodes[0] = right; 2422 path->slots[1] += 1; 2423 } else { 2424 btrfs_tree_unlock(right); 2425 free_extent_buffer(right); 2426 } 2427 return 0; 2428 2429 out_unlock: 2430 btrfs_tree_unlock(right); 2431 free_extent_buffer(right); 2432 return 1; 2433 } 2434 2435 /* 2436 * push some data in the path leaf to the right, trying to free up at 2437 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2438 * 2439 * returns 1 if the push failed because the other node didn't have enough 2440 * room, 0 if everything worked out and < 0 if there were major errors. 2441 * 2442 * this will push starting from min_slot to the end of the leaf. It won't 2443 * push any slot lower than min_slot 2444 */ 2445 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 2446 *root, struct btrfs_path *path, 2447 int min_data_size, int data_size, 2448 int empty, u32 min_slot) 2449 { 2450 struct extent_buffer *left = path->nodes[0]; 2451 struct extent_buffer *right; 2452 struct extent_buffer *upper; 2453 int slot; 2454 int free_space; 2455 u32 left_nritems; 2456 int ret; 2457 2458 if (!path->nodes[1]) 2459 return 1; 2460 2461 slot = path->slots[1]; 2462 upper = path->nodes[1]; 2463 if (slot >= btrfs_header_nritems(upper) - 1) 2464 return 1; 2465 2466 btrfs_assert_tree_locked(path->nodes[1]); 2467 2468 right = read_node_slot(root, upper, slot + 1); 2469 if (right == NULL) 2470 return 1; 2471 2472 btrfs_tree_lock(right); 2473 btrfs_set_lock_blocking(right); 2474 2475 free_space = btrfs_leaf_free_space(root, right); 2476 if (free_space < data_size) 2477 goto out_unlock; 2478 2479 /* cow and double check */ 2480 ret = btrfs_cow_block(trans, root, right, upper, 2481 slot + 1, &right); 2482 if (ret) 2483 goto out_unlock; 2484 2485 free_space = btrfs_leaf_free_space(root, right); 2486 if (free_space < data_size) 2487 goto out_unlock; 2488 2489 left_nritems = btrfs_header_nritems(left); 2490 if (left_nritems == 0) 2491 goto out_unlock; 2492 2493 return __push_leaf_right(trans, root, path, min_data_size, empty, 2494 right, free_space, left_nritems, min_slot); 2495 out_unlock: 2496 btrfs_tree_unlock(right); 2497 free_extent_buffer(right); 2498 return 1; 2499 } 2500 2501 /* 2502 * push some data in the path leaf to the left, trying to free up at 2503 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2504 * 2505 * max_slot can put a limit on how far into the leaf we'll push items. The 2506 * item at 'max_slot' won't be touched. 
Use (u32)-1 to make us do all the 2507 * items 2508 */ 2509 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, 2510 struct btrfs_root *root, 2511 struct btrfs_path *path, int data_size, 2512 int empty, struct extent_buffer *left, 2513 int free_space, u32 right_nritems, 2514 u32 max_slot) 2515 { 2516 struct btrfs_disk_key disk_key; 2517 struct extent_buffer *right = path->nodes[0]; 2518 int i; 2519 int push_space = 0; 2520 int push_items = 0; 2521 struct btrfs_item *item; 2522 u32 old_left_nritems; 2523 u32 nr; 2524 int ret = 0; 2525 int wret; 2526 u32 this_item_size; 2527 u32 old_left_item_size; 2528 2529 if (empty) 2530 nr = min(right_nritems, max_slot); 2531 else 2532 nr = min(right_nritems - 1, max_slot); 2533 2534 for (i = 0; i < nr; i++) { 2535 item = btrfs_item_nr(right, i); 2536 2537 if (!empty && push_items > 0) { 2538 if (path->slots[0] < i) 2539 break; 2540 if (path->slots[0] == i) { 2541 int space = btrfs_leaf_free_space(root, right); 2542 if (space + push_space * 2 > free_space) 2543 break; 2544 } 2545 } 2546 2547 if (path->slots[0] == i) 2548 push_space += data_size; 2549 2550 this_item_size = btrfs_item_size(right, item); 2551 if (this_item_size + sizeof(*item) + push_space > free_space) 2552 break; 2553 2554 push_items++; 2555 push_space += this_item_size + sizeof(*item); 2556 } 2557 2558 if (push_items == 0) { 2559 ret = 1; 2560 goto out; 2561 } 2562 if (!empty && push_items == btrfs_header_nritems(right)) 2563 WARN_ON(1); 2564 2565 /* push data from right to left */ 2566 copy_extent_buffer(left, right, 2567 btrfs_item_nr_offset(btrfs_header_nritems(left)), 2568 btrfs_item_nr_offset(0), 2569 push_items * sizeof(struct btrfs_item)); 2570 2571 push_space = BTRFS_LEAF_DATA_SIZE(root) - 2572 btrfs_item_offset_nr(right, push_items - 1); 2573 2574 copy_extent_buffer(left, right, btrfs_leaf_data(left) + 2575 leaf_data_end(root, left) - push_space, 2576 btrfs_leaf_data(right) + 2577 btrfs_item_offset_nr(right, push_items - 1), 2578 push_space); 2579 old_left_nritems = btrfs_header_nritems(left); 2580 BUG_ON(old_left_nritems <= 0); 2581 2582 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); 2583 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 2584 u32 ioff; 2585 2586 item = btrfs_item_nr(left, i); 2587 2588 ioff = btrfs_item_offset(left, item); 2589 btrfs_set_item_offset(left, item, 2590 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); 2591 } 2592 btrfs_set_header_nritems(left, old_left_nritems + push_items); 2593 2594 /* fixup right node */ 2595 if (push_items > right_nritems) { 2596 printk(KERN_CRIT "push items %d nr %u\n", push_items, 2597 right_nritems); 2598 WARN_ON(1); 2599 } 2600 2601 if (push_items < right_nritems) { 2602 push_space = btrfs_item_offset_nr(right, push_items - 1) - 2603 leaf_data_end(root, right); 2604 memmove_extent_buffer(right, btrfs_leaf_data(right) + 2605 BTRFS_LEAF_DATA_SIZE(root) - push_space, 2606 btrfs_leaf_data(right) + 2607 leaf_data_end(root, right), push_space); 2608 2609 memmove_extent_buffer(right, btrfs_item_nr_offset(0), 2610 btrfs_item_nr_offset(push_items), 2611 (btrfs_header_nritems(right) - push_items) * 2612 sizeof(struct btrfs_item)); 2613 } 2614 right_nritems -= push_items; 2615 btrfs_set_header_nritems(right, right_nritems); 2616 push_space = BTRFS_LEAF_DATA_SIZE(root); 2617 for (i = 0; i < right_nritems; i++) { 2618 item = btrfs_item_nr(right, i); 2619 2620 push_space = push_space - btrfs_item_size(right, item); 2621 btrfs_set_item_offset(right, item, push_space); 
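		/* offsets are rebuilt counting down from the end of the data area */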
2622 } 2623 2624 btrfs_mark_buffer_dirty(left); 2625 if (right_nritems) 2626 btrfs_mark_buffer_dirty(right); 2627 else 2628 clean_tree_block(trans, root, right); 2629 2630 btrfs_item_key(right, &disk_key, 0); 2631 wret = fixup_low_keys(trans, root, path, &disk_key, 1); 2632 if (wret) 2633 ret = wret; 2634 2635 /* then fixup the leaf pointer in the path */ 2636 if (path->slots[0] < push_items) { 2637 path->slots[0] += old_left_nritems; 2638 btrfs_tree_unlock(path->nodes[0]); 2639 free_extent_buffer(path->nodes[0]); 2640 path->nodes[0] = left; 2641 path->slots[1] -= 1; 2642 } else { 2643 btrfs_tree_unlock(left); 2644 free_extent_buffer(left); 2645 path->slots[0] -= push_items; 2646 } 2647 BUG_ON(path->slots[0] < 0); 2648 return ret; 2649 out: 2650 btrfs_tree_unlock(left); 2651 free_extent_buffer(left); 2652 return ret; 2653 } 2654 2655 /* 2656 * push some data in the path leaf to the left, trying to free up at 2657 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2658 * 2659 * max_slot can put a limit on how far into the leaf we'll push items. The 2660 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the 2661 * items 2662 */ 2663 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 2664 *root, struct btrfs_path *path, int min_data_size, 2665 int data_size, int empty, u32 max_slot) 2666 { 2667 struct extent_buffer *right = path->nodes[0]; 2668 struct extent_buffer *left; 2669 int slot; 2670 int free_space; 2671 u32 right_nritems; 2672 int ret = 0; 2673 2674 slot = path->slots[1]; 2675 if (slot == 0) 2676 return 1; 2677 if (!path->nodes[1]) 2678 return 1; 2679 2680 right_nritems = btrfs_header_nritems(right); 2681 if (right_nritems == 0) 2682 return 1; 2683 2684 btrfs_assert_tree_locked(path->nodes[1]); 2685 2686 left = read_node_slot(root, path->nodes[1], slot - 1); 2687 if (left == NULL) 2688 return 1; 2689 2690 btrfs_tree_lock(left); 2691 btrfs_set_lock_blocking(left); 2692 2693 free_space = btrfs_leaf_free_space(root, left); 2694 if (free_space < data_size) { 2695 ret = 1; 2696 goto out; 2697 } 2698 2699 /* cow and double check */ 2700 ret = btrfs_cow_block(trans, root, left, 2701 path->nodes[1], slot - 1, &left); 2702 if (ret) { 2703 /* we hit -ENOSPC, but it isn't fatal here */ 2704 ret = 1; 2705 goto out; 2706 } 2707 2708 free_space = btrfs_leaf_free_space(root, left); 2709 if (free_space < data_size) { 2710 ret = 1; 2711 goto out; 2712 } 2713 2714 return __push_leaf_left(trans, root, path, min_data_size, 2715 empty, left, free_space, right_nritems, 2716 max_slot); 2717 out: 2718 btrfs_tree_unlock(left); 2719 free_extent_buffer(left); 2720 return ret; 2721 } 2722 2723 /* 2724 * split the path's leaf in two, making sure there is at least data_size 2725 * available for the resulting leaf level of the path. 2726 * 2727 * returns 0 if all went well and < 0 on failure. 
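 *
 * (this is the data-moving half of the job: split_leaf() below decides
 * where to cut, while this helper copies the chosen items into 'right'
 * and fixes up the path)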
2728 */ 2729 static noinline int copy_for_split(struct btrfs_trans_handle *trans, 2730 struct btrfs_root *root, 2731 struct btrfs_path *path, 2732 struct extent_buffer *l, 2733 struct extent_buffer *right, 2734 int slot, int mid, int nritems) 2735 { 2736 int data_copy_size; 2737 int rt_data_off; 2738 int i; 2739 int ret = 0; 2740 int wret; 2741 struct btrfs_disk_key disk_key; 2742 2743 nritems = nritems - mid; 2744 btrfs_set_header_nritems(right, nritems); 2745 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); 2746 2747 copy_extent_buffer(right, l, btrfs_item_nr_offset(0), 2748 btrfs_item_nr_offset(mid), 2749 nritems * sizeof(struct btrfs_item)); 2750 2751 copy_extent_buffer(right, l, 2752 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - 2753 data_copy_size, btrfs_leaf_data(l) + 2754 leaf_data_end(root, l), data_copy_size); 2755 2756 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - 2757 btrfs_item_end_nr(l, mid); 2758 2759 for (i = 0; i < nritems; i++) { 2760 struct btrfs_item *item = btrfs_item_nr(right, i); 2761 u32 ioff; 2762 2763 ioff = btrfs_item_offset(right, item); 2764 btrfs_set_item_offset(right, item, ioff + rt_data_off); 2765 } 2766 2767 btrfs_set_header_nritems(l, mid); 2768 ret = 0; 2769 btrfs_item_key(right, &disk_key, 0); 2770 wret = insert_ptr(trans, root, path, &disk_key, right->start, 2771 path->slots[1] + 1, 1); 2772 if (wret) 2773 ret = wret; 2774 2775 btrfs_mark_buffer_dirty(right); 2776 btrfs_mark_buffer_dirty(l); 2777 BUG_ON(path->slots[0] != slot); 2778 2779 if (mid <= slot) { 2780 btrfs_tree_unlock(path->nodes[0]); 2781 free_extent_buffer(path->nodes[0]); 2782 path->nodes[0] = right; 2783 path->slots[0] -= mid; 2784 path->slots[1] += 1; 2785 } else { 2786 btrfs_tree_unlock(right); 2787 free_extent_buffer(right); 2788 } 2789 2790 BUG_ON(path->slots[0] < 0); 2791 2792 return ret; 2793 } 2794 2795 /* 2796 * double splits happen when we need to insert a big item in the middle 2797 * of a leaf. A double split can leave us with 3 mostly empty leaves: 2798 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] 2799 * A B C 2800 * 2801 * We avoid this by trying to push the items on either side of our target 2802 * into the adjacent leaves. If all goes well we can avoid the double split 2803 * completely. 2804 */ 2805 static noinline int push_for_double_split(struct btrfs_trans_handle *trans, 2806 struct btrfs_root *root, 2807 struct btrfs_path *path, 2808 int data_size) 2809 { 2810 int ret; 2811 int progress = 0; 2812 int slot; 2813 u32 nritems; 2814 2815 slot = path->slots[0]; 2816 2817 /* 2818 * try to push all the items after our slot into the 2819 * right leaf 2820 */ 2821 ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot); 2822 if (ret < 0) 2823 return ret; 2824 2825 if (ret == 0) 2826 progress++; 2827 2828 nritems = btrfs_header_nritems(path->nodes[0]); 2829 /* 2830 * our goal is to get our slot at the start or end of a leaf. 
If 2831 * we've done so we're done 2832 */ 2833 if (path->slots[0] == 0 || path->slots[0] == nritems) 2834 return 0; 2835 2836 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) 2837 return 0; 2838 2839 /* try to push all the items before our slot into the next leaf */ 2840 slot = path->slots[0]; 2841 ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot); 2842 if (ret < 0) 2843 return ret; 2844 2845 if (ret == 0) 2846 progress++; 2847 2848 if (progress) 2849 return 0; 2850 return 1; 2851 } 2852 2853 /* 2854 * split the path's leaf in two, making sure there is at least data_size 2855 * available for the resulting leaf level of the path. 2856 * 2857 * returns 0 if all went well and < 0 on failure. 2858 */ 2859 static noinline int split_leaf(struct btrfs_trans_handle *trans, 2860 struct btrfs_root *root, 2861 struct btrfs_key *ins_key, 2862 struct btrfs_path *path, int data_size, 2863 int extend) 2864 { 2865 struct btrfs_disk_key disk_key; 2866 struct extent_buffer *l; 2867 u32 nritems; 2868 int mid; 2869 int slot; 2870 struct extent_buffer *right; 2871 int ret = 0; 2872 int wret; 2873 int split; 2874 int num_doubles = 0; 2875 int tried_avoid_double = 0; 2876 2877 l = path->nodes[0]; 2878 slot = path->slots[0]; 2879 if (extend && data_size + btrfs_item_size_nr(l, slot) + 2880 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) 2881 return -EOVERFLOW; 2882 2883 /* first try to make some room by pushing left and right */ 2884 if (data_size) { 2885 wret = push_leaf_right(trans, root, path, data_size, 2886 data_size, 0, 0); 2887 if (wret < 0) 2888 return wret; 2889 if (wret) { 2890 wret = push_leaf_left(trans, root, path, data_size, 2891 data_size, 0, (u32)-1); 2892 if (wret < 0) 2893 return wret; 2894 } 2895 l = path->nodes[0]; 2896 2897 /* did the pushes work? 
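 * (i.e. is there now room for at least data_size bytes in this leaf)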
*/ 2898 if (btrfs_leaf_free_space(root, l) >= data_size) 2899 return 0; 2900 } 2901 2902 if (!path->nodes[1]) { 2903 ret = insert_new_root(trans, root, path, 1); 2904 if (ret) 2905 return ret; 2906 } 2907 again: 2908 split = 1; 2909 l = path->nodes[0]; 2910 slot = path->slots[0]; 2911 nritems = btrfs_header_nritems(l); 2912 mid = (nritems + 1) / 2; 2913 2914 if (mid <= slot) { 2915 if (nritems == 1 || 2916 leaf_space_used(l, mid, nritems - mid) + data_size > 2917 BTRFS_LEAF_DATA_SIZE(root)) { 2918 if (slot >= nritems) { 2919 split = 0; 2920 } else { 2921 mid = slot; 2922 if (mid != nritems && 2923 leaf_space_used(l, mid, nritems - mid) + 2924 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2925 if (data_size && !tried_avoid_double) 2926 goto push_for_double; 2927 split = 2; 2928 } 2929 } 2930 } 2931 } else { 2932 if (leaf_space_used(l, 0, mid) + data_size > 2933 BTRFS_LEAF_DATA_SIZE(root)) { 2934 if (!extend && data_size && slot == 0) { 2935 split = 0; 2936 } else if ((extend || !data_size) && slot == 0) { 2937 mid = 1; 2938 } else { 2939 mid = slot; 2940 if (mid != nritems && 2941 leaf_space_used(l, mid, nritems - mid) + 2942 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2943 if (data_size && !tried_avoid_double) 2944 goto push_for_double; 2945 split = 2 ; 2946 } 2947 } 2948 } 2949 } 2950 2951 if (split == 0) 2952 btrfs_cpu_key_to_disk(&disk_key, ins_key); 2953 else 2954 btrfs_item_key(l, &disk_key, mid); 2955 2956 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 2957 root->root_key.objectid, 2958 &disk_key, 0, l->start, 0); 2959 if (IS_ERR(right)) 2960 return PTR_ERR(right); 2961 2962 root_add_used(root, root->leafsize); 2963 2964 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); 2965 btrfs_set_header_bytenr(right, right->start); 2966 btrfs_set_header_generation(right, trans->transid); 2967 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); 2968 btrfs_set_header_owner(right, root->root_key.objectid); 2969 btrfs_set_header_level(right, 0); 2970 write_extent_buffer(right, root->fs_info->fsid, 2971 (unsigned long)btrfs_header_fsid(right), 2972 BTRFS_FSID_SIZE); 2973 2974 write_extent_buffer(right, root->fs_info->chunk_tree_uuid, 2975 (unsigned long)btrfs_header_chunk_tree_uuid(right), 2976 BTRFS_UUID_SIZE); 2977 2978 if (split == 0) { 2979 if (mid <= slot) { 2980 btrfs_set_header_nritems(right, 0); 2981 wret = insert_ptr(trans, root, path, 2982 &disk_key, right->start, 2983 path->slots[1] + 1, 1); 2984 if (wret) 2985 ret = wret; 2986 2987 btrfs_tree_unlock(path->nodes[0]); 2988 free_extent_buffer(path->nodes[0]); 2989 path->nodes[0] = right; 2990 path->slots[0] = 0; 2991 path->slots[1] += 1; 2992 } else { 2993 btrfs_set_header_nritems(right, 0); 2994 wret = insert_ptr(trans, root, path, 2995 &disk_key, 2996 right->start, 2997 path->slots[1], 1); 2998 if (wret) 2999 ret = wret; 3000 btrfs_tree_unlock(path->nodes[0]); 3001 free_extent_buffer(path->nodes[0]); 3002 path->nodes[0] = right; 3003 path->slots[0] = 0; 3004 if (path->slots[1] == 0) { 3005 wret = fixup_low_keys(trans, root, 3006 path, &disk_key, 1); 3007 if (wret) 3008 ret = wret; 3009 } 3010 } 3011 btrfs_mark_buffer_dirty(right); 3012 return ret; 3013 } 3014 3015 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); 3016 BUG_ON(ret); 3017 3018 if (split == 2) { 3019 BUG_ON(num_doubles != 0); 3020 num_doubles++; 3021 goto again; 3022 } 3023 3024 return ret; 3025 3026 push_for_double: 3027 push_for_double_split(trans, root, path, data_size); 3028 tried_avoid_double = 1; 3029 if 
(btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) 3030 return 0; 3031 goto again; 3032 } 3033 3034 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, 3035 struct btrfs_root *root, 3036 struct btrfs_path *path, int ins_len) 3037 { 3038 struct btrfs_key key; 3039 struct extent_buffer *leaf; 3040 struct btrfs_file_extent_item *fi; 3041 u64 extent_len = 0; 3042 u32 item_size; 3043 int ret; 3044 3045 leaf = path->nodes[0]; 3046 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3047 3048 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY && 3049 key.type != BTRFS_EXTENT_CSUM_KEY); 3050 3051 if (btrfs_leaf_free_space(root, leaf) >= ins_len) 3052 return 0; 3053 3054 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3055 if (key.type == BTRFS_EXTENT_DATA_KEY) { 3056 fi = btrfs_item_ptr(leaf, path->slots[0], 3057 struct btrfs_file_extent_item); 3058 extent_len = btrfs_file_extent_num_bytes(leaf, fi); 3059 } 3060 btrfs_release_path(path); 3061 3062 path->keep_locks = 1; 3063 path->search_for_split = 1; 3064 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 3065 path->search_for_split = 0; 3066 if (ret < 0) 3067 goto err; 3068 3069 ret = -EAGAIN; 3070 leaf = path->nodes[0]; 3071 /* if our item isn't there or got smaller, return now */ 3072 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) 3073 goto err; 3074 3075 /* the leaf has changed, it now has room. return now */ 3076 if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len) 3077 goto err; 3078 3079 if (key.type == BTRFS_EXTENT_DATA_KEY) { 3080 fi = btrfs_item_ptr(leaf, path->slots[0], 3081 struct btrfs_file_extent_item); 3082 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi)) 3083 goto err; 3084 } 3085 3086 btrfs_set_path_blocking(path); 3087 ret = split_leaf(trans, root, &key, path, ins_len, 1); 3088 if (ret) 3089 goto err; 3090 3091 path->keep_locks = 0; 3092 btrfs_unlock_up_safe(path, 1); 3093 return 0; 3094 err: 3095 path->keep_locks = 0; 3096 return ret; 3097 } 3098 3099 static noinline int split_item(struct btrfs_trans_handle *trans, 3100 struct btrfs_root *root, 3101 struct btrfs_path *path, 3102 struct btrfs_key *new_key, 3103 unsigned long split_offset) 3104 { 3105 struct extent_buffer *leaf; 3106 struct btrfs_item *item; 3107 struct btrfs_item *new_item; 3108 int slot; 3109 char *buf; 3110 u32 nritems; 3111 u32 item_size; 3112 u32 orig_offset; 3113 struct btrfs_disk_key disk_key; 3114 3115 leaf = path->nodes[0]; 3116 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3117 3118 btrfs_set_path_blocking(path); 3119 3120 item = btrfs_item_nr(leaf, path->slots[0]); 3121 orig_offset = btrfs_item_offset(leaf, item); 3122 item_size = btrfs_item_size(leaf, item); 3123 3124 buf = kmalloc(item_size, GFP_NOFS); 3125 if (!buf) 3126 return -ENOMEM; 3127 3128 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3129 path->slots[0]), item_size); 3130 3131 slot = path->slots[0] + 1; 3132 nritems = btrfs_header_nritems(leaf); 3133 if (slot != nritems) { 3134 /* shift the items */ 3135 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3136 btrfs_item_nr_offset(slot), 3137 (nritems - slot) * sizeof(struct btrfs_item)); 3138 } 3139 3140 btrfs_cpu_key_to_disk(&disk_key, new_key); 3141 btrfs_set_item_key(leaf, &disk_key, slot); 3142 3143 new_item = btrfs_item_nr(leaf, slot); 3144 3145 btrfs_set_item_offset(leaf, new_item, orig_offset); 3146 btrfs_set_item_size(leaf, new_item, item_size - split_offset); 3147 3148 btrfs_set_item_offset(leaf, item, 3149 orig_offset 
+ item_size - split_offset); 3150 btrfs_set_item_size(leaf, item, split_offset); 3151 3152 btrfs_set_header_nritems(leaf, nritems + 1); 3153 3154 /* write the data for the start of the original item */ 3155 write_extent_buffer(leaf, buf, 3156 btrfs_item_ptr_offset(leaf, path->slots[0]), 3157 split_offset); 3158 3159 /* write the data for the new item */ 3160 write_extent_buffer(leaf, buf + split_offset, 3161 btrfs_item_ptr_offset(leaf, slot), 3162 item_size - split_offset); 3163 btrfs_mark_buffer_dirty(leaf); 3164 3165 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); 3166 kfree(buf); 3167 return 0; 3168 } 3169 3170 /* 3171 * This function splits a single item into two items, 3172 * giving 'new_key' to the new item and splitting the 3173 * old one at split_offset (from the start of the item). 3174 * 3175 * The path may be released by this operation. After 3176 * the split, the path is pointing to the old item. The 3177 * new item is going to be in the same node as the old one. 3178 * 3179 * Note, the item being split must be small enough to live alone on 3180 * a tree block with room for one extra struct btrfs_item 3181 * 3182 * This allows us to split the item in place, keeping a lock on the 3183 * leaf the entire time. 3184 */ 3185 int btrfs_split_item(struct btrfs_trans_handle *trans, 3186 struct btrfs_root *root, 3187 struct btrfs_path *path, 3188 struct btrfs_key *new_key, 3189 unsigned long split_offset) 3190 { 3191 int ret; 3192 ret = setup_leaf_for_split(trans, root, path, 3193 sizeof(struct btrfs_item)); 3194 if (ret) 3195 return ret; 3196 3197 ret = split_item(trans, root, path, new_key, split_offset); 3198 return ret; 3199 } 3200 3201 /* 3202 * This function duplicates an item, giving 'new_key' to the new item. 3203 * It guarantees both items live in the same tree leaf and the new item 3204 * is contiguous with the original item. 3205 * 3206 * This allows us to split a file extent in place, keeping a lock on the 3207 * leaf the entire time. 3208 */ 3209 int btrfs_duplicate_item(struct btrfs_trans_handle *trans, 3210 struct btrfs_root *root, 3211 struct btrfs_path *path, 3212 struct btrfs_key *new_key) 3213 { 3214 struct extent_buffer *leaf; 3215 int ret; 3216 u32 item_size; 3217 3218 leaf = path->nodes[0]; 3219 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3220 ret = setup_leaf_for_split(trans, root, path, 3221 item_size + sizeof(struct btrfs_item)); 3222 if (ret) 3223 return ret; 3224 3225 path->slots[0]++; 3226 ret = setup_items_for_insert(trans, root, path, new_key, &item_size, 3227 item_size, item_size + 3228 sizeof(struct btrfs_item), 1); 3229 BUG_ON(ret); 3230 3231 leaf = path->nodes[0]; 3232 memcpy_extent_buffer(leaf, 3233 btrfs_item_ptr_offset(leaf, path->slots[0]), 3234 btrfs_item_ptr_offset(leaf, path->slots[0] - 1), 3235 item_size); 3236 return 0; 3237 } 3238 3239 /* 3240 * make the item pointed to by the path smaller. new_size indicates 3241 * how small to make it, and from_end tells us if we just chop bytes 3242 * off the end of the item or if we shift the item to chop bytes off 3243 * the front.
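 *
 * For illustration only (a sketch, not code from this file): shrinking
 * the current item to 32 bytes while keeping its first 32 bytes would be
 *
 *	ret = btrfs_truncate_item(trans, root, path, 32, 1);
 *
 * while passing from_end = 0 would keep the last 32 bytes instead.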
3244 */ 3245 int btrfs_truncate_item(struct btrfs_trans_handle *trans, 3246 struct btrfs_root *root, 3247 struct btrfs_path *path, 3248 u32 new_size, int from_end) 3249 { 3250 int slot; 3251 struct extent_buffer *leaf; 3252 struct btrfs_item *item; 3253 u32 nritems; 3254 unsigned int data_end; 3255 unsigned int old_data_start; 3256 unsigned int old_size; 3257 unsigned int size_diff; 3258 int i; 3259 3260 leaf = path->nodes[0]; 3261 slot = path->slots[0]; 3262 3263 old_size = btrfs_item_size_nr(leaf, slot); 3264 if (old_size == new_size) 3265 return 0; 3266 3267 nritems = btrfs_header_nritems(leaf); 3268 data_end = leaf_data_end(root, leaf); 3269 3270 old_data_start = btrfs_item_offset_nr(leaf, slot); 3271 3272 size_diff = old_size - new_size; 3273 3274 BUG_ON(slot < 0); 3275 BUG_ON(slot >= nritems); 3276 3277 /* 3278 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3279 */ 3280 /* first correct the data pointers */ 3281 for (i = slot; i < nritems; i++) { 3282 u32 ioff; 3283 item = btrfs_item_nr(leaf, i); 3284 3285 ioff = btrfs_item_offset(leaf, item); 3286 btrfs_set_item_offset(leaf, item, ioff + size_diff); 3287 } 3288 3289 /* shift the data */ 3290 if (from_end) { 3291 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3292 data_end + size_diff, btrfs_leaf_data(leaf) + 3293 data_end, old_data_start + new_size - data_end); 3294 } else { 3295 struct btrfs_disk_key disk_key; 3296 u64 offset; 3297 3298 btrfs_item_key(leaf, &disk_key, slot); 3299 3300 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { 3301 unsigned long ptr; 3302 struct btrfs_file_extent_item *fi; 3303 3304 fi = btrfs_item_ptr(leaf, slot, 3305 struct btrfs_file_extent_item); 3306 fi = (struct btrfs_file_extent_item *)( 3307 (unsigned long)fi - size_diff); 3308 3309 if (btrfs_file_extent_type(leaf, fi) == 3310 BTRFS_FILE_EXTENT_INLINE) { 3311 ptr = btrfs_item_ptr_offset(leaf, slot); 3312 memmove_extent_buffer(leaf, ptr, 3313 (unsigned long)fi, 3314 offsetof(struct btrfs_file_extent_item, 3315 disk_bytenr)); 3316 } 3317 } 3318 3319 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3320 data_end + size_diff, btrfs_leaf_data(leaf) + 3321 data_end, old_data_start - data_end); 3322 3323 offset = btrfs_disk_key_offset(&disk_key); 3324 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 3325 btrfs_set_item_key(leaf, &disk_key, slot); 3326 if (slot == 0) 3327 fixup_low_keys(trans, root, path, &disk_key, 1); 3328 } 3329 3330 item = btrfs_item_nr(leaf, slot); 3331 btrfs_set_item_size(leaf, item, new_size); 3332 btrfs_mark_buffer_dirty(leaf); 3333 3334 if (btrfs_leaf_free_space(root, leaf) < 0) { 3335 btrfs_print_leaf(root, leaf); 3336 BUG(); 3337 } 3338 return 0; 3339 } 3340 3341 /* 3342 * make the item pointed to by the path bigger, data_size is the number of bytes to add.
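 *
 * For illustration only (a sketch, not code from this file): growing the
 * current item by 16 bytes would look like
 *
 *	ret = btrfs_extend_item(trans, root, path, 16);
 *
 * after which the item is old_size + 16 bytes long; the new bytes sit at
 * the end of the item and can be filled with write_extent_buffer.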
3343 */ 3344 int btrfs_extend_item(struct btrfs_trans_handle *trans, 3345 struct btrfs_root *root, struct btrfs_path *path, 3346 u32 data_size) 3347 { 3348 int slot; 3349 struct extent_buffer *leaf; 3350 struct btrfs_item *item; 3351 u32 nritems; 3352 unsigned int data_end; 3353 unsigned int old_data; 3354 unsigned int old_size; 3355 int i; 3356 3357 leaf = path->nodes[0]; 3358 3359 nritems = btrfs_header_nritems(leaf); 3360 data_end = leaf_data_end(root, leaf); 3361 3362 if (btrfs_leaf_free_space(root, leaf) < data_size) { 3363 btrfs_print_leaf(root, leaf); 3364 BUG(); 3365 } 3366 slot = path->slots[0]; 3367 old_data = btrfs_item_end_nr(leaf, slot); 3368 3369 BUG_ON(slot < 0); 3370 if (slot >= nritems) { 3371 btrfs_print_leaf(root, leaf); 3372 printk(KERN_CRIT "slot %d too large, nritems %d\n", 3373 slot, nritems); 3374 BUG_ON(1); 3375 } 3376 3377 /* 3378 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3379 */ 3380 /* first correct the data pointers */ 3381 for (i = slot; i < nritems; i++) { 3382 u32 ioff; 3383 item = btrfs_item_nr(leaf, i); 3384 3385 ioff = btrfs_item_offset(leaf, item); 3386 btrfs_set_item_offset(leaf, item, ioff - data_size); 3387 } 3388 3389 /* shift the data */ 3390 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3391 data_end - data_size, btrfs_leaf_data(leaf) + 3392 data_end, old_data - data_end); 3393 3394 data_end = old_data; 3395 old_size = btrfs_item_size_nr(leaf, slot); 3396 item = btrfs_item_nr(leaf, slot); 3397 btrfs_set_item_size(leaf, item, old_size + data_size); 3398 btrfs_mark_buffer_dirty(leaf); 3399 3400 if (btrfs_leaf_free_space(root, leaf) < 0) { 3401 btrfs_print_leaf(root, leaf); 3402 BUG(); 3403 } 3404 return 0; 3405 } 3406 3407 /* 3408 * Given a key and some data, insert items into the tree. 3409 * This does all the path init required, making room in the tree if needed. 3410 * Returns the number of keys that were inserted. 
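 * It may insert fewer than 'nr' keys: the count is trimmed when the
 * leaf lacks room or when later keys don't sort before the key already
 * at the insertion slot, and an insert at the very end of a leaf only
 * inserts a single key (see the comments in the code below).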
3411 */ 3412 int btrfs_insert_some_items(struct btrfs_trans_handle *trans, 3413 struct btrfs_root *root, 3414 struct btrfs_path *path, 3415 struct btrfs_key *cpu_key, u32 *data_size, 3416 int nr) 3417 { 3418 struct extent_buffer *leaf; 3419 struct btrfs_item *item; 3420 int ret = 0; 3421 int slot; 3422 int i; 3423 u32 nritems; 3424 u32 total_data = 0; 3425 u32 total_size = 0; 3426 unsigned int data_end; 3427 struct btrfs_disk_key disk_key; 3428 struct btrfs_key found_key; 3429 3430 for (i = 0; i < nr; i++) { 3431 if (total_size + data_size[i] + sizeof(struct btrfs_item) > 3432 BTRFS_LEAF_DATA_SIZE(root)) { 3433 nr = i; 3434 break; 3435 } 3436 total_data += data_size[i]; 3437 total_size += data_size[i] + sizeof(struct btrfs_item); 3438 } 3439 BUG_ON(nr == 0); 3440 3441 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 3442 if (ret == 0) 3443 return -EEXIST; 3444 if (ret < 0) 3445 goto out; 3446 3447 leaf = path->nodes[0]; 3448 3449 nritems = btrfs_header_nritems(leaf); 3450 data_end = leaf_data_end(root, leaf); 3451 3452 if (btrfs_leaf_free_space(root, leaf) < total_size) { 3453 for (i = nr - 1; i >= 0; i--) { 3454 total_data -= data_size[i]; 3455 total_size -= data_size[i] + sizeof(struct btrfs_item); 3456 if (total_size < btrfs_leaf_free_space(root, leaf)) 3457 break; 3458 } 3459 nr = i; 3460 } 3461 3462 slot = path->slots[0]; 3463 BUG_ON(slot < 0); 3464 3465 if (slot != nritems) { 3466 unsigned int old_data = btrfs_item_end_nr(leaf, slot); 3467 3468 item = btrfs_item_nr(leaf, slot); 3469 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3470 3471 /* figure out how many keys we can insert in here */ 3472 total_data = data_size[0]; 3473 for (i = 1; i < nr; i++) { 3474 if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0) 3475 break; 3476 total_data += data_size[i]; 3477 } 3478 nr = i; 3479 3480 if (old_data < data_end) { 3481 btrfs_print_leaf(root, leaf); 3482 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 3483 slot, old_data, data_end); 3484 BUG_ON(1); 3485 } 3486 /* 3487 * item0..itemN ... dataN.offset..dataN.size ..
data0.size 3488 */ 3489 /* first correct the data pointers */ 3490 for (i = slot; i < nritems; i++) { 3491 u32 ioff; 3492 3493 item = btrfs_item_nr(leaf, i); 3494 ioff = btrfs_item_offset(leaf, item); 3495 btrfs_set_item_offset(leaf, item, ioff - total_data); 3496 } 3497 /* shift the items */ 3498 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3499 btrfs_item_nr_offset(slot), 3500 (nritems - slot) * sizeof(struct btrfs_item)); 3501 3502 /* shift the data */ 3503 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3504 data_end - total_data, btrfs_leaf_data(leaf) + 3505 data_end, old_data - data_end); 3506 data_end = old_data; 3507 } else { 3508 /* 3509 * this sucks but it has to be done, if we are inserting at 3510 * the end of the leaf only insert 1 of the items, since we 3511 * have no way of knowing what's on the next leaf and we'd have 3512 * to drop our current locks to figure it out 3513 */ 3514 nr = 1; 3515 } 3516 3517 /* setup the item for the new data */ 3518 for (i = 0; i < nr; i++) { 3519 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3520 btrfs_set_item_key(leaf, &disk_key, slot + i); 3521 item = btrfs_item_nr(leaf, slot + i); 3522 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3523 data_end -= data_size[i]; 3524 btrfs_set_item_size(leaf, item, data_size[i]); 3525 } 3526 btrfs_set_header_nritems(leaf, nritems + nr); 3527 btrfs_mark_buffer_dirty(leaf); 3528 3529 ret = 0; 3530 if (slot == 0) { 3531 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3532 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3533 } 3534 3535 if (btrfs_leaf_free_space(root, leaf) < 0) { 3536 btrfs_print_leaf(root, leaf); 3537 BUG(); 3538 } 3539 out: 3540 if (!ret) 3541 ret = nr; 3542 return ret; 3543 } 3544 3545 /* 3546 * this is a helper for btrfs_insert_empty_items, the main goal here is 3547 * to save stack depth by doing the bulk of the work in a function 3548 * that doesn't call btrfs_search_slot 3549 */ 3550 int setup_items_for_insert(struct btrfs_trans_handle *trans, 3551 struct btrfs_root *root, struct btrfs_path *path, 3552 struct btrfs_key *cpu_key, u32 *data_size, 3553 u32 total_data, u32 total_size, int nr) 3554 { 3555 struct btrfs_item *item; 3556 int i; 3557 u32 nritems; 3558 unsigned int data_end; 3559 struct btrfs_disk_key disk_key; 3560 int ret; 3561 struct extent_buffer *leaf; 3562 int slot; 3563 3564 leaf = path->nodes[0]; 3565 slot = path->slots[0]; 3566 3567 nritems = btrfs_header_nritems(leaf); 3568 data_end = leaf_data_end(root, leaf); 3569 3570 if (btrfs_leaf_free_space(root, leaf) < total_size) { 3571 btrfs_print_leaf(root, leaf); 3572 printk(KERN_CRIT "not enough freespace need %u have %d\n", 3573 total_size, btrfs_leaf_free_space(root, leaf)); 3574 BUG(); 3575 } 3576 3577 if (slot != nritems) { 3578 unsigned int old_data = btrfs_item_end_nr(leaf, slot); 3579 3580 if (old_data < data_end) { 3581 btrfs_print_leaf(root, leaf); 3582 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 3583 slot, old_data, data_end); 3584 BUG_ON(1); 3585 } 3586 /* 3587 * item0..itemN ... dataN.offset..dataN.size ..
data0.size 3588 */ 3589 /* first correct the data pointers */ 3590 for (i = slot; i < nritems; i++) { 3591 u32 ioff; 3592 3593 item = btrfs_item_nr(leaf, i); 3594 ioff = btrfs_item_offset(leaf, item); 3595 btrfs_set_item_offset(leaf, item, ioff - total_data); 3596 } 3597 /* shift the items */ 3598 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3599 btrfs_item_nr_offset(slot), 3600 (nritems - slot) * sizeof(struct btrfs_item)); 3601 3602 /* shift the data */ 3603 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3604 data_end - total_data, btrfs_leaf_data(leaf) + 3605 data_end, old_data - data_end); 3606 data_end = old_data; 3607 } 3608 3609 /* setup the item for the new data */ 3610 for (i = 0; i < nr; i++) { 3611 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3612 btrfs_set_item_key(leaf, &disk_key, slot + i); 3613 item = btrfs_item_nr(leaf, slot + i); 3614 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3615 data_end -= data_size[i]; 3616 btrfs_set_item_size(leaf, item, data_size[i]); 3617 } 3618 3619 btrfs_set_header_nritems(leaf, nritems + nr); 3620 3621 ret = 0; 3622 if (slot == 0) { 3623 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3624 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3625 } 3626 btrfs_unlock_up_safe(path, 1); 3627 btrfs_mark_buffer_dirty(leaf); 3628 3629 if (btrfs_leaf_free_space(root, leaf) < 0) { 3630 btrfs_print_leaf(root, leaf); 3631 BUG(); 3632 } 3633 return ret; 3634 } 3635 3636 /* 3637 * Given a key and some data, insert items into the tree. 3638 * This does all the path init required, making room in the tree if needed. 3639 */ 3640 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 3641 struct btrfs_root *root, 3642 struct btrfs_path *path, 3643 struct btrfs_key *cpu_key, u32 *data_size, 3644 int nr) 3645 { 3646 int ret = 0; 3647 int slot; 3648 int i; 3649 u32 total_size = 0; 3650 u32 total_data = 0; 3651 3652 for (i = 0; i < nr; i++) 3653 total_data += data_size[i]; 3654 3655 total_size = total_data + (nr * sizeof(struct btrfs_item)); 3656 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 3657 if (ret == 0) 3658 return -EEXIST; 3659 if (ret < 0) 3660 goto out; 3661 3662 slot = path->slots[0]; 3663 BUG_ON(slot < 0); 3664 3665 ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, 3666 total_data, total_size, nr); 3667 3668 out: 3669 return ret; 3670 } 3671 3672 /* 3673 * Given a key and some data, insert an item into the tree. 3674 * This does all the path init required, making room in the tree if needed. 3675 */ 3676 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 3677 *root, struct btrfs_key *cpu_key, void *data, u32 3678 data_size) 3679 { 3680 int ret = 0; 3681 struct btrfs_path *path; 3682 struct extent_buffer *leaf; 3683 unsigned long ptr; 3684 3685 path = btrfs_alloc_path(); 3686 if (!path) 3687 return -ENOMEM; 3688 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 3689 if (!ret) { 3690 leaf = path->nodes[0]; 3691 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 3692 write_extent_buffer(leaf, data, ptr, data_size); 3693 btrfs_mark_buffer_dirty(leaf); 3694 } 3695 btrfs_free_path(path); 3696 return ret; 3697 } 3698 3699 /* 3700 * delete the pointer from a given node. 3701 * 3702 * the tree should have been previously balanced so the deletion does not 3703 * empty a node. 
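 *
 * (when slot 0 is removed, fixup_low_keys() is called below so that the
 * node's new first key is propagated to the parents)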
3704 */ 3705 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3706 struct btrfs_path *path, int level, int slot) 3707 { 3708 struct extent_buffer *parent = path->nodes[level]; 3709 u32 nritems; 3710 int ret = 0; 3711 int wret; 3712 3713 nritems = btrfs_header_nritems(parent); 3714 if (slot != nritems - 1) { 3715 memmove_extent_buffer(parent, 3716 btrfs_node_key_ptr_offset(slot), 3717 btrfs_node_key_ptr_offset(slot + 1), 3718 sizeof(struct btrfs_key_ptr) * 3719 (nritems - slot - 1)); 3720 } 3721 nritems--; 3722 btrfs_set_header_nritems(parent, nritems); 3723 if (nritems == 0 && parent == root->node) { 3724 BUG_ON(btrfs_header_level(root->node) != 1); 3725 /* just turn the root into a leaf and break */ 3726 btrfs_set_header_level(root->node, 0); 3727 } else if (slot == 0) { 3728 struct btrfs_disk_key disk_key; 3729 3730 btrfs_node_key(parent, &disk_key, 0); 3731 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); 3732 if (wret) 3733 ret = wret; 3734 } 3735 btrfs_mark_buffer_dirty(parent); 3736 return ret; 3737 } 3738 3739 /* 3740 * a helper function to delete the leaf pointed to by path->slots[1] and 3741 * path->nodes[1]. 3742 * 3743 * This deletes the pointer in path->nodes[1] and frees the leaf 3744 * block extent. zero is returned if it all worked out, < 0 otherwise. 3745 * 3746 * The path must have already been setup for deleting the leaf, including 3747 * all the proper balancing. path->nodes[1] must be locked. 3748 */ 3749 static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3750 struct btrfs_root *root, 3751 struct btrfs_path *path, 3752 struct extent_buffer *leaf) 3753 { 3754 int ret; 3755 3756 WARN_ON(btrfs_header_generation(leaf) != trans->transid); 3757 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3758 if (ret) 3759 return ret; 3760 3761 /* 3762 * btrfs_free_extent is expensive, we want to make sure we 3763 * aren't holding any locks when we call it 3764 */ 3765 btrfs_unlock_up_safe(path, 0); 3766 3767 root_sub_used(root, leaf->len); 3768 3769 btrfs_free_tree_block(trans, root, leaf, 0, 1); 3770 return 0; 3771 } 3772 /* 3773 * delete the item at the leaf level in path. 
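 * ('slot' and 'nr' select a run of adjacent items, so several items can
 * be removed in one call.)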
If that empties 3774 * the leaf, remove it from the tree 3775 */ 3776 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3777 struct btrfs_path *path, int slot, int nr) 3778 { 3779 struct extent_buffer *leaf; 3780 struct btrfs_item *item; 3781 int last_off; 3782 int dsize = 0; 3783 int ret = 0; 3784 int wret; 3785 int i; 3786 u32 nritems; 3787 3788 leaf = path->nodes[0]; 3789 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); 3790 3791 for (i = 0; i < nr; i++) 3792 dsize += btrfs_item_size_nr(leaf, slot + i); 3793 3794 nritems = btrfs_header_nritems(leaf); 3795 3796 if (slot + nr != nritems) { 3797 int data_end = leaf_data_end(root, leaf); 3798 3799 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3800 data_end + dsize, 3801 btrfs_leaf_data(leaf) + data_end, 3802 last_off - data_end); 3803 3804 for (i = slot + nr; i < nritems; i++) { 3805 u32 ioff; 3806 3807 item = btrfs_item_nr(leaf, i); 3808 ioff = btrfs_item_offset(leaf, item); 3809 btrfs_set_item_offset(leaf, item, ioff + dsize); 3810 } 3811 3812 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), 3813 btrfs_item_nr_offset(slot + nr), 3814 sizeof(struct btrfs_item) * 3815 (nritems - slot - nr)); 3816 } 3817 btrfs_set_header_nritems(leaf, nritems - nr); 3818 nritems -= nr; 3819 3820 /* delete the leaf if we've emptied it */ 3821 if (nritems == 0) { 3822 if (leaf == root->node) { 3823 btrfs_set_header_level(leaf, 0); 3824 } else { 3825 btrfs_set_path_blocking(path); 3826 clean_tree_block(trans, root, leaf); 3827 ret = btrfs_del_leaf(trans, root, path, leaf); 3828 BUG_ON(ret); 3829 } 3830 } else { 3831 int used = leaf_space_used(leaf, 0, nritems); 3832 if (slot == 0) { 3833 struct btrfs_disk_key disk_key; 3834 3835 btrfs_item_key(leaf, &disk_key, 0); 3836 wret = fixup_low_keys(trans, root, path, 3837 &disk_key, 1); 3838 if (wret) 3839 ret = wret; 3840 } 3841 3842 /* delete the leaf if it is mostly empty */ 3843 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { 3844 /* push_leaf_left fixes the path. 3845 * make sure the path still points to our leaf 3846 * for possible call to del_ptr below 3847 */ 3848 slot = path->slots[1]; 3849 extent_buffer_get(leaf); 3850 3851 btrfs_set_path_blocking(path); 3852 wret = push_leaf_left(trans, root, path, 1, 1, 3853 1, (u32)-1); 3854 if (wret < 0 && wret != -ENOSPC) 3855 ret = wret; 3856 3857 if (path->nodes[0] == leaf && 3858 btrfs_header_nritems(leaf)) { 3859 wret = push_leaf_right(trans, root, path, 1, 3860 1, 1, 0); 3861 if (wret < 0 && wret != -ENOSPC) 3862 ret = wret; 3863 } 3864 3865 if (btrfs_header_nritems(leaf) == 0) { 3866 path->slots[1] = slot; 3867 ret = btrfs_del_leaf(trans, root, path, leaf); 3868 BUG_ON(ret); 3869 free_extent_buffer(leaf); 3870 } else { 3871 /* if we're still in the path, make sure 3872 * we're dirty. Otherwise, one of the 3873 * push_leaf functions must have already 3874 * dirtied this buffer 3875 */ 3876 if (path->nodes[0] == leaf) 3877 btrfs_mark_buffer_dirty(leaf); 3878 free_extent_buffer(leaf); 3879 } 3880 } else { 3881 btrfs_mark_buffer_dirty(leaf); 3882 } 3883 } 3884 return ret; 3885 } 3886 3887 /* 3888 * search the tree again to find a leaf with lesser keys 3889 * returns 0 if it found something or 1 if there are no lesser leaves. 3890 * returns < 0 on io errors. 3891 * 3892 * This may release the path, and so you may lose any locks held at the 3893 * time you call it. 
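 *
 * An illustrative backwards scan (a sketch, not code from this file):
 *
 *	while (1) {
 *		ret = btrfs_prev_leaf(root, path);
 *		if (ret < 0)
 *			goto error;
 *		if (ret > 0)
 *			break;		(no lesser leaves are left)
 *		...			(process the items in path->nodes[0])
 *	}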
3894 */ 3895 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 3896 { 3897 struct btrfs_key key; 3898 struct btrfs_disk_key found_key; 3899 int ret; 3900 3901 btrfs_item_key_to_cpu(path->nodes[0], &key, 0); 3902 3903 if (key.offset > 0) 3904 key.offset--; 3905 else if (key.type > 0) 3906 key.type--; 3907 else if (key.objectid > 0) 3908 key.objectid--; 3909 else 3910 return 1; 3911 3912 btrfs_release_path(path); 3913 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3914 if (ret < 0) 3915 return ret; 3916 btrfs_item_key(path->nodes[0], &found_key, 0); 3917 ret = comp_keys(&found_key, &key); 3918 if (ret < 0) 3919 return 0; 3920 return 1; 3921 } 3922 3923 /* 3924 * A helper function to walk down the tree starting at min_key, and looking 3925 * for nodes or leaves that are either in cache or have a minimum 3926 * transaction id. This is used by the btree defrag code, and tree logging 3927 * 3928 * This does not cow, but it does stuff the starting key it finds back 3929 * into min_key, so you can call btrfs_search_slot with cow=1 on the 3930 * key and get a writable path. 3931 * 3932 * This does lock as it descends, and path->keep_locks should be set 3933 * to 1 by the caller. 3934 * 3935 * This honors path->lowest_level to prevent descent past a given level 3936 * of the tree. 3937 * 3938 * min_trans indicates the oldest transaction that you are interested 3939 * in walking through. Any nodes or leaves older than min_trans are 3940 * skipped over (without reading them). 3941 * 3942 * returns zero if something useful was found, < 0 on error and 1 if there 3943 * was nothing in the tree that matched the search criteria. 3944 */ 3945 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, 3946 struct btrfs_key *max_key, 3947 struct btrfs_path *path, int cache_only, 3948 u64 min_trans) 3949 { 3950 struct extent_buffer *cur; 3951 struct btrfs_key found_key; 3952 int slot; 3953 int sret; 3954 u32 nritems; 3955 int level; 3956 int ret = 1; 3957 3958 WARN_ON(!path->keep_locks); 3959 again: 3960 cur = btrfs_read_lock_root_node(root); 3961 level = btrfs_header_level(cur); 3962 WARN_ON(path->nodes[level]); 3963 path->nodes[level] = cur; 3964 path->locks[level] = BTRFS_READ_LOCK; 3965 3966 if (btrfs_header_generation(cur) < min_trans) { 3967 ret = 1; 3968 goto out; 3969 } 3970 while (1) { 3971 nritems = btrfs_header_nritems(cur); 3972 level = btrfs_header_level(cur); 3973 sret = bin_search(cur, min_key, level, &slot); 3974 3975 /* at the lowest level, we're done, setup the path and exit */ 3976 if (level == path->lowest_level) { 3977 if (slot >= nritems) 3978 goto find_next_key; 3979 ret = 0; 3980 path->slots[level] = slot; 3981 btrfs_item_key_to_cpu(cur, &found_key, slot); 3982 goto out; 3983 } 3984 if (sret && slot > 0) 3985 slot--; 3986 /* 3987 * check this node pointer against the cache_only and 3988 * min_trans parameters. If it isn't in cache or is too 3989 * old, skip to the next one. 
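 * ("in cache" means btrfs_find_tree_block() can hand back an extent
 * buffer that is already up to date for the expected generation)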
/*
 * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that are either in cache or have a minimum
 * transaction id. This is used by the btree defrag code and tree logging.
 *
 * This does not cow, but it does stuff the starting key it finds back
 * into min_key, so you can call btrfs_search_slot with cow=1 on the
 * key and get a writable path.
 *
 * This does lock as it descends, and path->keep_locks should be set
 * to 1 by the caller.
 *
 * This honors path->lowest_level to prevent descent past a given level
 * of the tree.
 *
 * min_trans indicates the oldest transaction that you are interested
 * in walking through. Any nodes or leaves older than min_trans are
 * skipped over (without reading them).
 *
 * returns zero if something useful was found, < 0 on error and 1 if there
 * was nothing in the tree that matched the search criteria.
 */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_key *max_key,
			 struct btrfs_path *path, int cache_only,
			 u64 min_trans)
{
	struct extent_buffer *cur;
	struct btrfs_key found_key;
	int slot;
	int sret;
	u32 nritems;
	int level;
	int ret = 1;

	WARN_ON(!path->keep_locks);
again:
	cur = btrfs_read_lock_root_node(root);
	level = btrfs_header_level(cur);
	WARN_ON(path->nodes[level]);
	path->nodes[level] = cur;
	path->locks[level] = BTRFS_READ_LOCK;

	if (btrfs_header_generation(cur) < min_trans) {
		ret = 1;
		goto out;
	}
	while (1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
		sret = bin_search(cur, min_key, level, &slot);

		/* at the lowest level, we're done, setup the path and exit */
		if (level == path->lowest_level) {
			if (slot >= nritems)
				goto find_next_key;
			ret = 0;
			path->slots[level] = slot;
			btrfs_item_key_to_cpu(cur, &found_key, slot);
			goto out;
		}
		if (sret && slot > 0)
			slot--;
		/*
		 * check this node pointer against the cache_only and
		 * min_trans parameters. If it isn't in cache or is too
		 * old, skip to the next one.
		 */
		while (slot < nritems) {
			u64 blockptr;
			u64 gen;
			struct extent_buffer *tmp;
			struct btrfs_disk_key disk_key;

			blockptr = btrfs_node_blockptr(cur, slot);
			gen = btrfs_node_ptr_generation(cur, slot);
			if (gen < min_trans) {
				slot++;
				continue;
			}
			if (!cache_only)
				break;

			if (max_key) {
				btrfs_node_key(cur, &disk_key, slot);
				if (comp_keys(&disk_key, max_key) >= 0) {
					ret = 1;
					goto out;
				}
			}

			tmp = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));

			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				free_extent_buffer(tmp);
				break;
			}
			if (tmp)
				free_extent_buffer(tmp);
			slot++;
		}
find_next_key:
		/*
		 * we didn't find a candidate key in this node, walk forward
		 * and find another one
		 */
		if (slot >= nritems) {
			path->slots[level] = slot;
			btrfs_set_path_blocking(path);
			sret = btrfs_find_next_key(root, path, min_key, level,
						   cache_only, min_trans);
			if (sret == 0) {
				btrfs_release_path(path);
				goto again;
			} else {
				goto out;
			}
		}
		/* save our key for returning back */
		btrfs_node_key_to_cpu(cur, &found_key, slot);
		path->slots[level] = slot;
		if (level == path->lowest_level) {
			ret = 0;
			unlock_up(path, level, 1);
			goto out;
		}
		btrfs_set_path_blocking(path);
		cur = read_node_slot(root, cur, slot);
		BUG_ON(!cur);

		btrfs_tree_read_lock(cur);

		path->locks[level - 1] = BTRFS_READ_LOCK;
		path->nodes[level - 1] = cur;
		unlock_up(path, level, 1);
		btrfs_clear_path_blocking(path, NULL, 0);
	}
out:
	if (ret == 0)
		memcpy(min_key, &found_key, sizeof(found_key));
	btrfs_set_path_blocking(path);
	return ret;
}
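/*
 * Illustrative sketch only: scan everything newer than a given transid
 * with btrfs_search_forward(), the way the defrag and tree-log code use
 * it.  The function name and the "process the item" step are
 * hypothetical, and the key advancement is simplified: it only bumps
 * min_key.offset and gives up at the wrap instead of carrying into
 * type/objectid.  keep_locks is set before the first call, as the
 * WARN_ON above requires, and the path is released between calls.
 */
static int __maybe_unused example_scan_newer_than(struct btrfs_root *root,
						  u64 min_trans)
{
	struct btrfs_path *path;
	struct btrfs_key min_key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->keep_locks = 1;

	/* smallest possible key: start at the front of the tree */
	min_key.objectid = 0;
	min_key.type = 0;
	min_key.offset = 0;

	while (1) {
		ret = btrfs_search_forward(root, &min_key, NULL, path,
					   0, min_trans);
		if (ret)	/* 1: nothing left, < 0: error */
			break;

		/* ... process the item at path->slots[0] here;
		 * min_key now holds the key that was found ...
		 */

		if (min_key.offset == (u64)-1) {
			/* hypothetical simplification: stop at the wrap */
			ret = 0;
			break;
		}
		min_key.offset++;
		btrfs_release_path(path);
	}
	if (ret > 0)
		ret = 0;
	btrfs_free_path(path);
	return ret;
}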
/*
 * this is similar to btrfs_next_leaf, but does not try to preserve
 * and fixup the path. It looks for and returns the next key in the
 * tree based on the current path and the cache_only and min_trans
 * parameters.
 *
 * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
 */
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
			struct btrfs_key *key, int level,
			int cache_only, u64 min_trans)
{
	int slot;
	struct extent_buffer *c;

	WARN_ON(!path->keep_locks);
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;

		slot = path->slots[level] + 1;
		c = path->nodes[level];
next:
		if (slot >= btrfs_header_nritems(c)) {
			int ret;
			int orig_lowest;
			struct btrfs_key cur_key;
			if (level + 1 >= BTRFS_MAX_LEVEL ||
			    !path->nodes[level + 1])
				return 1;

			if (path->locks[level + 1]) {
				level++;
				continue;
			}

			slot = btrfs_header_nritems(c) - 1;
			if (level == 0)
				btrfs_item_key_to_cpu(c, &cur_key, slot);
			else
				btrfs_node_key_to_cpu(c, &cur_key, slot);

			orig_lowest = path->lowest_level;
			btrfs_release_path(path);
			path->lowest_level = level;
			ret = btrfs_search_slot(NULL, root, &cur_key, path,
						0, 0);
			path->lowest_level = orig_lowest;
			if (ret < 0)
				return ret;

			c = path->nodes[level];
			slot = path->slots[level];
			if (ret == 0)
				slot++;
			goto next;
		}

		if (level == 0)
			btrfs_item_key_to_cpu(c, key, slot);
		else {
			u64 blockptr = btrfs_node_blockptr(c, slot);
			u64 gen = btrfs_node_ptr_generation(c, slot);

			if (cache_only) {
				struct extent_buffer *cur;
				cur = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));
				if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
					slot++;
					if (cur)
						free_extent_buffer(cur);
					goto next;
				}
				free_extent_buffer(cur);
			}
			if (gen < min_trans) {
				slot++;
				goto next;
			}
			btrfs_node_key_to_cpu(c, key, slot);
		}
		return 0;
	}
	return 1;
}
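/*
 * Illustrative sketch only: peek at the key that follows the current
 * path position without advancing the path itself.  The wrapper name is
 * hypothetical; the path is assumed to come from a search made with
 * path->keep_locks = 1, as the WARN_ON in btrfs_find_next_key requires.
 */
static int __maybe_unused example_peek_next_key(struct btrfs_root *root,
						struct btrfs_path *path,
						struct btrfs_key *next_key)
{
	/*
	 * level 0, cache_only 0, min_trans 0: consider every on-disk
	 * key, starting from the leaf slot the path points at.
	 */
	return btrfs_find_next_key(root, path, next_key, 0, 0, 0);
}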
/*
 * search the tree again to find a leaf with greater keys
 * returns 0 if it found something or 1 if there are no greater leaves.
 * returns < 0 on io errors.
 */
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	int slot;
	int level;
	struct extent_buffer *c;
	struct extent_buffer *next;
	struct btrfs_key key;
	u32 nritems;
	int ret;
	int old_spinning = path->leave_spinning;
	int next_rw_lock = 0;

	nritems = btrfs_header_nritems(path->nodes[0]);
	if (nritems == 0)
		return 1;

	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
again:
	level = 1;
	next = NULL;
	next_rw_lock = 0;
	btrfs_release_path(path);

	path->keep_locks = 1;
	path->leave_spinning = 1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	path->keep_locks = 0;

	if (ret < 0)
		return ret;

	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * by releasing the path above we dropped all our locks. A balance
	 * could have added more items next to the key that used to be
	 * at the very end of the block. So, check again here and
	 * advance the path if there are now more items available.
	 */
	if (nritems > 0 && path->slots[0] < nritems - 1) {
		if (ret == 0)
			path->slots[0]++;
		ret = 0;
		goto done;
	}

	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level]) {
			ret = 1;
			goto done;
		}

		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= btrfs_header_nritems(c)) {
			level++;
			if (level == BTRFS_MAX_LEVEL) {
				ret = 1;
				goto done;
			}
			continue;
		}

		if (next) {
			btrfs_tree_unlock_rw(next, next_rw_lock);
			free_extent_buffer(next);
		}

		next = c;
		next_rw_lock = path->locks[level];
		ret = read_block_for_search(NULL, root, path, &next, level,
					    slot, &key);
		if (ret == -EAGAIN)
			goto again;

		if (ret < 0) {
			btrfs_release_path(path);
			goto done;
		}

		if (!path->skip_locking) {
			ret = btrfs_try_tree_read_lock(next);
			if (!ret) {
				btrfs_set_path_blocking(path);
				btrfs_tree_read_lock(next);
				btrfs_clear_path_blocking(path, next,
							  BTRFS_READ_LOCK);
			}
			next_rw_lock = BTRFS_READ_LOCK;
		}
		break;
	}
	path->slots[level] = slot;
	while (1) {
		level--;
		c = path->nodes[level];
		if (path->locks[level])
			btrfs_tree_unlock_rw(c, path->locks[level]);

		free_extent_buffer(c);
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!path->skip_locking)
			path->locks[level] = next_rw_lock;
		if (!level)
			break;

		ret = read_block_for_search(NULL, root, path, &next, level,
					    0, &key);
		if (ret == -EAGAIN)
			goto again;

		if (ret < 0) {
			btrfs_release_path(path);
			goto done;
		}

		if (!path->skip_locking) {
			ret = btrfs_try_tree_read_lock(next);
			if (!ret) {
				btrfs_set_path_blocking(path);
				btrfs_tree_read_lock(next);
				btrfs_clear_path_blocking(path, next,
							  BTRFS_READ_LOCK);
			}
			next_rw_lock = BTRFS_READ_LOCK;
		}
	}
	ret = 0;
done:
	unlock_up(path, 0, 1);
	path->leave_spinning = old_spinning;
	if (!old_spinning)
		btrfs_set_path_blocking(path);

	return ret;
}

/*
 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
 * searching until it gets past min_objectid or finds an item of 'type'
 *
 * returns 0 if something is found, 1 if nothing was found and < 0 on error
 */
int btrfs_previous_item(struct btrfs_root *root,
			struct btrfs_path *path, u64 min_objectid,
			int type)
{
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;

	while (1) {
		if (path->slots[0] == 0) {
			btrfs_set_path_blocking(path);
			ret = btrfs_prev_leaf(root, path);
			if (ret != 0)
				return ret;
		} else {
			path->slots[0]--;
		}
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (nritems == 0)
			return 1;
		if (path->slots[0] == nritems)
			path->slots[0]--;

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid < min_objectid)
			break;
		if (found_key.type == type)
			return 0;
		if (found_key.objectid == min_objectid &&
		    found_key.type < type)
			break;
	}
	return 1;
}
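/*
 * Illustrative sketch only: visit every item in a tree in key order,
 * using btrfs_next_leaf() to hop between leaves.  The function name and
 * the "inspect the item" step are hypothetical; real walkers also check
 * the keys they find against whatever range they care about.
 */
static int __maybe_unused example_walk_all_items(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* smallest possible key: start at the front of the tree */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret)
				break;	/* 1: no more leaves, < 0: error */
			continue;
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* ... inspect the item at path->slots[0] here ... */
		path->slots[0]++;
	}
	if (ret > 0)
		ret = 0;	/* walking off the end is not an error */
out:
	btrfs_free_path(path);
	return ret;
}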