/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/stacktrace.h>
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "delayed-ref.h"
#include "ref-verify.h"

/*
 * Used to keep track of the roots and the number of refs each root has for a
 * given bytenr.  This only tracks the number of direct references, not shared
 * references.
 */
struct root_entry {
	u64 root_objectid;
	u64 num_refs;
	struct rb_node node;
};

/*
 * These are meant to represent what should exist in the extent tree, so they
 * can be used to verify that the extent tree is consistent: every entry here
 * should match what the extent tree says.
 */
struct ref_entry {
	u64 root_objectid;
	u64 parent;
	u64 owner;
	u64 offset;
	u64 num_refs;
	struct rb_node node;
};

#define MAX_TRACE	16

/*
 * Whenever we add/remove a reference we record the action.  The action maps
 * back to the delayed ref action.  We hold the ref we are changing in the
 * action so we can account for the history properly, and we record the root we
 * were called with since it could be different from ref_root.  We also store
 * stack traces because that's how I roll.
 */
struct ref_action {
	int action;
	u64 root;
	struct ref_entry ref;
	struct list_head list;
	unsigned long trace[MAX_TRACE];
	unsigned int trace_len;
};

/*
 * One of these exists for every block we reference; it holds the roots and
 * references to it as well as all of the ref actions that have occurred to it.
 * We never free it until we unmount the file system in order to make sure
 * re-allocations are happening properly.
 */
struct block_entry {
	u64 bytenr;
	u64 len;
	u64 num_refs;
	int metadata;
	int from_disk;
	struct rb_root roots;
	struct rb_root refs;
	struct rb_node node;
	struct list_head actions;
};
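
/*
 * A rough picture of how the pieces fit together (illustrative only):
 *
 *	fs_info->block_tree	rb tree of block_entry, keyed by bytenr
 *	  block_entry->roots	rb tree of root_entry, keyed by root_objectid
 *	  block_entry->refs	rb tree of ref_entry, ordered by comp_refs()
 *	  block_entry->actions	list of ref_action, oldest first
 *
 * The insert_* helpers below return the already existing entry on a
 * collision and NULL when the new node was linked into the tree.
 */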

static struct block_entry *insert_block_entry(struct rb_root *root,
					      struct block_entry *be)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct block_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct block_entry, node);
		if (entry->bytenr > be->bytenr)
			p = &(*p)->rb_left;
		else if (entry->bytenr < be->bytenr)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&be->node, parent_node, p);
	rb_insert_color(&be->node, root);
	return NULL;
}

static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n;
	struct block_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < bytenr)
			n = n->rb_right;
		else if (entry->bytenr > bytenr)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

static struct root_entry *insert_root_entry(struct rb_root *root,
					    struct root_entry *re)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct root_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct root_entry, node);
		if (entry->root_objectid > re->root_objectid)
			p = &(*p)->rb_left;
		else if (entry->root_objectid < re->root_objectid)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&re->node, parent_node, p);
	rb_insert_color(&re->node, root);
	return NULL;
}

static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
{
	if (ref1->root_objectid < ref2->root_objectid)
		return -1;
	if (ref1->root_objectid > ref2->root_objectid)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	if (ref1->owner < ref2->owner)
		return -1;
	if (ref1->owner > ref2->owner)
		return 1;
	if (ref1->offset < ref2->offset)
		return -1;
	if (ref1->offset > ref2->offset)
		return 1;
	return 0;
}

static struct ref_entry *insert_ref_entry(struct rb_root *root,
					  struct ref_entry *ref)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct ref_entry *entry;
	int cmp;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct ref_entry, node);
		cmp = comp_refs(entry, ref);
		if (cmp > 0)
			p = &(*p)->rb_left;
		else if (cmp < 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&ref->node, parent_node, p);
	rb_insert_color(&ref->node, root);
	return NULL;
}

static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
	struct rb_node *n;
	struct root_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct root_entry, node);
		if (entry->root_objectid < objectid)
			n = n->rb_right;
		else if (entry->root_objectid > objectid)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
	struct stack_trace stack_trace;

	stack_trace.max_entries = MAX_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = ra->trace;
	stack_trace.skip = 2;
	save_stack_trace(&stack_trace);
	ra->trace_len = stack_trace.nr_entries;
}

static void __print_stack_trace(struct btrfs_fs_info *fs_info,
				struct ref_action *ra)
{
	struct stack_trace trace;

	if (ra->trace_len == 0) {
		btrfs_err(fs_info, " ref-verify: no stacktrace");
		return;
	}
	trace.nr_entries = ra->trace_len;
	trace.entries = ra->trace;
	print_stack_trace(&trace, 2);
}
#else
static inline void __save_stack_trace(struct ref_action *ra)
{
}

static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
				       struct ref_action *ra)
{
	btrfs_err(fs_info, " ref-verify: no stacktrace support");
}
#endif

static void free_block_entry(struct block_entry *be)
{
	struct root_entry *re;
	struct ref_entry *ref;
	struct ref_action *ra;
	struct rb_node *n;

	while ((n = rb_first(&be->roots))) {
		re = rb_entry(n, struct root_entry, node);
		rb_erase(&re->node, &be->roots);
		kfree(re);
	}

	while ((n = rb_first(&be->refs))) {
		ref = rb_entry(n, struct ref_entry, node);
		rb_erase(&ref->node, &be->refs);
		kfree(ref);
	}

	while (!list_empty(&be->actions)) {
		ra = list_first_entry(&be->actions, struct ref_action,
				      list);
		list_del(&ra->list);
		kfree(ra);
	}
	kfree(be);
}

/*
 * Looks up (or creates) the block entry for bytenr.  On success this returns
 * with fs_info->ref_verify_lock held; the caller is responsible for dropping
 * it once it is done updating the entry.
 */
static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
					   u64 bytenr, u64 len,
					   u64 root_objectid)
{
	struct block_entry *be = NULL, *exist;
	struct root_entry *re = NULL;

	re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
	be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
	if (!be || !re) {
		kfree(re);
		kfree(be);
		return ERR_PTR(-ENOMEM);
	}
	be->bytenr = bytenr;
	be->len = len;

	re->root_objectid = root_objectid;
	re->num_refs = 0;

	spin_lock(&fs_info->ref_verify_lock);
	exist = insert_block_entry(&fs_info->block_tree, be);
	if (exist) {
		if (root_objectid) {
			struct root_entry *exist_re;

			exist_re = insert_root_entry(&exist->roots, re);
			if (exist_re)
				kfree(re);
		}
		kfree(be);
		return exist;
	}

	be->num_refs = 0;
	be->metadata = 0;
	be->from_disk = 0;
	be->roots = RB_ROOT;
	be->refs = RB_ROOT;
	INIT_LIST_HEAD(&be->actions);
	if (root_objectid)
		insert_root_entry(&be->roots, re);
	else
		kfree(re);
	return be;
}

static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
			  u64 parent, u64 bytenr, int level)
{
	struct block_entry *be;
	struct root_entry *re;
	struct ref_entry *ref = NULL, *exist;

	ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref->root_objectid = 0;
	else
		ref->root_objectid = ref_root;
	ref->parent = parent;
	ref->owner = level;
	ref->offset = 0;
	ref->num_refs = 1;

	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs++;
	be->from_disk = 1;
	be->metadata = 1;

	if (!parent) {
		ASSERT(ref_root);
		re = lookup_root_entry(&be->roots, ref_root);
		ASSERT(re);
		re->num_refs++;
	}
	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		exist->num_refs++;
		kfree(ref);
	}
	spin_unlock(&fs_info->ref_verify_lock);

	return 0;
}

static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
			       u64 parent, u32 num_refs, u64 bytenr,
			       u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = parent;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing shared ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
			       struct extent_buffer *leaf,
			       struct btrfs_extent_data_ref *dref,
			       u64 bytenr, u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;
	struct root_entry *re;
	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = 0;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}

	re = lookup_root_entry(&be->roots, ref_root);
	if (!re) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "missing root in new block entry?");
		return -EINVAL;
	}
	re->num_refs += num_refs;
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}
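
/*
 * Rough sketch of the item layouts that process_extent_item() has to walk
 * (illustrative, not authoritative):
 *
 *	EXTENT_ITEM (tree block, non-skinny):
 *		btrfs_extent_item | btrfs_tree_block_info | inline refs...
 *	METADATA_ITEM (skinny):
 *		btrfs_extent_item | inline refs...	(level is in key.offset)
 *	EXTENT_ITEM (data):
 *		btrfs_extent_item | inline refs...
 *
 * Each inline ref is a btrfs_extent_inline_ref (type + offset).  For
 * EXTENT_DATA_REF the payload is a btrfs_extent_data_ref overlapping the
 * offset field, for SHARED_DATA_REF a btrfs_shared_data_ref count follows
 * the iref.
 */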
static int process_extent_item(struct btrfs_fs_info *fs_info,
			       struct btrfs_path *path, struct btrfs_key *key,
			       int slot, int *tree_block_level)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct extent_buffer *leaf = path->nodes[0];
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	unsigned long end, ptr;
	u64 offset, flags, count;
	int type, ret;

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)(ei + 1);
		*tree_block_level = btrfs_tree_block_level(leaf, info);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	} else {
		if (key->type == BTRFS_METADATA_ITEM_KEY)
			*tree_block_level = key->offset;
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	}

	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, offset, 0, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, offset, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = add_extent_data_ref(fs_info, leaf, dref,
						  key->objectid, key->offset);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, offset, count,
						  key->objectid, key->offset);
			break;
		default:
			btrfs_err(fs_info, "invalid key type in iref");
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
		ptr += btrfs_extent_inline_ref_size(type);
	}
	return ret;
}

static int process_leaf(struct btrfs_root *root,
			struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	u32 count;
	int i = 0, tree_block_level = 0, ret = 0;
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(leaf);

	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		switch (key.type) {
		case BTRFS_EXTENT_ITEM_KEY:
			*num_bytes = key.offset;
			/* fall through */
		case BTRFS_METADATA_ITEM_KEY:
			*bytenr = key.objectid;
			ret = process_extent_item(fs_info, path, &key, i,
						  &tree_block_level);
			break;
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, key.offset, 0,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, key.offset,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = btrfs_item_ptr(leaf, i,
					      struct btrfs_extent_data_ref);
			ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
						  *num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = btrfs_item_ptr(leaf, i,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, key.offset, count,
						  *bytenr, *num_bytes);
			break;
		default:
			break;
		}
		if (ret)
			break;
	}
	return ret;
}

/* Walk down to the leaf from the given level */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  int level, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *eb;
	u64 block_bytenr, gen;
	int ret = 0;

	while (level >= 0) {
		if (level) {
			block_bytenr = btrfs_node_blockptr(path->nodes[level],
							   path->slots[level]);
			gen = btrfs_node_ptr_generation(path->nodes[level],
							path->slots[level]);
			eb = read_tree_block(fs_info, block_bytenr, gen);
			if (IS_ERR(eb))
				return PTR_ERR(eb);
			if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				return -EIO;
			}
			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->nodes[level-1] = eb;
			path->slots[level-1] = 0;
			path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
		} else {
			ret = process_leaf(root, path, bytenr, num_bytes);
			if (ret)
				break;
		}
		level--;
	}
	return ret;
}

/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			int *level)
{
	int l;

	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
		if (!path->nodes[l])
			continue;
		if (l) {
			path->slots[l]++;
			if (path->slots[l] <
			    btrfs_header_nritems(path->nodes[l])) {
				*level = l;
				return 0;
			}
		}
		btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
		free_extent_buffer(path->nodes[l]);
		path->nodes[l] = NULL;
		path->slots[l] = 0;
		path->locks[l] = 0;
	}

	return 1;
}

static void dump_ref_action(struct btrfs_fs_info *fs_info,
			    struct ref_action *ra)
{
	btrfs_err(fs_info,
		  " Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
		  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
		  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
	__print_stack_trace(fs_info, ra);
}

/*
 * Dumps all the information from the block entry to printk; it's going to be
 * awesome.
 */
static void dump_block_entry(struct btrfs_fs_info *fs_info,
			     struct block_entry *be)
{
	struct ref_entry *ref;
	struct root_entry *re;
	struct ref_action *ra;
	struct rb_node *n;

	btrfs_err(fs_info,
		  "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
		  be->bytenr, be->len, be->num_refs, be->metadata,
		  be->from_disk);

	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
		ref = rb_entry(n, struct ref_entry, node);
		btrfs_err(fs_info,
			  " ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			  ref->root_objectid, ref->parent, ref->owner,
			  ref->offset, ref->num_refs);
	}

	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
		re = rb_entry(n, struct root_entry, node);
		btrfs_err(fs_info, " root entry %llu, num_refs %llu",
			  re->root_objectid, re->num_refs);
	}

	list_for_each_entry(ra, &be->actions, list)
		dump_ref_action(fs_info, ra);
}

/*
 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
 * @root: the root we are making this modification from.
 * @bytenr: the bytenr we are modifying.
 * @num_bytes: number of bytes.
 * @parent: the parent bytenr.
 * @ref_root: the original root owner of the bytenr.
 * @owner: level in the case of metadata, inode in the case of data.
 * @offset: 0 for metadata, file offset for data.
 * @action: the action that we are doing, this is the same as the delayed ref
 *	action.
 *
 * This will add an action item to the given bytenr and do sanity checks to
 * make sure we haven't messed something up.  If we are making a new allocation
 * and this block entry has history we will delete all previous actions as long
 * as our sanity checks pass, as they are no longer needed.
 */
int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
		       u64 parent, u64 ref_root, u64 owner, u64 offset,
		       int action)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ref_entry *ref = NULL, *exist;
	struct ref_action *ra = NULL;
	struct block_entry *be = NULL;
	struct root_entry *re = NULL;
	int ret = 0;
	bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

	if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
		return 0;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
	if (!ra || !ref) {
		kfree(ref);
		kfree(ra);
		ret = -ENOMEM;
		goto out;
	}

	if (parent) {
		ref->parent = parent;
	} else {
		ref->root_objectid = ref_root;
		ref->owner = owner;
		ref->offset = offset;
	}
	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;

	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
	/*
	 * Save the extra info from the delayed ref in the ref action to make
	 * it easier to figure out what is happening.  The real refs we add to
	 * the ref tree need to reflect what we save on disk so it matches any
	 * on-disk refs we pre-loaded.
	 */
	ra->ref.owner = owner;
	ra->ref.offset = offset;
	ra->ref.root_objectid = ref_root;
	__save_stack_trace(ra);

	INIT_LIST_HEAD(&ra->list);
	ra->action = action;
	ra->root = root->objectid;

	/*
	 * This is an allocation; preallocate the block_entry in case we
	 * haven't used it before.
	 */
	ret = -EINVAL;
	if (action == BTRFS_ADD_DELAYED_EXTENT) {
		/*
		 * For subvol_create we'll just pass in whatever the parent
		 * root is and the new root objectid, so let's not treat the
		 * passed in root as if it really has a ref for this bytenr.
		 */
		be = add_block_entry(root->fs_info, bytenr, num_bytes,
				     ref_root);
		if (IS_ERR(be)) {
			kfree(ra);
			ret = PTR_ERR(be);
			goto out;
		}
		be->num_refs++;
		if (metadata)
			be->metadata = 1;

		if (be->num_refs != 1) {
			btrfs_err(fs_info,
				  "re-allocated a block that still has references to it!");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			goto out_unlock;
		}

		while (!list_empty(&be->actions)) {
			struct ref_action *tmp;

			tmp = list_first_entry(&be->actions, struct ref_action,
					       list);
			list_del(&tmp->list);
			kfree(tmp);
		}
	} else {
		struct root_entry *tmp;

		if (!parent) {
			re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
			if (!re) {
				kfree(ref);
				kfree(ra);
				ret = -ENOMEM;
				goto out;
			}
			/*
			 * This is the root that is modifying us, so it's the
			 * one we want to lookup below when we modify the
			 * re->num_refs.
			 */
			ref_root = root->objectid;
			re->root_objectid = root->objectid;
			re->num_refs = 0;
		}

		spin_lock(&root->fs_info->ref_verify_lock);
		be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
		if (!be) {
			btrfs_err(fs_info,
				  "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				  action, (unsigned long long)bytenr,
				  (unsigned long long)num_bytes);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			kfree(re);
			goto out_unlock;
		}

		if (!parent) {
			tmp = insert_root_entry(&be->roots, re);
			if (tmp) {
				kfree(re);
				re = tmp;
			}
		}
	}

	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		if (action == BTRFS_DROP_DELAYED_REF) {
			if (exist->num_refs == 0) {
				btrfs_err(fs_info,
					  "dropping a ref for an existing root that doesn't have a ref on the block");
				dump_block_entry(fs_info, be);
				dump_ref_action(fs_info, ra);
				kfree(ra);
				goto out_unlock;
			}
			exist->num_refs--;
			if (exist->num_refs == 0) {
				rb_erase(&exist->node, &be->refs);
				kfree(exist);
			}
		} else if (!be->metadata) {
			exist->num_refs++;
		} else {
			btrfs_err(fs_info,
				  "attempting to add another ref for an existing ref on a tree block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
		kfree(ref);
	} else {
		if (action == BTRFS_DROP_DELAYED_REF) {
			btrfs_err(fs_info,
				  "dropping a ref for a root that doesn't have a ref on the block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}

	if (!parent && !re) {
		re = lookup_root_entry(&be->roots, ref_root);
		if (!re) {
			/*
			 * This shouldn't happen because we will add our re
			 * above when we lookup the be with !parent, but just in
			 * case catch this case so we don't panic because I
			 * didn't think of some other corner case.
			 */
			btrfs_err(fs_info, "failed to find root %llu for %llu",
				  root->objectid, be->bytenr);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}
	if (action == BTRFS_DROP_DELAYED_REF) {
		if (re)
			re->num_refs--;
		be->num_refs--;
	} else if (action == BTRFS_ADD_DELAYED_REF) {
		be->num_refs++;
		if (re)
			re->num_refs++;
	}
	list_add_tail(&ra->list, &be->actions);
	ret = 0;
out_unlock:
	spin_unlock(&root->fs_info->ref_verify_lock);
out:
	if (ret)
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
	return ret;
}
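
/*
 * Example (illustrative only; the real hooks sit on the extent ref add/drop
 * paths): when a root takes a reference on a data extent it would be recorded
 * with something along the lines of
 *
 *	btrfs_ref_tree_mod(root, bytenr, num_bytes, 0, root->objectid,
 *			   owner_ino, file_offset, BTRFS_ADD_DELAYED_REF);
 *
 * and the matching drop would use BTRFS_DROP_DELAYED_REF.  owner_ino and
 * file_offset here are stand-in names for the inode number and file offset
 * of the extent being referenced.
 */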

/* Free up the ref cache */
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
	struct block_entry *be;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	while ((n = rb_first(&fs_info->block_tree))) {
		be = rb_entry(n, struct block_entry, node);
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
		cond_resched_lock(&fs_info->ref_verify_lock);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
			       u64 len)
{
	struct block_entry *be = NULL, *entry;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	n = fs_info->block_tree.rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < start) {
			n = n->rb_right;
		} else if (entry->bytenr > start) {
			n = n->rb_left;
		} else {
			be = entry;
			break;
		}
		/* We want to get as close to start as possible */
		if (be == NULL ||
		    (entry->bytenr < start && be->bytenr > start) ||
		    (entry->bytenr < start && entry->bytenr > be->bytenr))
			be = entry;
	}

	/*
	 * Could have an empty block group, maybe have something to check for
	 * this case to verify we were actually empty?
	 */
	if (!be) {
		spin_unlock(&fs_info->ref_verify_lock);
		return;
	}

	n = &be->node;
	while (n) {
		be = rb_entry(n, struct block_entry, node);
		n = rb_next(n);
		if (be->bytenr < start && be->bytenr + be->len > start) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
			continue;
		}
		if (be->bytenr < start)
			continue;
		if (be->bytenr >= start + len)
			break;
		if (be->bytenr + be->len > start + len) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
		}
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

/* Walk down all roots and build the ref tree, meant to be called at mount */
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct extent_buffer *eb;
	u64 bytenr = 0, num_bytes = 0;
	int ret, level;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	eb = btrfs_read_lock_root_node(fs_info->extent_root);
	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
	level = btrfs_header_level(eb);
	path->nodes[level] = eb;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

	while (1) {
		/*
		 * We have to keep track of the bytenr/num_bytes we last hit
		 * because we could have run out of space for an inline ref,
		 * and would have had to add a ref key item which may appear
		 * on a different leaf from the original extent item.
		 */
		ret = walk_down_tree(fs_info->extent_root, path, level,
				     &bytenr, &num_bytes);
		if (ret)
			break;
		ret = walk_up_tree(fs_info->extent_root, path, &level);
		if (ret < 0)
			break;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}
	if (ret) {
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
		btrfs_free_ref_cache(fs_info);
	}
	btrfs_free_path(path);
	return ret;
}