/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/stacktrace.h>
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "delayed-ref.h"
#include "ref-verify.h"

/*
 * Used to keep track of the roots and the number of refs each root has for a
 * given bytenr.  This only tracks the number of direct references, not shared
 * references.
 */
struct root_entry {
	u64 root_objectid;
	u64 num_refs;
	struct rb_node node;
};

/*
 * These are meant to represent what should exist in the extent tree; they can
 * be used to verify that the extent tree is consistent, as they should all
 * match what the extent tree says.
 */
struct ref_entry {
	u64 root_objectid;
	u64 parent;
	u64 owner;
	u64 offset;
	u64 num_refs;
	struct rb_node node;
};

#define MAX_TRACE	16

/*
 * Whenever we add/remove a reference we record the action.  The action maps
 * back to the delayed ref action.  We hold the ref we are changing in the
 * action so we can account for the history properly, and we record the root we
 * were called with since it could be different from ref_root.  We also store
 * stack traces because that's how I roll.
 */
struct ref_action {
	int action;
	u64 root;
	struct ref_entry ref;
	struct list_head list;
	unsigned long trace[MAX_TRACE];
	unsigned int trace_len;
};

/*
 * One of these exists for every block we reference; it holds the roots and
 * references to it as well as all of the ref actions that have occurred to it.
 * We never free it until we unmount the file system in order to make sure
 * re-allocations are happening properly.
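 * Block entries are kept in fs_info->block_tree, keyed by bytenr, and every
 * lookup or update of that tree happens under fs_info->ref_verify_lock.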
 */
struct block_entry {
	u64 bytenr;
	u64 len;
	u64 num_refs;
	int metadata;
	int from_disk;
	struct rb_root roots;
	struct rb_root refs;
	struct rb_node node;
	struct list_head actions;
};

static struct block_entry *insert_block_entry(struct rb_root *root,
					      struct block_entry *be)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct block_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct block_entry, node);
		if (entry->bytenr > be->bytenr)
			p = &(*p)->rb_left;
		else if (entry->bytenr < be->bytenr)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&be->node, parent_node, p);
	rb_insert_color(&be->node, root);
	return NULL;
}

static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n;
	struct block_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < bytenr)
			n = n->rb_right;
		else if (entry->bytenr > bytenr)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

static struct root_entry *insert_root_entry(struct rb_root *root,
					    struct root_entry *re)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct root_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct root_entry, node);
		if (entry->root_objectid > re->root_objectid)
			p = &(*p)->rb_left;
		else if (entry->root_objectid < re->root_objectid)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&re->node, parent_node, p);
	rb_insert_color(&re->node, root);
	return NULL;
}

static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
{
	if (ref1->root_objectid < ref2->root_objectid)
		return -1;
	if (ref1->root_objectid > ref2->root_objectid)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	if (ref1->owner < ref2->owner)
		return -1;
	if (ref1->owner > ref2->owner)
		return 1;
	if (ref1->offset < ref2->offset)
		return -1;
	if (ref1->offset > ref2->offset)
		return 1;
	return 0;
}

static struct ref_entry *insert_ref_entry(struct rb_root *root,
					  struct ref_entry *ref)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct ref_entry *entry;
	int cmp;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct ref_entry, node);
		cmp = comp_refs(entry, ref);
		if (cmp > 0)
			p = &(*p)->rb_left;
		else if (cmp < 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&ref->node, parent_node, p);
	rb_insert_color(&ref->node, root);
	return NULL;
}

static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
	struct rb_node *n;
	struct root_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct root_entry, node);
		if (entry->root_objectid < objectid)
			n = n->rb_right;
		else if (entry->root_objectid > objectid)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
	struct stack_trace stack_trace;

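	/*
	 * Capture up to MAX_TRACE return addresses into ra->trace, skipping
	 * the first couple of frames so the trace starts at the ref-verify
	 * caller rather than in this helper.
	 */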
	stack_trace.max_entries = MAX_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = ra->trace;
	stack_trace.skip = 2;
	save_stack_trace(&stack_trace);
	ra->trace_len = stack_trace.nr_entries;
}

static void __print_stack_trace(struct btrfs_fs_info *fs_info,
				struct ref_action *ra)
{
	struct stack_trace trace;

	if (ra->trace_len == 0) {
		btrfs_err(fs_info, " ref-verify: no stacktrace");
		return;
	}
	trace.nr_entries = ra->trace_len;
	trace.entries = ra->trace;
	print_stack_trace(&trace, 2);
}
#else
static inline void __save_stack_trace(struct ref_action *ra)
{
}

static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
				       struct ref_action *ra)
{
	btrfs_err(fs_info, " ref-verify: no stacktrace support");
}
#endif

static void free_block_entry(struct block_entry *be)
{
	struct root_entry *re;
	struct ref_entry *ref;
	struct ref_action *ra;
	struct rb_node *n;

	while ((n = rb_first(&be->roots))) {
		re = rb_entry(n, struct root_entry, node);
		rb_erase(&re->node, &be->roots);
		kfree(re);
	}

	while ((n = rb_first(&be->refs))) {
		ref = rb_entry(n, struct ref_entry, node);
		rb_erase(&ref->node, &be->refs);
		kfree(ref);
	}

	while (!list_empty(&be->actions)) {
		ra = list_first_entry(&be->actions, struct ref_action,
				      list);
		list_del(&ra->list);
		kfree(ra);
	}
	kfree(be);
}

static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
					   u64 bytenr, u64 len,
					   u64 root_objectid)
{
	struct block_entry *be = NULL, *exist;
	struct root_entry *re = NULL;

	re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
	be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
	if (!be || !re) {
		kfree(re);
		kfree(be);
		return ERR_PTR(-ENOMEM);
	}
	be->bytenr = bytenr;
	be->len = len;

	re->root_objectid = root_objectid;
	re->num_refs = 0;

	spin_lock(&fs_info->ref_verify_lock);
	exist = insert_block_entry(&fs_info->block_tree, be);
	if (exist) {
		if (root_objectid) {
			struct root_entry *exist_re;

			exist_re = insert_root_entry(&exist->roots, re);
			if (exist_re)
				kfree(re);
		}
		kfree(be);
		return exist;
	}

	be->num_refs = 0;
	be->metadata = 0;
	be->from_disk = 0;
	be->roots = RB_ROOT;
	be->refs = RB_ROOT;
	INIT_LIST_HEAD(&be->actions);
	if (root_objectid)
		insert_root_entry(&be->roots, re);
	else
		kfree(re);
	return be;
}

static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
			  u64 parent, u64 bytenr, int level)
{
	struct block_entry *be;
	struct root_entry *re;
	struct ref_entry *ref = NULL, *exist;

	ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref->root_objectid = 0;
	else
		ref->root_objectid = ref_root;
	ref->parent = parent;
	ref->owner = level;
	ref->offset = 0;
	ref->num_refs = 1;

	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs++;
	be->from_disk = 1;
	be->metadata = 1;

	if (!parent) {
		ASSERT(ref_root);
		re = lookup_root_entry(&be->roots, ref_root);
		ASSERT(re);
		re->num_refs++;
	}
	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		exist->num_refs++;
		kfree(ref);
	}
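	/*
	 * add_block_entry() returned with fs_info->ref_verify_lock held, so
	 * drop it now that this tree block's ref has been recorded.
	 */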
	spin_unlock(&fs_info->ref_verify_lock);

	return 0;
}

static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
			       u64 parent, u32 num_refs, u64 bytenr,
			       u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = parent;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing shared ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
			       struct extent_buffer *leaf,
			       struct btrfs_extent_data_ref *dref,
			       u64 bytenr, u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;
	struct root_entry *re;
	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = 0;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}

	re = lookup_root_entry(&be->roots, ref_root);
	if (!re) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "missing root in new block entry?");
		return -EINVAL;
	}
	re->num_refs += num_refs;
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

static int process_extent_item(struct btrfs_fs_info *fs_info,
			       struct btrfs_path *path, struct btrfs_key *key,
			       int slot, int *tree_block_level)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct extent_buffer *leaf = path->nodes[0];
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	unsigned long end, ptr;
	u64 offset, flags, count;
	int type, ret = 0;

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)(ei + 1);
		*tree_block_level = btrfs_tree_block_level(leaf, info);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	} else {
		if (key->type == BTRFS_METADATA_ITEM_KEY)
			*tree_block_level = key->offset;
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	}

	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);
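		/*
		 * The inline ref offset is interpreted per type below: the
		 * owning root objectid for tree block refs, the parent bytenr
		 * for shared refs.
		 */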
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, offset, 0, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, offset, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = add_extent_data_ref(fs_info, leaf, dref,
						  key->objectid, key->offset);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, offset, count,
						  key->objectid, key->offset);
			break;
		default:
			btrfs_err(fs_info, "invalid key type in iref");
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
		ptr += btrfs_extent_inline_ref_size(type);
	}
	return ret;
}

static int process_leaf(struct btrfs_root *root,
			struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	u32 count;
	int i = 0, tree_block_level = 0, ret = 0;
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(leaf);

	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		switch (key.type) {
		case BTRFS_EXTENT_ITEM_KEY:
			*num_bytes = key.offset;
			/* fall through */
		case BTRFS_METADATA_ITEM_KEY:
			*bytenr = key.objectid;
			ret = process_extent_item(fs_info, path, &key, i,
						  &tree_block_level);
			break;
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, key.offset, 0,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, key.offset,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = btrfs_item_ptr(leaf, i,
					      struct btrfs_extent_data_ref);
			ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
						  *num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = btrfs_item_ptr(leaf, i,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, key.offset, count,
						  *bytenr, *num_bytes);
			break;
		default:
			break;
		}
		if (ret)
			break;
	}
	return ret;
}

/* Walk down to the leaf from the given level */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  int level, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *eb;
	u64 block_bytenr, gen;
	int ret = 0;

	while (level >= 0) {
		if (level) {
			struct btrfs_key first_key;

			block_bytenr = btrfs_node_blockptr(path->nodes[level],
							   path->slots[level]);
			gen = btrfs_node_ptr_generation(path->nodes[level],
							path->slots[level]);
			btrfs_node_key_to_cpu(path->nodes[level], &first_key,
					      path->slots[level]);
			eb = read_tree_block(fs_info, block_bytenr, gen,
					     level - 1, &first_key);
			if (IS_ERR(eb))
				return PTR_ERR(eb);
			if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				return -EIO;
			}
			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->nodes[level - 1] = eb;
			path->slots[level - 1] = 0;
			path->locks[level - 1] = BTRFS_READ_LOCK_BLOCKING;
		} else {
			ret = process_leaf(root, path, bytenr, num_bytes);
			if (ret)
				break;
		}
		level--;
	}
	return ret;
}

/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_path *path, int *level)
{
	int l;

	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
		if (!path->nodes[l])
			continue;
		if (l) {
			path->slots[l]++;
			if (path->slots[l] <
			    btrfs_header_nritems(path->nodes[l])) {
				*level = l;
				return 0;
			}
		}
		btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
		free_extent_buffer(path->nodes[l]);
		path->nodes[l] = NULL;
		path->slots[l] = 0;
		path->locks[l] = 0;
	}

	return 1;
}

static void dump_ref_action(struct btrfs_fs_info *fs_info,
			    struct ref_action *ra)
{
	btrfs_err(fs_info,
		  " Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
		  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
		  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
	__print_stack_trace(fs_info, ra);
}

/*
 * Dumps all the information from the block entry to printk, it's going to be
 * awesome.
 */
static void dump_block_entry(struct btrfs_fs_info *fs_info,
			     struct block_entry *be)
{
	struct ref_entry *ref;
	struct root_entry *re;
	struct ref_action *ra;
	struct rb_node *n;

	btrfs_err(fs_info,
		  "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
		  be->bytenr, be->len, be->num_refs, be->metadata,
		  be->from_disk);

	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
		ref = rb_entry(n, struct ref_entry, node);
		btrfs_err(fs_info,
			  " ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			  ref->root_objectid, ref->parent, ref->owner,
			  ref->offset, ref->num_refs);
	}

	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
		re = rb_entry(n, struct root_entry, node);
		btrfs_err(fs_info, " root entry %llu, num_refs %llu",
			  re->root_objectid, re->num_refs);
	}

	list_for_each_entry(ra, &be->actions, list)
		dump_ref_action(fs_info, ra);
}

/*
 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
 * @root:	the root we are making this modification from.
 * @bytenr:	the bytenr we are modifying.
 * @num_bytes:	number of bytes.
 * @parent:	the parent bytenr.
 * @ref_root:	the original root owner of the bytenr.
 * @owner:	level in the case of metadata, inode in the case of data.
 * @offset:	0 for metadata, file offset for data.
 * @action:	the action that we are doing, this is the same as the delayed
 *		ref action.
 *
 * This will add an action item to the given bytenr and do sanity checks to
 * make sure we haven't messed something up.  If we are making a new allocation
 * and this block entry has history we will delete all previous actions as long
 * as our sanity checks pass, as they are no longer needed.
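 *
 * Returns 0 on success.  On failure the REF_VERIFY mount option is cleared, so
 * verification is disabled for the rest of this mount.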
 */
int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
		       u64 parent, u64 ref_root, u64 owner, u64 offset,
		       int action)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ref_entry *ref = NULL, *exist;
	struct ref_action *ra = NULL;
	struct block_entry *be = NULL;
	struct root_entry *re = NULL;
	int ret = 0;
	bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

	if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
		return 0;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
	if (!ra || !ref) {
		kfree(ref);
		kfree(ra);
		ret = -ENOMEM;
		goto out;
	}

	if (parent) {
		ref->parent = parent;
	} else {
		ref->root_objectid = ref_root;
		ref->owner = owner;
		ref->offset = offset;
	}
	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;

	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
	/*
	 * Save the extra info from the delayed ref in the ref action to make
	 * it easier to figure out what is happening.  The real refs we add to
	 * the ref tree need to reflect what we save on disk so it matches any
	 * on-disk refs we pre-loaded.
	 */
	ra->ref.owner = owner;
	ra->ref.offset = offset;
	ra->ref.root_objectid = ref_root;
	__save_stack_trace(ra);

	INIT_LIST_HEAD(&ra->list);
	ra->action = action;
	ra->root = root->objectid;

	/*
	 * This is an allocation; preallocate the block_entry in case we
	 * haven't used it before.
	 */
	ret = -EINVAL;
	if (action == BTRFS_ADD_DELAYED_EXTENT) {
		/*
		 * For subvol_create we'll just pass in whatever the parent
		 * root is and the new root objectid, so let's not treat the
		 * passed in root as if it really has a ref for this bytenr.
		 */
		be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
		if (IS_ERR(be)) {
			kfree(ref);
			kfree(ra);
			ret = PTR_ERR(be);
			goto out;
		}
		be->num_refs++;
		if (metadata)
			be->metadata = 1;

		if (be->num_refs != 1) {
			btrfs_err(fs_info,
				  "re-allocated a block that still has references to it!");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		while (!list_empty(&be->actions)) {
			struct ref_action *tmp;

			tmp = list_first_entry(&be->actions, struct ref_action,
					       list);
			list_del(&tmp->list);
			kfree(tmp);
		}
	} else {
		struct root_entry *tmp;

		if (!parent) {
			re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
			if (!re) {
				kfree(ref);
				kfree(ra);
				ret = -ENOMEM;
				goto out;
			}
			/*
			 * This is the root that is modifying us, so it's the
			 * one we want to lookup below when we modify the
			 * re->num_refs.
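			 * If the root entry is already present in the block's
			 * roots tree, insert_root_entry() below hands back the
			 * existing entry and this copy gets freed.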
			 */
			ref_root = root->objectid;
			re->root_objectid = root->objectid;
			re->num_refs = 0;
		}

		spin_lock(&root->fs_info->ref_verify_lock);
		be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
		if (!be) {
			btrfs_err(fs_info,
				  "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				  action, (unsigned long long)bytenr,
				  (unsigned long long)num_bytes);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		if (!parent) {
			tmp = insert_root_entry(&be->roots, re);
			if (tmp) {
				kfree(re);
				re = tmp;
			}
		}
	}

	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		if (action == BTRFS_DROP_DELAYED_REF) {
			if (exist->num_refs == 0) {
				btrfs_err(fs_info,
					  "dropping a ref for an existing root that doesn't have a ref on the block");
				dump_block_entry(fs_info, be);
				dump_ref_action(fs_info, ra);
				kfree(ref);
				kfree(ra);
				goto out_unlock;
			}
			exist->num_refs--;
			if (exist->num_refs == 0) {
				rb_erase(&exist->node, &be->refs);
				kfree(exist);
			}
		} else if (!be->metadata) {
			exist->num_refs++;
		} else {
			btrfs_err(fs_info,
				  "attempting to add another ref for an existing ref on a tree block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}
		kfree(ref);
	} else {
		if (action == BTRFS_DROP_DELAYED_REF) {
			btrfs_err(fs_info,
				  "dropping a ref for a root that doesn't have a ref on the block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}

	if (!parent && !re) {
		re = lookup_root_entry(&be->roots, ref_root);
		if (!re) {
			/*
			 * This shouldn't happen because we will add our re
			 * above when we lookup the be with !parent, but just in
			 * case catch this so we don't panic because I didn't
			 * think of some other corner case.
			 */
			btrfs_err(fs_info, "failed to find root %llu for %llu",
				  root->objectid, be->bytenr);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}
	if (action == BTRFS_DROP_DELAYED_REF) {
		if (re)
			re->num_refs--;
		be->num_refs--;
	} else if (action == BTRFS_ADD_DELAYED_REF) {
		be->num_refs++;
		if (re)
			re->num_refs++;
	}
	list_add_tail(&ra->list, &be->actions);
	ret = 0;
out_unlock:
	spin_unlock(&root->fs_info->ref_verify_lock);
out:
	if (ret)
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
	return ret;
}

/* Free up the ref cache */
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
	struct block_entry *be;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	while ((n = rb_first(&fs_info->block_tree))) {
		be = rb_entry(n, struct block_entry, node);
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
		cond_resched_lock(&fs_info->ref_verify_lock);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
			       u64 len)
{
	struct block_entry *be = NULL, *entry;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	n = fs_info->block_tree.rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < start) {
			n = n->rb_right;
		} else if (entry->bytenr > start) {
			n = n->rb_left;
		} else {
			be = entry;
			break;
		}
		/* We want to get as close to start as possible */
		if (be == NULL ||
		    (entry->bytenr < start && be->bytenr > start) ||
		    (entry->bytenr < start && entry->bytenr > be->bytenr))
			be = entry;
	}

	/*
	 * Could have an empty block group, maybe have something to check for
	 * this case to verify we were actually empty?
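	 * For now, if no entry was found just drop the lock and return.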
	 */
	if (!be) {
		spin_unlock(&fs_info->ref_verify_lock);
		return;
	}

	n = &be->node;
	while (n) {
		be = rb_entry(n, struct block_entry, node);
		n = rb_next(n);
		if (be->bytenr < start && be->bytenr + be->len > start) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
			continue;
		}
		if (be->bytenr < start)
			continue;
		if (be->bytenr >= start + len)
			break;
		if (be->bytenr + be->len > start + len) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
		}
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

/* Walk down all roots and build the ref tree, meant to be called at mount */
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct extent_buffer *eb;
	u64 bytenr = 0, num_bytes = 0;
	int ret, level;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	eb = btrfs_read_lock_root_node(fs_info->extent_root);
	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
	level = btrfs_header_level(eb);
	path->nodes[level] = eb;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

	while (1) {
		/*
		 * We have to keep track of the bytenr/num_bytes we last hit
		 * because we could have run out of space for an inline ref,
		 * and would have had to add a ref key item which may appear on
		 * a different leaf from the original extent item.
		 */
		ret = walk_down_tree(fs_info->extent_root, path, level,
				     &bytenr, &num_bytes);
		if (ret)
			break;
		ret = walk_up_tree(path, &level);
		if (ret < 0)
			break;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}
	if (ret) {
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
		btrfs_free_ref_cache(fs_info);
	}
	btrfs_free_path(path);
	return ret;
}