1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2014 Facebook. All rights reserved. 4 */ 5 6 #include <linux/sched.h> 7 #include <linux/stacktrace.h> 8 #include "messages.h" 9 #include "ctree.h" 10 #include "disk-io.h" 11 #include "locking.h" 12 #include "delayed-ref.h" 13 #include "ref-verify.h" 14 #include "fs.h" 15 #include "accessors.h" 16 17 /* 18 * Used to keep track the roots and number of refs each root has for a given 19 * bytenr. This just tracks the number of direct references, no shared 20 * references. 21 */ 22 struct root_entry { 23 u64 root_objectid; 24 u64 num_refs; 25 struct rb_node node; 26 }; 27 28 /* 29 * These are meant to represent what should exist in the extent tree, these can 30 * be used to verify the extent tree is consistent as these should all match 31 * what the extent tree says. 32 */ 33 struct ref_entry { 34 u64 root_objectid; 35 u64 parent; 36 u64 owner; 37 u64 offset; 38 u64 num_refs; 39 struct rb_node node; 40 }; 41 42 #define MAX_TRACE 16 43 44 /* 45 * Whenever we add/remove a reference we record the action. The action maps 46 * back to the delayed ref action. We hold the ref we are changing in the 47 * action so we can account for the history properly, and we record the root we 48 * were called with since it could be different from ref_root. We also store 49 * stack traces because that's how I roll. 50 */ 51 struct ref_action { 52 int action; 53 u64 root; 54 struct ref_entry ref; 55 struct list_head list; 56 unsigned long trace[MAX_TRACE]; 57 unsigned int trace_len; 58 }; 59 60 /* 61 * One of these for every block we reference, it holds the roots and references 62 * to it as well as all of the ref actions that have occurred to it. We never 63 * free it until we unmount the file system in order to make sure re-allocations 64 * are happening properly. 65 */ 66 struct block_entry { 67 u64 bytenr; 68 u64 len; 69 u64 num_refs; 70 int metadata; 71 int from_disk; 72 struct rb_root roots; 73 struct rb_root refs; 74 struct rb_node node; 75 struct list_head actions; 76 }; 77 78 static struct block_entry *insert_block_entry(struct rb_root *root, 79 struct block_entry *be) 80 { 81 struct rb_node **p = &root->rb_node; 82 struct rb_node *parent_node = NULL; 83 struct block_entry *entry; 84 85 while (*p) { 86 parent_node = *p; 87 entry = rb_entry(parent_node, struct block_entry, node); 88 if (entry->bytenr > be->bytenr) 89 p = &(*p)->rb_left; 90 else if (entry->bytenr < be->bytenr) 91 p = &(*p)->rb_right; 92 else 93 return entry; 94 } 95 96 rb_link_node(&be->node, parent_node, p); 97 rb_insert_color(&be->node, root); 98 return NULL; 99 } 100 101 static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr) 102 { 103 struct rb_node *n; 104 struct block_entry *entry = NULL; 105 106 n = root->rb_node; 107 while (n) { 108 entry = rb_entry(n, struct block_entry, node); 109 if (entry->bytenr < bytenr) 110 n = n->rb_right; 111 else if (entry->bytenr > bytenr) 112 n = n->rb_left; 113 else 114 return entry; 115 } 116 return NULL; 117 } 118 119 static struct root_entry *insert_root_entry(struct rb_root *root, 120 struct root_entry *re) 121 { 122 struct rb_node **p = &root->rb_node; 123 struct rb_node *parent_node = NULL; 124 struct root_entry *entry; 125 126 while (*p) { 127 parent_node = *p; 128 entry = rb_entry(parent_node, struct root_entry, node); 129 if (entry->root_objectid > re->root_objectid) 130 p = &(*p)->rb_left; 131 else if (entry->root_objectid < re->root_objectid) 132 p = &(*p)->rb_right; 133 else 134 return entry; 135 } 136 137 rb_link_node(&re->node, parent_node, p); 138 rb_insert_color(&re->node, root); 139 return NULL; 140 141 } 142 143 static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2) 144 { 145 if (ref1->root_objectid < ref2->root_objectid) 146 return -1; 147 if (ref1->root_objectid > ref2->root_objectid) 148 return 1; 149 if (ref1->parent < ref2->parent) 150 return -1; 151 if (ref1->parent > ref2->parent) 152 return 1; 153 if (ref1->owner < ref2->owner) 154 return -1; 155 if (ref1->owner > ref2->owner) 156 return 1; 157 if (ref1->offset < ref2->offset) 158 return -1; 159 if (ref1->offset > ref2->offset) 160 return 1; 161 return 0; 162 } 163 164 static struct ref_entry *insert_ref_entry(struct rb_root *root, 165 struct ref_entry *ref) 166 { 167 struct rb_node **p = &root->rb_node; 168 struct rb_node *parent_node = NULL; 169 struct ref_entry *entry; 170 int cmp; 171 172 while (*p) { 173 parent_node = *p; 174 entry = rb_entry(parent_node, struct ref_entry, node); 175 cmp = comp_refs(entry, ref); 176 if (cmp > 0) 177 p = &(*p)->rb_left; 178 else if (cmp < 0) 179 p = &(*p)->rb_right; 180 else 181 return entry; 182 } 183 184 rb_link_node(&ref->node, parent_node, p); 185 rb_insert_color(&ref->node, root); 186 return NULL; 187 188 } 189 190 static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid) 191 { 192 struct rb_node *n; 193 struct root_entry *entry = NULL; 194 195 n = root->rb_node; 196 while (n) { 197 entry = rb_entry(n, struct root_entry, node); 198 if (entry->root_objectid < objectid) 199 n = n->rb_right; 200 else if (entry->root_objectid > objectid) 201 n = n->rb_left; 202 else 203 return entry; 204 } 205 return NULL; 206 } 207 208 #ifdef CONFIG_STACKTRACE 209 static void __save_stack_trace(struct ref_action *ra) 210 { 211 ra->trace_len = stack_trace_save(ra->trace, MAX_TRACE, 2); 212 } 213 214 static void __print_stack_trace(struct btrfs_fs_info *fs_info, 215 struct ref_action *ra) 216 { 217 if (ra->trace_len == 0) { 218 btrfs_err(fs_info, " ref-verify: no stacktrace"); 219 return; 220 } 221 stack_trace_print(ra->trace, ra->trace_len, 2); 222 } 223 #else 224 static inline void __save_stack_trace(struct ref_action *ra) 225 { 226 } 227 228 static inline void __print_stack_trace(struct btrfs_fs_info *fs_info, 229 struct ref_action *ra) 230 { 231 btrfs_err(fs_info, " ref-verify: no stacktrace support"); 232 } 233 #endif 234 235 static void free_block_entry(struct block_entry *be) 236 { 237 struct root_entry *re; 238 struct ref_entry *ref; 239 struct ref_action *ra; 240 struct rb_node *n; 241 242 while ((n = rb_first(&be->roots))) { 243 re = rb_entry(n, struct root_entry, node); 244 rb_erase(&re->node, &be->roots); 245 kfree(re); 246 } 247 248 while((n = rb_first(&be->refs))) { 249 ref = rb_entry(n, struct ref_entry, node); 250 rb_erase(&ref->node, &be->refs); 251 kfree(ref); 252 } 253 254 while (!list_empty(&be->actions)) { 255 ra = list_first_entry(&be->actions, struct ref_action, 256 list); 257 list_del(&ra->list); 258 kfree(ra); 259 } 260 kfree(be); 261 } 262 263 static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info, 264 u64 bytenr, u64 len, 265 u64 root_objectid) 266 { 267 struct block_entry *be = NULL, *exist; 268 struct root_entry *re = NULL; 269 270 re = kzalloc(sizeof(struct root_entry), GFP_NOFS); 271 be = kzalloc(sizeof(struct block_entry), GFP_NOFS); 272 if (!be || !re) { 273 kfree(re); 274 kfree(be); 275 return ERR_PTR(-ENOMEM); 276 } 277 be->bytenr = bytenr; 278 be->len = len; 279 280 re->root_objectid = root_objectid; 281 re->num_refs = 0; 282 283 spin_lock(&fs_info->ref_verify_lock); 284 exist = insert_block_entry(&fs_info->block_tree, be); 285 if (exist) { 286 if (root_objectid) { 287 struct root_entry *exist_re; 288 289 exist_re = insert_root_entry(&exist->roots, re); 290 if (exist_re) 291 kfree(re); 292 } else { 293 kfree(re); 294 } 295 kfree(be); 296 return exist; 297 } 298 299 be->num_refs = 0; 300 be->metadata = 0; 301 be->from_disk = 0; 302 be->roots = RB_ROOT; 303 be->refs = RB_ROOT; 304 INIT_LIST_HEAD(&be->actions); 305 if (root_objectid) 306 insert_root_entry(&be->roots, re); 307 else 308 kfree(re); 309 return be; 310 } 311 312 static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root, 313 u64 parent, u64 bytenr, int level) 314 { 315 struct block_entry *be; 316 struct root_entry *re; 317 struct ref_entry *ref = NULL, *exist; 318 319 ref = kmalloc(sizeof(struct ref_entry), GFP_NOFS); 320 if (!ref) 321 return -ENOMEM; 322 323 if (parent) 324 ref->root_objectid = 0; 325 else 326 ref->root_objectid = ref_root; 327 ref->parent = parent; 328 ref->owner = level; 329 ref->offset = 0; 330 ref->num_refs = 1; 331 332 be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root); 333 if (IS_ERR(be)) { 334 kfree(ref); 335 return PTR_ERR(be); 336 } 337 be->num_refs++; 338 be->from_disk = 1; 339 be->metadata = 1; 340 341 if (!parent) { 342 ASSERT(ref_root); 343 re = lookup_root_entry(&be->roots, ref_root); 344 ASSERT(re); 345 re->num_refs++; 346 } 347 exist = insert_ref_entry(&be->refs, ref); 348 if (exist) { 349 exist->num_refs++; 350 kfree(ref); 351 } 352 spin_unlock(&fs_info->ref_verify_lock); 353 354 return 0; 355 } 356 357 static int add_shared_data_ref(struct btrfs_fs_info *fs_info, 358 u64 parent, u32 num_refs, u64 bytenr, 359 u64 num_bytes) 360 { 361 struct block_entry *be; 362 struct ref_entry *ref; 363 364 ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS); 365 if (!ref) 366 return -ENOMEM; 367 be = add_block_entry(fs_info, bytenr, num_bytes, 0); 368 if (IS_ERR(be)) { 369 kfree(ref); 370 return PTR_ERR(be); 371 } 372 be->num_refs += num_refs; 373 374 ref->parent = parent; 375 ref->num_refs = num_refs; 376 if (insert_ref_entry(&be->refs, ref)) { 377 spin_unlock(&fs_info->ref_verify_lock); 378 btrfs_err(fs_info, "existing shared ref when reading from disk?"); 379 kfree(ref); 380 return -EINVAL; 381 } 382 spin_unlock(&fs_info->ref_verify_lock); 383 return 0; 384 } 385 386 static int add_extent_data_ref(struct btrfs_fs_info *fs_info, 387 struct extent_buffer *leaf, 388 struct btrfs_extent_data_ref *dref, 389 u64 bytenr, u64 num_bytes) 390 { 391 struct block_entry *be; 392 struct ref_entry *ref; 393 struct root_entry *re; 394 u64 ref_root = btrfs_extent_data_ref_root(leaf, dref); 395 u64 owner = btrfs_extent_data_ref_objectid(leaf, dref); 396 u64 offset = btrfs_extent_data_ref_offset(leaf, dref); 397 u32 num_refs = btrfs_extent_data_ref_count(leaf, dref); 398 399 ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS); 400 if (!ref) 401 return -ENOMEM; 402 be = add_block_entry(fs_info, bytenr, num_bytes, ref_root); 403 if (IS_ERR(be)) { 404 kfree(ref); 405 return PTR_ERR(be); 406 } 407 be->num_refs += num_refs; 408 409 ref->parent = 0; 410 ref->owner = owner; 411 ref->root_objectid = ref_root; 412 ref->offset = offset; 413 ref->num_refs = num_refs; 414 if (insert_ref_entry(&be->refs, ref)) { 415 spin_unlock(&fs_info->ref_verify_lock); 416 btrfs_err(fs_info, "existing ref when reading from disk?"); 417 kfree(ref); 418 return -EINVAL; 419 } 420 421 re = lookup_root_entry(&be->roots, ref_root); 422 if (!re) { 423 spin_unlock(&fs_info->ref_verify_lock); 424 btrfs_err(fs_info, "missing root in new block entry?"); 425 return -EINVAL; 426 } 427 re->num_refs += num_refs; 428 spin_unlock(&fs_info->ref_verify_lock); 429 return 0; 430 } 431 432 static int process_extent_item(struct btrfs_fs_info *fs_info, 433 struct btrfs_path *path, struct btrfs_key *key, 434 int slot, int *tree_block_level) 435 { 436 struct btrfs_extent_item *ei; 437 struct btrfs_extent_inline_ref *iref; 438 struct btrfs_extent_data_ref *dref; 439 struct btrfs_shared_data_ref *sref; 440 struct extent_buffer *leaf = path->nodes[0]; 441 u32 item_size = btrfs_item_size(leaf, slot); 442 unsigned long end, ptr; 443 u64 offset, flags, count; 444 int type; 445 int ret = 0; 446 447 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 448 flags = btrfs_extent_flags(leaf, ei); 449 450 if ((key->type == BTRFS_EXTENT_ITEM_KEY) && 451 flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 452 struct btrfs_tree_block_info *info; 453 454 info = (struct btrfs_tree_block_info *)(ei + 1); 455 *tree_block_level = btrfs_tree_block_level(leaf, info); 456 iref = (struct btrfs_extent_inline_ref *)(info + 1); 457 } else { 458 if (key->type == BTRFS_METADATA_ITEM_KEY) 459 *tree_block_level = key->offset; 460 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 461 } 462 463 ptr = (unsigned long)iref; 464 end = (unsigned long)ei + item_size; 465 while (ptr < end) { 466 iref = (struct btrfs_extent_inline_ref *)ptr; 467 type = btrfs_extent_inline_ref_type(leaf, iref); 468 offset = btrfs_extent_inline_ref_offset(leaf, iref); 469 switch (type) { 470 case BTRFS_TREE_BLOCK_REF_KEY: 471 ret = add_tree_block(fs_info, offset, 0, key->objectid, 472 *tree_block_level); 473 break; 474 case BTRFS_SHARED_BLOCK_REF_KEY: 475 ret = add_tree_block(fs_info, 0, offset, key->objectid, 476 *tree_block_level); 477 break; 478 case BTRFS_EXTENT_DATA_REF_KEY: 479 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 480 ret = add_extent_data_ref(fs_info, leaf, dref, 481 key->objectid, key->offset); 482 break; 483 case BTRFS_SHARED_DATA_REF_KEY: 484 sref = (struct btrfs_shared_data_ref *)(iref + 1); 485 count = btrfs_shared_data_ref_count(leaf, sref); 486 ret = add_shared_data_ref(fs_info, offset, count, 487 key->objectid, key->offset); 488 break; 489 case BTRFS_EXTENT_OWNER_REF_KEY: 490 if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) { 491 btrfs_err(fs_info, 492 "found extent owner ref without simple quotas enabled"); 493 ret = -EINVAL; 494 } 495 break; 496 default: 497 btrfs_err(fs_info, "invalid key type in iref"); 498 ret = -EINVAL; 499 break; 500 } 501 if (ret) 502 break; 503 ptr += btrfs_extent_inline_ref_size(type); 504 } 505 return ret; 506 } 507 508 static int process_leaf(struct btrfs_root *root, 509 struct btrfs_path *path, u64 *bytenr, u64 *num_bytes, 510 int *tree_block_level) 511 { 512 struct btrfs_fs_info *fs_info = root->fs_info; 513 struct extent_buffer *leaf = path->nodes[0]; 514 struct btrfs_extent_data_ref *dref; 515 struct btrfs_shared_data_ref *sref; 516 u32 count; 517 int i = 0, ret = 0; 518 struct btrfs_key key; 519 int nritems = btrfs_header_nritems(leaf); 520 521 for (i = 0; i < nritems; i++) { 522 btrfs_item_key_to_cpu(leaf, &key, i); 523 switch (key.type) { 524 case BTRFS_EXTENT_ITEM_KEY: 525 *num_bytes = key.offset; 526 fallthrough; 527 case BTRFS_METADATA_ITEM_KEY: 528 *bytenr = key.objectid; 529 ret = process_extent_item(fs_info, path, &key, i, 530 tree_block_level); 531 break; 532 case BTRFS_TREE_BLOCK_REF_KEY: 533 ret = add_tree_block(fs_info, key.offset, 0, 534 key.objectid, *tree_block_level); 535 break; 536 case BTRFS_SHARED_BLOCK_REF_KEY: 537 ret = add_tree_block(fs_info, 0, key.offset, 538 key.objectid, *tree_block_level); 539 break; 540 case BTRFS_EXTENT_DATA_REF_KEY: 541 dref = btrfs_item_ptr(leaf, i, 542 struct btrfs_extent_data_ref); 543 ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr, 544 *num_bytes); 545 break; 546 case BTRFS_SHARED_DATA_REF_KEY: 547 sref = btrfs_item_ptr(leaf, i, 548 struct btrfs_shared_data_ref); 549 count = btrfs_shared_data_ref_count(leaf, sref); 550 ret = add_shared_data_ref(fs_info, key.offset, count, 551 *bytenr, *num_bytes); 552 break; 553 default: 554 break; 555 } 556 if (ret) 557 break; 558 } 559 return ret; 560 } 561 562 /* Walk down to the leaf from the given level */ 563 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, 564 int level, u64 *bytenr, u64 *num_bytes, 565 int *tree_block_level) 566 { 567 struct extent_buffer *eb; 568 int ret = 0; 569 570 while (level >= 0) { 571 if (level) { 572 eb = btrfs_read_node_slot(path->nodes[level], 573 path->slots[level]); 574 if (IS_ERR(eb)) 575 return PTR_ERR(eb); 576 btrfs_tree_read_lock(eb); 577 path->nodes[level-1] = eb; 578 path->slots[level-1] = 0; 579 path->locks[level-1] = BTRFS_READ_LOCK; 580 } else { 581 ret = process_leaf(root, path, bytenr, num_bytes, 582 tree_block_level); 583 if (ret) 584 break; 585 } 586 level--; 587 } 588 return ret; 589 } 590 591 /* Walk up to the next node that needs to be processed */ 592 static int walk_up_tree(struct btrfs_path *path, int *level) 593 { 594 int l; 595 596 for (l = 0; l < BTRFS_MAX_LEVEL; l++) { 597 if (!path->nodes[l]) 598 continue; 599 if (l) { 600 path->slots[l]++; 601 if (path->slots[l] < 602 btrfs_header_nritems(path->nodes[l])) { 603 *level = l; 604 return 0; 605 } 606 } 607 btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]); 608 free_extent_buffer(path->nodes[l]); 609 path->nodes[l] = NULL; 610 path->slots[l] = 0; 611 path->locks[l] = 0; 612 } 613 614 return 1; 615 } 616 617 static void dump_ref_action(struct btrfs_fs_info *fs_info, 618 struct ref_action *ra) 619 { 620 btrfs_err(fs_info, 621 " Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu", 622 ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent, 623 ra->ref.owner, ra->ref.offset, ra->ref.num_refs); 624 __print_stack_trace(fs_info, ra); 625 } 626 627 /* 628 * Dumps all the information from the block entry to printk, it's going to be 629 * awesome. 630 */ 631 static void dump_block_entry(struct btrfs_fs_info *fs_info, 632 struct block_entry *be) 633 { 634 struct ref_entry *ref; 635 struct root_entry *re; 636 struct ref_action *ra; 637 struct rb_node *n; 638 639 btrfs_err(fs_info, 640 "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d", 641 be->bytenr, be->len, be->num_refs, be->metadata, 642 be->from_disk); 643 644 for (n = rb_first(&be->refs); n; n = rb_next(n)) { 645 ref = rb_entry(n, struct ref_entry, node); 646 btrfs_err(fs_info, 647 " ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu", 648 ref->root_objectid, ref->parent, ref->owner, 649 ref->offset, ref->num_refs); 650 } 651 652 for (n = rb_first(&be->roots); n; n = rb_next(n)) { 653 re = rb_entry(n, struct root_entry, node); 654 btrfs_err(fs_info, " root entry %llu, num_refs %llu", 655 re->root_objectid, re->num_refs); 656 } 657 658 list_for_each_entry(ra, &be->actions, list) 659 dump_ref_action(fs_info, ra); 660 } 661 662 /* 663 * Called when we modify a ref for a bytenr. 664 * 665 * This will add an action item to the given bytenr and do sanity checks to make 666 * sure we haven't messed something up. If we are making a new allocation and 667 * this block entry has history we will delete all previous actions as long as 668 * our sanity checks pass as they are no longer needed. 669 */ 670 int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, 671 struct btrfs_ref *generic_ref) 672 { 673 struct ref_entry *ref = NULL, *exist; 674 struct ref_action *ra = NULL; 675 struct block_entry *be = NULL; 676 struct root_entry *re = NULL; 677 int action = generic_ref->action; 678 int ret = 0; 679 bool metadata; 680 u64 bytenr = generic_ref->bytenr; 681 u64 num_bytes = generic_ref->num_bytes; 682 u64 parent = generic_ref->parent; 683 u64 ref_root = 0; 684 u64 owner = 0; 685 u64 offset = 0; 686 687 if (!btrfs_test_opt(fs_info, REF_VERIFY)) 688 return 0; 689 690 if (generic_ref->type == BTRFS_REF_METADATA) { 691 if (!parent) 692 ref_root = generic_ref->ref_root; 693 owner = generic_ref->tree_ref.level; 694 } else if (!parent) { 695 ref_root = generic_ref->ref_root; 696 owner = generic_ref->data_ref.objectid; 697 offset = generic_ref->data_ref.offset; 698 } 699 metadata = owner < BTRFS_FIRST_FREE_OBJECTID; 700 701 ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS); 702 ra = kmalloc(sizeof(struct ref_action), GFP_NOFS); 703 if (!ra || !ref) { 704 kfree(ref); 705 kfree(ra); 706 ret = -ENOMEM; 707 goto out; 708 } 709 710 ref->parent = parent; 711 ref->owner = owner; 712 ref->root_objectid = ref_root; 713 ref->offset = offset; 714 ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1; 715 716 memcpy(&ra->ref, ref, sizeof(struct ref_entry)); 717 /* 718 * Save the extra info from the delayed ref in the ref action to make it 719 * easier to figure out what is happening. The real ref's we add to the 720 * ref tree need to reflect what we save on disk so it matches any 721 * on-disk refs we pre-loaded. 722 */ 723 ra->ref.owner = owner; 724 ra->ref.offset = offset; 725 ra->ref.root_objectid = ref_root; 726 __save_stack_trace(ra); 727 728 INIT_LIST_HEAD(&ra->list); 729 ra->action = action; 730 ra->root = generic_ref->real_root; 731 732 /* 733 * This is an allocation, preallocate the block_entry in case we haven't 734 * used it before. 735 */ 736 ret = -EINVAL; 737 if (action == BTRFS_ADD_DELAYED_EXTENT) { 738 /* 739 * For subvol_create we'll just pass in whatever the parent root 740 * is and the new root objectid, so let's not treat the passed 741 * in root as if it really has a ref for this bytenr. 742 */ 743 be = add_block_entry(fs_info, bytenr, num_bytes, ref_root); 744 if (IS_ERR(be)) { 745 kfree(ref); 746 kfree(ra); 747 ret = PTR_ERR(be); 748 goto out; 749 } 750 be->num_refs++; 751 if (metadata) 752 be->metadata = 1; 753 754 if (be->num_refs != 1) { 755 btrfs_err(fs_info, 756 "re-allocated a block that still has references to it!"); 757 dump_block_entry(fs_info, be); 758 dump_ref_action(fs_info, ra); 759 kfree(ref); 760 kfree(ra); 761 goto out_unlock; 762 } 763 764 while (!list_empty(&be->actions)) { 765 struct ref_action *tmp; 766 767 tmp = list_first_entry(&be->actions, struct ref_action, 768 list); 769 list_del(&tmp->list); 770 kfree(tmp); 771 } 772 } else { 773 struct root_entry *tmp; 774 775 if (!parent) { 776 re = kmalloc(sizeof(struct root_entry), GFP_NOFS); 777 if (!re) { 778 kfree(ref); 779 kfree(ra); 780 ret = -ENOMEM; 781 goto out; 782 } 783 /* 784 * This is the root that is modifying us, so it's the 785 * one we want to lookup below when we modify the 786 * re->num_refs. 787 */ 788 ref_root = generic_ref->real_root; 789 re->root_objectid = generic_ref->real_root; 790 re->num_refs = 0; 791 } 792 793 spin_lock(&fs_info->ref_verify_lock); 794 be = lookup_block_entry(&fs_info->block_tree, bytenr); 795 if (!be) { 796 btrfs_err(fs_info, 797 "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!", 798 action, bytenr, num_bytes); 799 dump_ref_action(fs_info, ra); 800 kfree(ref); 801 kfree(ra); 802 kfree(re); 803 goto out_unlock; 804 } else if (be->num_refs == 0) { 805 btrfs_err(fs_info, 806 "trying to do action %d for a bytenr that has 0 total references", 807 action); 808 dump_block_entry(fs_info, be); 809 dump_ref_action(fs_info, ra); 810 kfree(ref); 811 kfree(ra); 812 kfree(re); 813 goto out_unlock; 814 } 815 816 if (!parent) { 817 tmp = insert_root_entry(&be->roots, re); 818 if (tmp) { 819 kfree(re); 820 re = tmp; 821 } 822 } 823 } 824 825 exist = insert_ref_entry(&be->refs, ref); 826 if (exist) { 827 if (action == BTRFS_DROP_DELAYED_REF) { 828 if (exist->num_refs == 0) { 829 btrfs_err(fs_info, 830 "dropping a ref for a existing root that doesn't have a ref on the block"); 831 dump_block_entry(fs_info, be); 832 dump_ref_action(fs_info, ra); 833 kfree(ref); 834 kfree(ra); 835 goto out_unlock; 836 } 837 exist->num_refs--; 838 if (exist->num_refs == 0) { 839 rb_erase(&exist->node, &be->refs); 840 kfree(exist); 841 } 842 } else if (!be->metadata) { 843 exist->num_refs++; 844 } else { 845 btrfs_err(fs_info, 846 "attempting to add another ref for an existing ref on a tree block"); 847 dump_block_entry(fs_info, be); 848 dump_ref_action(fs_info, ra); 849 kfree(ref); 850 kfree(ra); 851 goto out_unlock; 852 } 853 kfree(ref); 854 } else { 855 if (action == BTRFS_DROP_DELAYED_REF) { 856 btrfs_err(fs_info, 857 "dropping a ref for a root that doesn't have a ref on the block"); 858 dump_block_entry(fs_info, be); 859 dump_ref_action(fs_info, ra); 860 rb_erase(&ref->node, &be->refs); 861 kfree(ref); 862 kfree(ra); 863 goto out_unlock; 864 } 865 } 866 867 if (!parent && !re) { 868 re = lookup_root_entry(&be->roots, ref_root); 869 if (!re) { 870 /* 871 * This shouldn't happen because we will add our re 872 * above when we lookup the be with !parent, but just in 873 * case catch this case so we don't panic because I 874 * didn't think of some other corner case. 875 */ 876 btrfs_err(fs_info, "failed to find root %llu for %llu", 877 generic_ref->real_root, be->bytenr); 878 dump_block_entry(fs_info, be); 879 dump_ref_action(fs_info, ra); 880 kfree(ra); 881 goto out_unlock; 882 } 883 } 884 if (action == BTRFS_DROP_DELAYED_REF) { 885 if (re) 886 re->num_refs--; 887 be->num_refs--; 888 } else if (action == BTRFS_ADD_DELAYED_REF) { 889 be->num_refs++; 890 if (re) 891 re->num_refs++; 892 } 893 list_add_tail(&ra->list, &be->actions); 894 ret = 0; 895 out_unlock: 896 spin_unlock(&fs_info->ref_verify_lock); 897 out: 898 if (ret) { 899 btrfs_free_ref_cache(fs_info); 900 btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); 901 } 902 return ret; 903 } 904 905 /* Free up the ref cache */ 906 void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info) 907 { 908 struct block_entry *be; 909 struct rb_node *n; 910 911 if (!btrfs_test_opt(fs_info, REF_VERIFY)) 912 return; 913 914 spin_lock(&fs_info->ref_verify_lock); 915 while ((n = rb_first(&fs_info->block_tree))) { 916 be = rb_entry(n, struct block_entry, node); 917 rb_erase(&be->node, &fs_info->block_tree); 918 free_block_entry(be); 919 cond_resched_lock(&fs_info->ref_verify_lock); 920 } 921 spin_unlock(&fs_info->ref_verify_lock); 922 } 923 924 void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start, 925 u64 len) 926 { 927 struct block_entry *be = NULL, *entry; 928 struct rb_node *n; 929 930 if (!btrfs_test_opt(fs_info, REF_VERIFY)) 931 return; 932 933 spin_lock(&fs_info->ref_verify_lock); 934 n = fs_info->block_tree.rb_node; 935 while (n) { 936 entry = rb_entry(n, struct block_entry, node); 937 if (entry->bytenr < start) { 938 n = n->rb_right; 939 } else if (entry->bytenr > start) { 940 n = n->rb_left; 941 } else { 942 be = entry; 943 break; 944 } 945 /* We want to get as close to start as possible */ 946 if (be == NULL || 947 (entry->bytenr < start && be->bytenr > start) || 948 (entry->bytenr < start && entry->bytenr > be->bytenr)) 949 be = entry; 950 } 951 952 /* 953 * Could have an empty block group, maybe have something to check for 954 * this case to verify we were actually empty? 955 */ 956 if (!be) { 957 spin_unlock(&fs_info->ref_verify_lock); 958 return; 959 } 960 961 n = &be->node; 962 while (n) { 963 be = rb_entry(n, struct block_entry, node); 964 n = rb_next(n); 965 if (be->bytenr < start && be->bytenr + be->len > start) { 966 btrfs_err(fs_info, 967 "block entry overlaps a block group [%llu,%llu]!", 968 start, len); 969 dump_block_entry(fs_info, be); 970 continue; 971 } 972 if (be->bytenr < start) 973 continue; 974 if (be->bytenr >= start + len) 975 break; 976 if (be->bytenr + be->len > start + len) { 977 btrfs_err(fs_info, 978 "block entry overlaps a block group [%llu,%llu]!", 979 start, len); 980 dump_block_entry(fs_info, be); 981 } 982 rb_erase(&be->node, &fs_info->block_tree); 983 free_block_entry(be); 984 } 985 spin_unlock(&fs_info->ref_verify_lock); 986 } 987 988 /* Walk down all roots and build the ref tree, meant to be called at mount */ 989 int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) 990 { 991 struct btrfs_root *extent_root; 992 struct btrfs_path *path; 993 struct extent_buffer *eb; 994 int tree_block_level = 0; 995 u64 bytenr = 0, num_bytes = 0; 996 int ret, level; 997 998 if (!btrfs_test_opt(fs_info, REF_VERIFY)) 999 return 0; 1000 1001 path = btrfs_alloc_path(); 1002 if (!path) 1003 return -ENOMEM; 1004 1005 extent_root = btrfs_extent_root(fs_info, 0); 1006 eb = btrfs_read_lock_root_node(extent_root); 1007 level = btrfs_header_level(eb); 1008 path->nodes[level] = eb; 1009 path->slots[level] = 0; 1010 path->locks[level] = BTRFS_READ_LOCK; 1011 1012 while (1) { 1013 /* 1014 * We have to keep track of the bytenr/num_bytes we last hit 1015 * because we could have run out of space for an inline ref, and 1016 * would have had to added a ref key item which may appear on a 1017 * different leaf from the original extent item. 1018 */ 1019 ret = walk_down_tree(extent_root, path, level, 1020 &bytenr, &num_bytes, &tree_block_level); 1021 if (ret) 1022 break; 1023 ret = walk_up_tree(path, &level); 1024 if (ret < 0) 1025 break; 1026 if (ret > 0) { 1027 ret = 0; 1028 break; 1029 } 1030 } 1031 if (ret) { 1032 btrfs_free_ref_cache(fs_info); 1033 btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); 1034 } 1035 btrfs_free_path(path); 1036 return ret; 1037 } 1038