// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/stacktrace.h>
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "delayed-ref.h"
#include "ref-verify.h"
#include "fs.h"
#include "accessors.h"

/*
 * Used to keep track of the roots and number of refs each root has for a given
 * bytenr.  This just tracks the number of direct references, no shared
 * references.
 */
struct root_entry {
	u64 root_objectid;
	u64 num_refs;
	struct rb_node node;
};

/*
 * These are meant to represent what should exist in the extent tree; they can
 * be used to verify that the extent tree is consistent, as they should all
 * match what the extent tree says.
 */
struct ref_entry {
	u64 root_objectid;
	u64 parent;
	u64 owner;
	u64 offset;
	u64 num_refs;
	struct rb_node node;
};

#define MAX_TRACE	16

/*
 * Whenever we add/remove a reference we record the action.  The action maps
 * back to the delayed ref action.  We hold the ref we are changing in the
 * action so we can account for the history properly, and we record the root we
 * were called with since it could be different from ref_root.  We also store
 * stack traces because that's how I roll.
 */
struct ref_action {
	int action;
	u64 root;
	struct ref_entry ref;
	struct list_head list;
	unsigned long trace[MAX_TRACE];
	unsigned int trace_len;
};
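
/*
 * How the structures in this file relate (sketch):
 *
 *	fs_info->block_tree (rb_root, protected by fs_info->ref_verify_lock)
 *	  └── block_entry		one per referenced bytenr
 *		├── roots		rb_root of root_entry, by root_objectid
 *		├── refs		rb_root of ref_entry, ordered by comp_refs()
 *		└── actions		list of ref_action, the modification history
 */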

/*
 * There is one of these for every block we reference; it holds the roots and
 * references to it as well as all of the ref actions that have occurred to it.
 * We never free it until we unmount the file system in order to make sure
 * re-allocations are happening properly.
 */
struct block_entry {
	u64 bytenr;
	u64 len;
	u64 num_refs;
	int metadata;
	int from_disk;
	struct rb_root roots;
	struct rb_root refs;
	struct rb_node node;
	struct list_head actions;
};

static struct block_entry *insert_block_entry(struct rb_root *root,
					      struct block_entry *be)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct block_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct block_entry, node);
		if (entry->bytenr > be->bytenr)
			p = &(*p)->rb_left;
		else if (entry->bytenr < be->bytenr)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&be->node, parent_node, p);
	rb_insert_color(&be->node, root);
	return NULL;
}

static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n;
	struct block_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < bytenr)
			n = n->rb_right;
		else if (entry->bytenr > bytenr)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

static struct root_entry *insert_root_entry(struct rb_root *root,
					    struct root_entry *re)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct root_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct root_entry, node);
		if (entry->root_objectid > re->root_objectid)
			p = &(*p)->rb_left;
		else if (entry->root_objectid < re->root_objectid)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&re->node, parent_node, p);
	rb_insert_color(&re->node, root);
	return NULL;
}

static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
{
	if (ref1->root_objectid < ref2->root_objectid)
		return -1;
	if (ref1->root_objectid > ref2->root_objectid)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	if (ref1->owner < ref2->owner)
		return -1;
	if (ref1->owner > ref2->owner)
		return 1;
	if (ref1->offset < ref2->offset)
		return -1;
	if (ref1->offset > ref2->offset)
		return 1;
	return 0;
}

static struct ref_entry *insert_ref_entry(struct rb_root *root,
					  struct ref_entry *ref)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct ref_entry *entry;
	int cmp;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct ref_entry, node);
		cmp = comp_refs(entry, ref);
		if (cmp > 0)
			p = &(*p)->rb_left;
		else if (cmp < 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&ref->node, parent_node, p);
	rb_insert_color(&ref->node, root);
	return NULL;
}

static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
	struct rb_node *n;
	struct root_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct root_entry, node);
		if (entry->root_objectid < objectid)
			n = n->rb_right;
		else if (entry->root_objectid > objectid)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}
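
/*
 * Note on the insert_*_entry() helpers above: they return NULL once the new
 * node has been linked into the tree, or the already-present entry on
 * collision.  In the latter case the caller owns both copies and typically
 * folds its copy into the existing entry, e.g.:
 *
 *	exist = insert_ref_entry(&be->refs, ref);
 *	if (exist) {
 *		exist->num_refs++;
 *		kfree(ref);
 *	}
 */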

#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
	ra->trace_len = stack_trace_save(ra->trace, MAX_TRACE, 2);
}

static void __print_stack_trace(struct btrfs_fs_info *fs_info,
				struct ref_action *ra)
{
	if (ra->trace_len == 0) {
		btrfs_err(fs_info, " ref-verify: no stacktrace");
		return;
	}
	stack_trace_print(ra->trace, ra->trace_len, 2);
}
#else
static inline void __save_stack_trace(struct ref_action *ra)
{
}

static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
				       struct ref_action *ra)
{
	btrfs_err(fs_info, " ref-verify: no stacktrace support");
}
#endif

static void free_block_entry(struct block_entry *be)
{
	struct root_entry *re;
	struct ref_entry *ref;
	struct ref_action *ra;
	struct rb_node *n;

	while ((n = rb_first(&be->roots))) {
		re = rb_entry(n, struct root_entry, node);
		rb_erase(&re->node, &be->roots);
		kfree(re);
	}

	while ((n = rb_first(&be->refs))) {
		ref = rb_entry(n, struct ref_entry, node);
		rb_erase(&ref->node, &be->refs);
		kfree(ref);
	}

	while (!list_empty(&be->actions)) {
		ra = list_first_entry(&be->actions, struct ref_action,
				      list);
		list_del(&ra->list);
		kfree(ra);
	}
	kfree(be);
}
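
/*
 * Locking note: on success (including the "entry already existed" case),
 * add_block_entry() below returns with fs_info->ref_verify_lock held, and
 * every caller is responsible for dropping it once it has finished updating
 * the entry.  Only the allocation-failure path returns with the lock
 * released.
 */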

static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
					   u64 bytenr, u64 len,
					   u64 root_objectid)
{
	struct block_entry *be = NULL, *exist;
	struct root_entry *re = NULL;

	re = kzalloc(sizeof(struct root_entry), GFP_NOFS);
	be = kzalloc(sizeof(struct block_entry), GFP_NOFS);
	if (!be || !re) {
		kfree(re);
		kfree(be);
		return ERR_PTR(-ENOMEM);
	}
	be->bytenr = bytenr;
	be->len = len;

	re->root_objectid = root_objectid;
	re->num_refs = 0;

	spin_lock(&fs_info->ref_verify_lock);
	exist = insert_block_entry(&fs_info->block_tree, be);
	if (exist) {
		if (root_objectid) {
			struct root_entry *exist_re;

			exist_re = insert_root_entry(&exist->roots, re);
			if (exist_re)
				kfree(re);
		} else {
			kfree(re);
		}
		kfree(be);
		return exist;
	}

	be->num_refs = 0;
	be->metadata = 0;
	be->from_disk = 0;
	be->roots = RB_ROOT;
	be->refs = RB_ROOT;
	INIT_LIST_HEAD(&be->actions);
	if (root_objectid)
		insert_root_entry(&be->roots, re);
	else
		kfree(re);
	return be;
}

static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
			  u64 parent, u64 bytenr, int level)
{
	struct block_entry *be;
	struct root_entry *re;
	struct ref_entry *ref = NULL, *exist;

	ref = kmalloc(sizeof(struct ref_entry), GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref->root_objectid = 0;
	else
		ref->root_objectid = ref_root;
	ref->parent = parent;
	ref->owner = level;
	ref->offset = 0;
	ref->num_refs = 1;

	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs++;
	be->from_disk = 1;
	be->metadata = 1;

	if (!parent) {
		ASSERT(ref_root);
		re = lookup_root_entry(&be->roots, ref_root);
		ASSERT(re);
		re->num_refs++;
	}
	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		exist->num_refs++;
		kfree(ref);
	}
	spin_unlock(&fs_info->ref_verify_lock);

	return 0;
}

static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
			       u64 parent, u32 num_refs, u64 bytenr,
			       u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = parent;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing shared ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
			       struct extent_buffer *leaf,
			       struct btrfs_extent_data_ref *dref,
			       u64 bytenr, u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;
	struct root_entry *re;
	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = 0;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}

	re = lookup_root_entry(&be->roots, ref_root);
	if (!re) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "missing root in new block entry?");
		return -EINVAL;
	}
	re->num_refs += num_refs;
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}
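
/*
 * On-disk layout walked by process_extent_item() below (sketch):
 *
 *	EXTENT_ITEM_KEY with BTRFS_EXTENT_FLAG_TREE_BLOCK:
 *	  [btrfs_extent_item][btrfs_tree_block_info][inline ref][inline ref]...
 *	METADATA_ITEM_KEY (level is in key->offset) or data extents:
 *	  [btrfs_extent_item][inline ref][inline ref]...
 *
 * Each inline ref starts with a btrfs_extent_inline_ref whose type selects
 * how the payload is interpreted.
 */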

static int process_extent_item(struct btrfs_fs_info *fs_info,
			       struct btrfs_path *path, struct btrfs_key *key,
			       int slot, int *tree_block_level)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct extent_buffer *leaf = path->nodes[0];
	u32 item_size = btrfs_item_size(leaf, slot);
	unsigned long end, ptr;
	u64 offset, flags, count;
	int type;
	int ret = 0;	/* In case the item carries no inline refs. */

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)(ei + 1);
		*tree_block_level = btrfs_tree_block_level(leaf, info);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	} else {
		if (key->type == BTRFS_METADATA_ITEM_KEY)
			*tree_block_level = key->offset;
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	}

	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, offset, 0, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, offset, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = add_extent_data_ref(fs_info, leaf, dref,
						  key->objectid, key->offset);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, offset, count,
						  key->objectid, key->offset);
			break;
		case BTRFS_EXTENT_OWNER_REF_KEY:
			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
			break;
		default:
			btrfs_err(fs_info, "invalid key type in iref");
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
		ptr += btrfs_extent_inline_ref_size(type);
	}
	return ret;
}

static int process_leaf(struct btrfs_root *root,
			struct btrfs_path *path, u64 *bytenr, u64 *num_bytes,
			int *tree_block_level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	u32 count;
	int i = 0, ret = 0;
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(leaf);

	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		switch (key.type) {
		case BTRFS_EXTENT_ITEM_KEY:
			*num_bytes = key.offset;
			fallthrough;
		case BTRFS_METADATA_ITEM_KEY:
			*bytenr = key.objectid;
			ret = process_extent_item(fs_info, path, &key, i,
						  tree_block_level);
			break;
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, key.offset, 0,
					     key.objectid, *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, key.offset,
					     key.objectid, *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = btrfs_item_ptr(leaf, i,
					      struct btrfs_extent_data_ref);
			ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
						  *num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = btrfs_item_ptr(leaf, i,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, key.offset, count,
						  *bytenr, *num_bytes);
			break;
		default:
			break;
		}
		if (ret)
			break;
	}
	return ret;
}

/* Walk down to the leaf from the given level */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  int level, u64 *bytenr, u64 *num_bytes,
			  int *tree_block_level)
{
	struct extent_buffer *eb;
	int ret = 0;

	while (level >= 0) {
		if (level) {
			eb = btrfs_read_node_slot(path->nodes[level],
						  path->slots[level]);
			if (IS_ERR(eb))
				return PTR_ERR(eb);
			btrfs_tree_read_lock(eb);
			path->nodes[level - 1] = eb;
			path->slots[level - 1] = 0;
			path->locks[level - 1] = BTRFS_READ_LOCK;
		} else {
			ret = process_leaf(root, path, bytenr, num_bytes,
					   tree_block_level);
			if (ret)
				break;
		}
		level--;
	}
	return ret;
}
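
/*
 * walk_down_tree() and walk_up_tree() together implement an iterative
 * depth-first walk over the extent tree using path->nodes[]/slots[]:
 * walk_up_tree() returns 0 with *level set when there is another node to
 * descend into, and 1 once the root has been exhausted, which terminates the
 * loop in btrfs_build_ref_tree().
 */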

/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_path *path, int *level)
{
	int l;

	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
		if (!path->nodes[l])
			continue;
		if (l) {
			path->slots[l]++;
			if (path->slots[l] <
			    btrfs_header_nritems(path->nodes[l])) {
				*level = l;
				return 0;
			}
		}
		btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
		free_extent_buffer(path->nodes[l]);
		path->nodes[l] = NULL;
		path->slots[l] = 0;
		path->locks[l] = 0;
	}

	return 1;
}

static void dump_ref_action(struct btrfs_fs_info *fs_info,
			    struct ref_action *ra)
{
	btrfs_err(fs_info,
		  " Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
		  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
		  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
	__print_stack_trace(fs_info, ra);
}

/*
 * Dumps all the information from the block entry to printk, it's going to be
 * awesome.
 */
static void dump_block_entry(struct btrfs_fs_info *fs_info,
			     struct block_entry *be)
{
	struct ref_entry *ref;
	struct root_entry *re;
	struct ref_action *ra;
	struct rb_node *n;

	btrfs_err(fs_info,
		  "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
		  be->bytenr, be->len, be->num_refs, be->metadata,
		  be->from_disk);

	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
		ref = rb_entry(n, struct ref_entry, node);
		btrfs_err(fs_info,
			  " ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			  ref->root_objectid, ref->parent, ref->owner,
			  ref->offset, ref->num_refs);
	}

	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
		re = rb_entry(n, struct root_entry, node);
		btrfs_err(fs_info, " root entry %llu, num_refs %llu",
			  re->root_objectid, re->num_refs);
	}

	list_for_each_entry(ra, &be->actions, list)
		dump_ref_action(fs_info, ra);
}
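
/*
 * What a dump looks like in dmesg (a sketch with made-up values, assembled
 * from the format strings above):
 *
 *	dumping block entry [30408704 16384], num_refs 1, metadata 1, from disk 1
 *	 ref root 5, parent 0, owner 1, offset 0, num_refs 1
 *	 root entry 5, num_refs 1
 *	 Ref action 1, root 5, ref_root 5, parent 0, owner 1, offset 0, num_refs 1
 */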

/*
 * Called when we modify a ref for a bytenr.
 *
 * This will add an action item to the given bytenr and do sanity checks to
 * make sure we haven't messed something up.  If we are making a new allocation
 * and this block entry has history we will delete all previous actions as long
 * as our sanity checks pass, as they are no longer needed.
 */
int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
		       struct btrfs_ref *generic_ref)
{
	struct ref_entry *ref = NULL, *exist;
	struct ref_action *ra = NULL;
	struct block_entry *be = NULL;
	struct root_entry *re = NULL;
	int action = generic_ref->action;
	int ret = 0;
	bool metadata;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u64 ref_root = 0;
	u64 owner = 0;
	u64 offset = 0;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	if (generic_ref->type == BTRFS_REF_METADATA) {
		if (!parent)
			ref_root = generic_ref->tree_ref.ref_root;
		owner = generic_ref->tree_ref.level;
	} else if (!parent) {
		ref_root = generic_ref->data_ref.ref_root;
		owner = generic_ref->data_ref.ino;
		offset = generic_ref->data_ref.offset;
	}
	metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
	if (!ra || !ref) {
		kfree(ref);
		kfree(ra);
		ret = -ENOMEM;
		goto out;
	}

	ref->parent = parent;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;

	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
	/*
	 * Save the extra info from the delayed ref in the ref action to make
	 * it easier to figure out what is happening.  The real refs we add to
	 * the ref tree need to reflect what we save on disk so it matches any
	 * on-disk refs we pre-loaded.
	 */
	ra->ref.owner = owner;
	ra->ref.offset = offset;
	ra->ref.root_objectid = ref_root;
	__save_stack_trace(ra);

	INIT_LIST_HEAD(&ra->list);
	ra->action = action;
	ra->root = generic_ref->real_root;

	/*
	 * This is an allocation; preallocate the block_entry in case we
	 * haven't used it before.
	 */
	ret = -EINVAL;
	if (action == BTRFS_ADD_DELAYED_EXTENT) {
		/*
		 * For subvol_create we'll just pass in whatever the parent
		 * root is and the new root objectid, so let's not treat the
		 * passed in root as if it really has a ref for this bytenr.
		 */
		be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
		if (IS_ERR(be)) {
			kfree(ref);
			kfree(ra);
			ret = PTR_ERR(be);
			goto out;
		}
		be->num_refs++;
		if (metadata)
			be->metadata = 1;

		if (be->num_refs != 1) {
			btrfs_err(fs_info,
			"re-allocated a block that still has references to it!");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		while (!list_empty(&be->actions)) {
			struct ref_action *tmp;

			tmp = list_first_entry(&be->actions, struct ref_action,
					       list);
			list_del(&tmp->list);
			kfree(tmp);
		}
	} else {
		struct root_entry *tmp;

		if (!parent) {
			re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
			if (!re) {
				kfree(ref);
				kfree(ra);
				ret = -ENOMEM;
				goto out;
			}
			/*
			 * This is the root that is modifying us, so it's the
			 * one we want to lookup below when we modify the
			 * re->num_refs.
			 */
			ref_root = generic_ref->real_root;
			re->root_objectid = generic_ref->real_root;
			re->num_refs = 0;
		}

		spin_lock(&fs_info->ref_verify_lock);
		be = lookup_block_entry(&fs_info->block_tree, bytenr);
		if (!be) {
			btrfs_err(fs_info,
"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				  action, bytenr, num_bytes);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			kfree(re);
			goto out_unlock;
		} else if (be->num_refs == 0) {
			btrfs_err(fs_info,
		"trying to do action %d for a bytenr that has 0 total references",
				  action);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			kfree(re);
			goto out_unlock;
		}

		if (!parent) {
			tmp = insert_root_entry(&be->roots, re);
			if (tmp) {
				kfree(re);
				re = tmp;
			}
		}
	}

	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		if (action == BTRFS_DROP_DELAYED_REF) {
			if (exist->num_refs == 0) {
				btrfs_err(fs_info,
"dropping a ref for an existing root that doesn't have a ref on the block");
				dump_block_entry(fs_info, be);
				dump_ref_action(fs_info, ra);
				kfree(ref);
				kfree(ra);
				goto out_unlock;
			}
			exist->num_refs--;
			if (exist->num_refs == 0) {
				rb_erase(&exist->node, &be->refs);
				kfree(exist);
			}
		} else if (!be->metadata) {
			exist->num_refs++;
		} else {
			btrfs_err(fs_info,
"attempting to add another ref for an existing ref on a tree block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}
		kfree(ref);
	} else {
		if (action == BTRFS_DROP_DELAYED_REF) {
			btrfs_err(fs_info,
"dropping a ref for a root that doesn't have a ref on the block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}
	}

	if (!parent && !re) {
		re = lookup_root_entry(&be->roots, ref_root);
		if (!re) {
			/*
			 * This shouldn't happen because we will add our re
			 * above when we lookup the be with !parent, but just
			 * in case catch this case so we don't panic because I
			 * didn't think of some other corner case.
			 */
			btrfs_err(fs_info, "failed to find root %llu for %llu",
				  generic_ref->real_root, be->bytenr);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}
	if (action == BTRFS_DROP_DELAYED_REF) {
		if (re)
			re->num_refs--;
		be->num_refs--;
	} else if (action == BTRFS_ADD_DELAYED_REF) {
		be->num_refs++;
		if (re)
			re->num_refs++;
	}
	list_add_tail(&ra->list, &be->actions);
	ret = 0;
out_unlock:
	spin_unlock(&fs_info->ref_verify_lock);
out:
	if (ret)
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
	return ret;
}
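
/*
 * Note on the error handling above: whenever a sanity check fails,
 * btrfs_ref_tree_mod() dumps the offending block entry and ref action,
 * returns an error, and clears the REF_VERIFY mount option, so verification
 * stops after the first detected inconsistency instead of flooding the log.
 */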
for a root that doesn't have a ref on the block"); 853 dump_block_entry(fs_info, be); 854 dump_ref_action(fs_info, ra); 855 kfree(ref); 856 kfree(ra); 857 goto out_unlock; 858 } 859 } 860 861 if (!parent && !re) { 862 re = lookup_root_entry(&be->roots, ref_root); 863 if (!re) { 864 /* 865 * This shouldn't happen because we will add our re 866 * above when we lookup the be with !parent, but just in 867 * case catch this case so we don't panic because I 868 * didn't think of some other corner case. 869 */ 870 btrfs_err(fs_info, "failed to find root %llu for %llu", 871 generic_ref->real_root, be->bytenr); 872 dump_block_entry(fs_info, be); 873 dump_ref_action(fs_info, ra); 874 kfree(ra); 875 goto out_unlock; 876 } 877 } 878 if (action == BTRFS_DROP_DELAYED_REF) { 879 if (re) 880 re->num_refs--; 881 be->num_refs--; 882 } else if (action == BTRFS_ADD_DELAYED_REF) { 883 be->num_refs++; 884 if (re) 885 re->num_refs++; 886 } 887 list_add_tail(&ra->list, &be->actions); 888 ret = 0; 889 out_unlock: 890 spin_unlock(&fs_info->ref_verify_lock); 891 out: 892 if (ret) 893 btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); 894 return ret; 895 } 896 897 /* Free up the ref cache */ 898 void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info) 899 { 900 struct block_entry *be; 901 struct rb_node *n; 902 903 if (!btrfs_test_opt(fs_info, REF_VERIFY)) 904 return; 905 906 spin_lock(&fs_info->ref_verify_lock); 907 while ((n = rb_first(&fs_info->block_tree))) { 908 be = rb_entry(n, struct block_entry, node); 909 rb_erase(&be->node, &fs_info->block_tree); 910 free_block_entry(be); 911 cond_resched_lock(&fs_info->ref_verify_lock); 912 } 913 spin_unlock(&fs_info->ref_verify_lock); 914 } 915 916 void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start, 917 u64 len) 918 { 919 struct block_entry *be = NULL, *entry; 920 struct rb_node *n; 921 922 if (!btrfs_test_opt(fs_info, REF_VERIFY)) 923 return; 924 925 spin_lock(&fs_info->ref_verify_lock); 926 n = fs_info->block_tree.rb_node; 927 while (n) { 928 entry = rb_entry(n, struct block_entry, node); 929 if (entry->bytenr < start) { 930 n = n->rb_right; 931 } else if (entry->bytenr > start) { 932 n = n->rb_left; 933 } else { 934 be = entry; 935 break; 936 } 937 /* We want to get as close to start as possible */ 938 if (be == NULL || 939 (entry->bytenr < start && be->bytenr > start) || 940 (entry->bytenr < start && entry->bytenr > be->bytenr)) 941 be = entry; 942 } 943 944 /* 945 * Could have an empty block group, maybe have something to check for 946 * this case to verify we were actually empty? 

void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
			       u64 len)
{
	struct block_entry *be = NULL, *entry;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	n = fs_info->block_tree.rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < start) {
			n = n->rb_right;
		} else if (entry->bytenr > start) {
			n = n->rb_left;
		} else {
			be = entry;
			break;
		}
		/* We want to get as close to start as possible */
		if (be == NULL ||
		    (entry->bytenr < start && be->bytenr > start) ||
		    (entry->bytenr < start && entry->bytenr > be->bytenr))
			be = entry;
	}

	/*
	 * Could have an empty block group, maybe have something to check for
	 * this case to verify we were actually empty?
	 */
	if (!be) {
		spin_unlock(&fs_info->ref_verify_lock);
		return;
	}

	n = &be->node;
	while (n) {
		be = rb_entry(n, struct block_entry, node);
		n = rb_next(n);
		if (be->bytenr < start && be->bytenr + be->len > start) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
			continue;
		}
		if (be->bytenr < start)
			continue;
		if (be->bytenr >= start + len)
			break;
		if (be->bytenr + be->len > start + len) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
		}
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

/* Walk down all roots and build the ref tree, meant to be called at mount */
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *extent_root;
	struct btrfs_path *path;
	struct extent_buffer *eb;
	int tree_block_level = 0;
	u64 bytenr = 0, num_bytes = 0;
	int ret, level;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	extent_root = btrfs_extent_root(fs_info, 0);
	eb = btrfs_read_lock_root_node(extent_root);
	level = btrfs_header_level(eb);
	path->nodes[level] = eb;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_READ_LOCK;

	while (1) {
		/*
		 * We have to keep track of the bytenr/num_bytes we last hit
		 * because we could have run out of space for an inline ref,
		 * and would have had to add a ref key item which may appear
		 * on a different leaf from the original extent item.
		 */
		ret = walk_down_tree(extent_root, path, level,
				     &bytenr, &num_bytes, &tree_block_level);
		if (ret)
			break;
		ret = walk_up_tree(path, &level);
		if (ret < 0)
			break;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}
	if (ret) {
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
		btrfs_free_ref_cache(fs_info);
	}
	btrfs_free_path(path);
	return ret;
}
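
/*
 * Usage note: all of the above is a no-op unless the file system is mounted
 * with the ref_verify mount option (which in turn requires a kernel built
 * with CONFIG_BTRFS_FS_REF_VERIFY); btrfs_build_ref_tree() then seeds the
 * cache from the extent tree at mount time and btrfs_ref_tree_mod() checks
 * every subsequent ref modification against it.
 */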