// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2014 Facebook. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/stacktrace.h>
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "delayed-ref.h"
#include "ref-verify.h"
#include "fs.h"
#include "accessors.h"

/*
 * Used to keep track of the roots and the number of refs each root has for a
 * given bytenr. This only tracks the number of direct references, not shared
 * references.
 */
struct root_entry {
	u64 root_objectid;
	u64 num_refs;
	struct rb_node node;
};

/*
 * These represent what should exist in the extent tree; they can be used to
 * verify that the extent tree is consistent, as they should all match what the
 * extent tree says.
 */
struct ref_entry {
	u64 root_objectid;
	u64 parent;
	u64 owner;
	u64 offset;
	u64 num_refs;
	struct rb_node node;
};

#define MAX_TRACE	16

/*
 * Whenever we add/remove a reference we record the action. The action maps
 * back to the delayed ref action. We hold the ref we are changing in the
 * action so we can account for the history properly, and we record the root we
 * were called with since it could be different from ref_root. We also store
 * stack traces because that's how I roll.
 */
struct ref_action {
	int action;
	u64 root;
	struct ref_entry ref;
	struct list_head list;
	unsigned long trace[MAX_TRACE];
	unsigned int trace_len;
};

/*
 * One of these for every block we reference; it holds the roots and references
 * to it as well as all of the ref actions that have occurred to it. We never
 * free it until we unmount the file system in order to make sure
 * re-allocations are happening properly.
 */
struct block_entry {
	u64 bytenr;
	u64 len;
	u64 num_refs;
	int metadata;
	int from_disk;
	struct rb_root roots;
	struct rb_root refs;
	struct rb_node node;
	struct list_head actions;
};

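/*
 * The *_key_cmp() helpers below compare a search key against a tree node in
 * the form rb_find()/rb_find_add() expect: negative if the key sorts before
 * the node, positive if it sorts after, and zero on a match.
 */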
static int block_entry_bytenr_key_cmp(const void *key, const struct rb_node *node)
{
	const u64 *bytenr = key;
	const struct block_entry *entry = rb_entry(node, struct block_entry, node);

	if (entry->bytenr < *bytenr)
		return 1;
	else if (entry->bytenr > *bytenr)
		return -1;

	return 0;
}

static int block_entry_bytenr_cmp(struct rb_node *new, const struct rb_node *existing)
{
	const struct block_entry *new_entry = rb_entry(new, struct block_entry, node);

	return block_entry_bytenr_key_cmp(&new_entry->bytenr, existing);
}

static struct block_entry *insert_block_entry(struct rb_root *root,
					      struct block_entry *be)
{
	struct rb_node *node;

	node = rb_find_add(&be->node, root, block_entry_bytenr_cmp);
	return rb_entry_safe(node, struct block_entry, node);
}

static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
	struct rb_node *node;

	node = rb_find(&bytenr, root, block_entry_bytenr_key_cmp);
	return rb_entry_safe(node, struct block_entry, node);
}

static int root_entry_root_objectid_key_cmp(const void *key, const struct rb_node *node)
{
	const u64 *objectid = key;
	const struct root_entry *entry = rb_entry(node, struct root_entry, node);

	if (entry->root_objectid < *objectid)
		return 1;
	else if (entry->root_objectid > *objectid)
		return -1;

	return 0;
}

static int root_entry_root_objectid_cmp(struct rb_node *new, const struct rb_node *existing)
{
	const struct root_entry *new_entry = rb_entry(new, struct root_entry, node);

	return root_entry_root_objectid_key_cmp(&new_entry->root_objectid, existing);
}

static struct root_entry *insert_root_entry(struct rb_root *root,
					    struct root_entry *re)
{
	struct rb_node *node;

	node = rb_find_add(&re->node, root, root_entry_root_objectid_cmp);
	return rb_entry_safe(node, struct root_entry, node);
}

static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
{
	if (ref1->root_objectid < ref2->root_objectid)
		return -1;
	if (ref1->root_objectid > ref2->root_objectid)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	if (ref1->owner < ref2->owner)
		return -1;
	if (ref1->owner > ref2->owner)
		return 1;
	if (ref1->offset < ref2->offset)
		return -1;
	if (ref1->offset > ref2->offset)
		return 1;
	return 0;
}

static int ref_entry_cmp(struct rb_node *new, const struct rb_node *existing)
{
	struct ref_entry *new_entry = rb_entry(new, struct ref_entry, node);
	struct ref_entry *existing_entry = rb_entry(existing, struct ref_entry, node);

	return comp_refs(new_entry, existing_entry);
}

static struct ref_entry *insert_ref_entry(struct rb_root *root,
					  struct ref_entry *ref)
{
	struct rb_node *node;

	node = rb_find_add(&ref->node, root, ref_entry_cmp);
	return rb_entry_safe(node, struct ref_entry, node);
}

static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
	struct rb_node *node;

	node = rb_find(&objectid, root, root_entry_root_objectid_key_cmp);
	return rb_entry_safe(node, struct root_entry, node);
}

#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
	ra->trace_len = stack_trace_save(ra->trace, MAX_TRACE, 2);
}

static void __print_stack_trace(struct btrfs_fs_info *fs_info,
				struct ref_action *ra)
{
	if (ra->trace_len == 0) {
		btrfs_err(fs_info, " ref-verify: no stacktrace");
		return;
	}
	stack_trace_print(ra->trace, ra->trace_len, 2);
}
#else
static inline void __save_stack_trace(struct ref_action *ra)
{
}

static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
				       struct ref_action *ra)
{
	btrfs_err(fs_info, " ref-verify: no stacktrace support");
}
#endif

static void free_block_entry(struct block_entry *be)
{
	struct root_entry *re;
	struct ref_entry *ref;
	struct ref_action *ra;
	struct rb_node *n;

	while ((n = rb_first(&be->roots))) {
		re = rb_entry(n, struct root_entry, node);
		rb_erase(&re->node, &be->roots);
		kfree(re);
	}

	while ((n = rb_first(&be->refs))) {
		ref = rb_entry(n, struct ref_entry, node);
		rb_erase(&ref->node, &be->refs);
		kfree(ref);
	}

	while (!list_empty(&be->actions)) {
		ra = list_first_entry(&be->actions, struct ref_action,
				      list);
		list_del(&ra->list);
		kfree(ra);
	}
	kfree(be);
}

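/*
 * Allocate (or find) the block entry for @bytenr and, when @root_objectid is
 * non-zero, make sure a matching root entry hangs off of it. On success
 * (including when an existing entry is returned) fs_info->ref_verify_lock is
 * held and the caller is responsible for dropping it once it is done updating
 * the entry; on ENOMEM the lock is not taken.
 */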
static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
					   u64 bytenr, u64 len,
					   u64 root_objectid)
{
	struct block_entry *be = NULL, *exist;
	struct root_entry *re = NULL;

	re = kzalloc(sizeof(struct root_entry), GFP_NOFS);
	be = kzalloc(sizeof(struct block_entry), GFP_NOFS);
	if (!be || !re) {
		kfree(re);
		kfree(be);
		return ERR_PTR(-ENOMEM);
	}
	be->bytenr = bytenr;
	be->len = len;

	re->root_objectid = root_objectid;
	re->num_refs = 0;

	spin_lock(&fs_info->ref_verify_lock);
	exist = insert_block_entry(&fs_info->block_tree, be);
	if (exist) {
		if (root_objectid) {
			struct root_entry *exist_re;

			exist_re = insert_root_entry(&exist->roots, re);
			if (exist_re)
				kfree(re);
		} else {
			kfree(re);
		}
		kfree(be);
		return exist;
	}

	be->num_refs = 0;
	be->metadata = 0;
	be->from_disk = 0;
	be->roots = RB_ROOT;
	be->refs = RB_ROOT;
	INIT_LIST_HEAD(&be->actions);
	if (root_objectid)
		insert_root_entry(&be->roots, re);
	else
		kfree(re);
	return be;
}

static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
			  u64 parent, u64 bytenr, int level)
{
	struct block_entry *be;
	struct root_entry *re;
	struct ref_entry *ref = NULL, *exist;

	ref = kmalloc(sizeof(struct ref_entry), GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref->root_objectid = 0;
	else
		ref->root_objectid = ref_root;
	ref->parent = parent;
	ref->owner = level;
	ref->offset = 0;
	ref->num_refs = 1;

	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs++;
	be->from_disk = 1;
	be->metadata = 1;

	if (!parent) {
		ASSERT(ref_root);
		re = lookup_root_entry(&be->roots, ref_root);
		ASSERT(re);
		re->num_refs++;
	}
	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		exist->num_refs++;
		kfree(ref);
	}
	spin_unlock(&fs_info->ref_verify_lock);

	return 0;
}

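/*
 * add_shared_data_ref() and add_extent_data_ref() below are only used while
 * pre-loading the existing refs from the extent tree at mount time, which is
 * why a duplicate entry there is treated as an error.
 */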
static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
			       u64 parent, u32 num_refs, u64 bytenr,
			       u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = parent;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing shared ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
			       struct extent_buffer *leaf,
			       struct btrfs_extent_data_ref *dref,
			       u64 bytenr, u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;
	struct root_entry *re;
	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = 0;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}

	re = lookup_root_entry(&be->roots, ref_root);
	if (!re) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "missing root in new block entry?");
		return -EINVAL;
	}
	re->num_refs += num_refs;
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

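/*
 * An EXTENT_ITEM or METADATA_ITEM is followed by its inline refs within the
 * same item:
 *
 *   [ btrfs_extent_item ]
 *   [ btrfs_tree_block_info ]   (EXTENT_ITEM tree blocks only; with
 *                                METADATA_ITEM the level is the key offset)
 *   [ inline ref ][ inline ref ] ...   up to the end of the item
 *
 * Each inline ref starts with a type byte and a 64 bit offset whose meaning
 * depends on the type (owning root objectid, parent bytenr, or the start of
 * an embedded btrfs_extent_data_ref).
 */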
static int process_extent_item(struct btrfs_fs_info *fs_info,
			       struct btrfs_path *path, struct btrfs_key *key,
			       int slot, int *tree_block_level)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct extent_buffer *leaf = path->nodes[0];
	u32 item_size = btrfs_item_size(leaf, slot);
	unsigned long end, ptr;
	u64 offset, flags, count;
	int type;
	int ret = 0;

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)(ei + 1);
		*tree_block_level = btrfs_tree_block_level(leaf, info);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	} else {
		if (key->type == BTRFS_METADATA_ITEM_KEY)
			*tree_block_level = key->offset;
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	}

	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, offset, 0, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, offset, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = add_extent_data_ref(fs_info, leaf, dref,
						  key->objectid, key->offset);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, offset, count,
						  key->objectid, key->offset);
			break;
		case BTRFS_EXTENT_OWNER_REF_KEY:
			if (!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) {
				btrfs_err(fs_info,
			  "found extent owner ref without simple quotas enabled");
				ret = -EINVAL;
			}
			break;
		default:
			btrfs_err(fs_info, "invalid key type in iref");
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
		ptr += btrfs_extent_inline_ref_size(type);
	}
	return ret;
}

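/*
 * process_leaf() remembers the bytenr/num_bytes of the last extent item it
 * saw, so that standalone keyed ref items, which sort right after their extent
 * item and may even land on the next leaf, get attributed to the correct
 * extent.
 */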
static int process_leaf(struct btrfs_root *root,
			struct btrfs_path *path, u64 *bytenr, u64 *num_bytes,
			int *tree_block_level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	u32 count;
	int i = 0, ret = 0;
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(leaf);

	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		switch (key.type) {
		case BTRFS_EXTENT_ITEM_KEY:
			*num_bytes = key.offset;
			fallthrough;
		case BTRFS_METADATA_ITEM_KEY:
			*bytenr = key.objectid;
			ret = process_extent_item(fs_info, path, &key, i,
						  tree_block_level);
			break;
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, key.offset, 0,
					     key.objectid, *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, key.offset,
					     key.objectid, *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = btrfs_item_ptr(leaf, i,
					      struct btrfs_extent_data_ref);
			ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
						  *num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = btrfs_item_ptr(leaf, i,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, key.offset, count,
						  *bytenr, *num_bytes);
			break;
		default:
			break;
		}
		if (ret)
			break;
	}
	return ret;
}

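/*
 * walk_down_tree() and walk_up_tree() together form an iterative depth first
 * walk of the extent tree: walk down to a leaf, process it, then walk back up
 * to the first ancestor with an unvisited slot. walk_up_tree() returns 1 once
 * the whole tree has been consumed.
 */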
/* Walk down to the leaf from the given level */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  int level, u64 *bytenr, u64 *num_bytes,
			  int *tree_block_level)
{
	struct extent_buffer *eb;
	int ret = 0;

	while (level >= 0) {
		if (level) {
			eb = btrfs_read_node_slot(path->nodes[level],
						  path->slots[level]);
			if (IS_ERR(eb))
				return PTR_ERR(eb);
			btrfs_tree_read_lock(eb);
			path->nodes[level-1] = eb;
			path->slots[level-1] = 0;
			path->locks[level-1] = BTRFS_READ_LOCK;
		} else {
			ret = process_leaf(root, path, bytenr, num_bytes,
					   tree_block_level);
			if (ret)
				break;
		}
		level--;
	}
	return ret;
}

/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_path *path, int *level)
{
	int l;

	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
		if (!path->nodes[l])
			continue;
		if (l) {
			path->slots[l]++;
			if (path->slots[l] <
			    btrfs_header_nritems(path->nodes[l])) {
				*level = l;
				return 0;
			}
		}
		btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
		free_extent_buffer(path->nodes[l]);
		path->nodes[l] = NULL;
		path->slots[l] = 0;
		path->locks[l] = 0;
	}

	return 1;
}

static void dump_ref_action(struct btrfs_fs_info *fs_info,
			    struct ref_action *ra)
{
	btrfs_err(fs_info,
" Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
		  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
		  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
	__print_stack_trace(fs_info, ra);
}

/*
 * Dumps all the information from the block entry to printk; it's going to be
 * awesome.
 */
static void dump_block_entry(struct btrfs_fs_info *fs_info,
			     struct block_entry *be)
{
	struct ref_entry *ref;
	struct root_entry *re;
	struct ref_action *ra;
	struct rb_node *n;

	btrfs_err(fs_info,
"dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
		  be->bytenr, be->len, be->num_refs, be->metadata,
		  be->from_disk);

	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
		ref = rb_entry(n, struct ref_entry, node);
		btrfs_err(fs_info,
" ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			  ref->root_objectid, ref->parent, ref->owner,
			  ref->offset, ref->num_refs);
	}

	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
		re = rb_entry(n, struct root_entry, node);
		btrfs_err(fs_info, " root entry %llu, num_refs %llu",
			  re->root_objectid, re->num_refs);
	}

	list_for_each_entry(ra, &be->actions, list)
		dump_ref_action(fs_info, ra);
}

/*
 * Called when we modify a ref for a bytenr.
 *
 * This will add an action item for the given bytenr and do sanity checks to
 * make sure we haven't messed anything up. If we are making a new allocation
 * and this block entry has history we will delete all previous actions as long
 * as our sanity checks pass, as they are no longer needed.
 */
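/*
 * If any of the sanity checks below trip we dump the offending block entry
 * and ref action, free the entire ref cache and clear the REF_VERIFY mount
 * option, so one detected inconsistency disables further verification rather
 * than repeating the same report.
 */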
int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
		       const struct btrfs_ref *generic_ref)
{
	struct ref_entry *ref = NULL, *exist;
	struct ref_action *ra = NULL;
	struct block_entry *be = NULL;
	struct root_entry *re = NULL;
	int action = generic_ref->action;
	int ret = 0;
	bool metadata;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->num_bytes;
	u64 parent = generic_ref->parent;
	u64 ref_root = 0;
	u64 owner = 0;
	u64 offset = 0;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	if (generic_ref->type == BTRFS_REF_METADATA) {
		if (!parent)
			ref_root = generic_ref->ref_root;
		owner = generic_ref->tree_ref.level;
	} else if (!parent) {
		ref_root = generic_ref->ref_root;
		owner = generic_ref->data_ref.objectid;
		offset = generic_ref->data_ref.offset;
	}
	metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
	if (!ra || !ref) {
		kfree(ref);
		kfree(ra);
		ret = -ENOMEM;
		goto out;
	}

	ref->parent = parent;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;

	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
	/*
	 * Save the extra info from the delayed ref in the ref action to make
	 * it easier to figure out what is happening. The real refs we add to
	 * the ref tree need to reflect what we save on disk so it matches any
	 * on-disk refs we pre-loaded.
	 */
	ra->ref.owner = owner;
	ra->ref.offset = offset;
	ra->ref.root_objectid = ref_root;
	__save_stack_trace(ra);

	INIT_LIST_HEAD(&ra->list);
	ra->action = action;
	ra->root = generic_ref->real_root;

	/*
	 * This is an allocation; preallocate the block_entry in case we
	 * haven't used it before.
	 */
	ret = -EINVAL;
	if (action == BTRFS_ADD_DELAYED_EXTENT) {
		/*
		 * For subvol_create we'll just pass in whatever the parent root
		 * is and the new root objectid, so let's not treat the passed
		 * in root as if it really has a ref for this bytenr.
		 */
		be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
		if (IS_ERR(be)) {
			kfree(ref);
			kfree(ra);
			ret = PTR_ERR(be);
			goto out;
		}
		be->num_refs++;
		if (metadata)
			be->metadata = 1;

		if (be->num_refs != 1) {
			btrfs_err(fs_info,
		"re-allocated a block that still has references to it!");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		while (!list_empty(&be->actions)) {
			struct ref_action *tmp;

			tmp = list_first_entry(&be->actions, struct ref_action,
					       list);
			list_del(&tmp->list);
			kfree(tmp);
		}
	} else {
		struct root_entry *tmp;

		if (!parent) {
			re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
			if (!re) {
				kfree(ref);
				kfree(ra);
				ret = -ENOMEM;
				goto out;
			}
			/*
			 * This is the root that is modifying us, so it's the
			 * one we want to lookup below when we modify the
			 * re->num_refs.
			 */
			ref_root = generic_ref->real_root;
			re->root_objectid = generic_ref->real_root;
			re->num_refs = 0;
		}

		spin_lock(&fs_info->ref_verify_lock);
		be = lookup_block_entry(&fs_info->block_tree, bytenr);
		if (!be) {
			btrfs_err(fs_info,
"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				  action, bytenr, num_bytes);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			kfree(re);
			goto out_unlock;
		} else if (be->num_refs == 0) {
			btrfs_err(fs_info,
		"trying to do action %d for a bytenr that has 0 total references",
				  action);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			kfree(re);
			goto out_unlock;
		}

		if (!parent) {
			tmp = insert_root_entry(&be->roots, re);
			if (tmp) {
				kfree(re);
				re = tmp;
			}
		}
	}

	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		if (action == BTRFS_DROP_DELAYED_REF) {
			if (exist->num_refs == 0) {
				btrfs_err(fs_info,
"dropping a ref for an existing root that doesn't have a ref on the block");
				dump_block_entry(fs_info, be);
				dump_ref_action(fs_info, ra);
				kfree(ref);
				kfree(ra);
				goto out_unlock;
			}
			exist->num_refs--;
			if (exist->num_refs == 0) {
				rb_erase(&exist->node, &be->refs);
				kfree(exist);
			}
		} else if (!be->metadata) {
			exist->num_refs++;
		} else {
			btrfs_err(fs_info,
"attempting to add another ref for an existing ref on a tree block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}
		kfree(ref);
	} else {
		if (action == BTRFS_DROP_DELAYED_REF) {
			btrfs_err(fs_info,
"dropping a ref for a root that doesn't have a ref on the block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			rb_erase(&ref->node, &be->refs);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}
	}

	if (!parent && !re) {
		re = lookup_root_entry(&be->roots, ref_root);
		if (!re) {
			/*
			 * This shouldn't happen because we add our re above
			 * when we look up the be with !parent, but catch this
			 * case just in case there is some other corner case we
			 * didn't think of, so we don't panic on it.
			 */
			btrfs_err(fs_info, "failed to find root %llu for %llu",
				  generic_ref->real_root, be->bytenr);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}
	if (action == BTRFS_DROP_DELAYED_REF) {
		if (re)
			re->num_refs--;
		be->num_refs--;
	} else if (action == BTRFS_ADD_DELAYED_REF) {
		be->num_refs++;
		if (re)
			re->num_refs++;
	}
	list_add_tail(&ra->list, &be->actions);
	ret = 0;
out_unlock:
	spin_unlock(&fs_info->ref_verify_lock);
out:
	if (ret) {
		btrfs_free_ref_cache(fs_info);
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
	}
	return ret;
}

/* Free up the ref cache */
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
	struct block_entry *be;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	while ((n = rb_first(&fs_info->block_tree))) {
		be = rb_entry(n, struct block_entry, node);
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
		cond_resched_lock(&fs_info->ref_verify_lock);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

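/*
 * Drop every block entry that falls inside [start, start + len), e.g. when a
 * block group is removed. Entries that overlap either end of the range are
 * reported, since they indicate a bookkeeping problem.
 */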
void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
			       u64 len)
{
	struct block_entry *be = NULL, *entry;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	n = fs_info->block_tree.rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < start) {
			n = n->rb_right;
		} else if (entry->bytenr > start) {
			n = n->rb_left;
		} else {
			be = entry;
			break;
		}
		/* We want to get as close to start as possible */
		if (be == NULL ||
		    (entry->bytenr < start && be->bytenr > start) ||
		    (entry->bytenr < start && entry->bytenr > be->bytenr))
			be = entry;
	}

	/*
	 * Could be an empty block group; maybe add a check for this case to
	 * verify we were actually empty?
	 */
	if (!be) {
		spin_unlock(&fs_info->ref_verify_lock);
		return;
	}

	n = &be->node;
	while (n) {
		be = rb_entry(n, struct block_entry, node);
		n = rb_next(n);
		if (be->bytenr < start && be->bytenr + be->len > start) {
			btrfs_err(fs_info,
				"block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
			continue;
		}
		if (be->bytenr < start)
			continue;
		if (be->bytenr >= start + len)
			break;
		if (be->bytenr + be->len > start + len) {
			btrfs_err(fs_info,
				"block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
		}
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

/* Walk the extent tree and build the ref tree, meant to be called at mount */
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *extent_root;
	struct btrfs_path *path;
	struct extent_buffer *eb;
	int tree_block_level = 0;
	u64 bytenr = 0, num_bytes = 0;
	int ret, level;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	extent_root = btrfs_extent_root(fs_info, 0);
	eb = btrfs_read_lock_root_node(extent_root);
	level = btrfs_header_level(eb);
	path->nodes[level] = eb;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_READ_LOCK;

	while (1) {
		/*
		 * We have to keep track of the bytenr/num_bytes we last hit
		 * because we could have run out of space for an inline ref,
		 * and would have had to add a ref key item which may appear on
		 * a different leaf from the original extent item.
		 */
		ret = walk_down_tree(extent_root, path, level,
				     &bytenr, &num_bytes, &tree_block_level);
		if (ret)
			break;
		ret = walk_up_tree(path, &level);
		if (ret < 0)
			break;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}
	if (ret) {
		btrfs_free_ref_cache(fs_info);
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
	}
	btrfs_free_path(path);
	return ret;
}