// SPDX-License-Identifier: GPL-2.0
/*
 * Assorted bcachefs debug code
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcachefs.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
#include "fsck.h"
#include "inode.h"
#include "super.h"

#include <linux/console.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/seq_file.h>

static struct dentry *bch_debug;

static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
				      struct extent_ptr_decoded pick)
{
	struct btree *v = c->verify_data;
	struct btree_node *n_ondisk = c->verify_ondisk;
	struct btree_node *n_sorted = c->verify_data->data;
	struct bset *sorted, *inmemory = &b->data->keys;
	struct bio *bio;
	bool failed = false, saw_error = false;

	struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
	if (!ca)
		return false;

	bio = bio_alloc_bioset(ca->disk_sb.bdev,
			       buf_pages(n_sorted, btree_buf_bytes(b)),
			       REQ_OP_READ|REQ_META,
			       GFP_NOFS,
			       &c->btree_bio);
	bio->bi_iter.bi_sector = pick.ptr.offset;
	bch2_bio_map(bio, n_sorted, btree_buf_bytes(b));

	submit_bio_wait(bio);

	bio_put(bio);
	percpu_ref_put(&ca->io_ref);

	memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));

	v->written = 0;
	if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
		return false;

	n_sorted = c->verify_data->data;
	sorted = &n_sorted->keys;

	if (inmemory->u64s != sorted->u64s ||
	    memcmp(inmemory->start,
		   sorted->start,
		   vstruct_end(inmemory) - (void *) inmemory->start)) {
		unsigned offset = 0, sectors;
		struct bset *i;
		unsigned j;

		console_lock();

		printk(KERN_ERR "*** in memory:\n");
		bch2_dump_bset(c, b, inmemory, 0);

		printk(KERN_ERR "*** read back in:\n");
		bch2_dump_bset(c, v, sorted, 0);

		while (offset < v->written) {
			if (!offset) {
				i = &n_ondisk->keys;
				sectors = vstruct_blocks(n_ondisk, c->block_bits) <<
					c->block_bits;
			} else {
				struct btree_node_entry *bne =
					(void *) n_ondisk + (offset << 9);
				i = &bne->keys;

				sectors = vstruct_blocks(bne, c->block_bits) <<
					c->block_bits;
			}

			printk(KERN_ERR "*** on disk block %u:\n", offset);
			bch2_dump_bset(c, b, i, offset);

			offset += sectors;
		}

		for (j = 0; j < le16_to_cpu(inmemory->u64s); j++)
			if (inmemory->_data[j] != sorted->_data[j])
				break;

		console_unlock();
		bch_err(c, "verify failed at key %u", j);

		failed = true;
	}

	if (v->written != b->written) {
		bch_err(c, "written wrong: expected %u, got %u",
			b->written, v->written);
		failed = true;
	}

	return failed;
}

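/*
 * Read every replica of @b back from disk and compare each against the
 * in-memory version; a mismatch or a short write is reported as a fatal
 * error. No-op when the filesystem is mounted with -o nochanges.
 */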
void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
{
	struct bkey_ptrs_c ptrs;
	struct extent_ptr_decoded p;
	const union bch_extent_entry *entry;
	struct btree *v;
	struct bset *inmemory = &b->data->keys;
	struct bkey_packed *k;
	bool failed = false;

	if (c->opts.nochanges)
		return;

	bch2_btree_node_io_lock(b);
	mutex_lock(&c->verify_lock);

	if (!c->verify_ondisk) {
		c->verify_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL);
		if (!c->verify_ondisk)
			goto out;
	}

	if (!c->verify_data) {
		c->verify_data = __bch2_btree_node_mem_alloc(c);
		if (!c->verify_data)
			goto out;

		list_del_init(&c->verify_data->list);
	}

	BUG_ON(b->nsets != 1);

	for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k))
		if (k->type == KEY_TYPE_btree_ptr_v2)
			((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0;

	v = c->verify_data;
	bkey_copy(&v->key, &b->key);
	v->c.level = b->c.level;
	v->c.btree_id = b->c.btree_id;
	bch2_btree_keys_init(v);

	ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key));
	bkey_for_each_ptr_decode(&b->key.k, ptrs, p, entry)
		failed |= bch2_btree_verify_replica(c, b, p);

	if (failed) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
		bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf);
		printbuf_exit(&buf);
	}
out:
	mutex_unlock(&c->verify_lock);
	bch2_btree_node_io_unlock(b);
}

void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
				    const struct btree *b)
{
	struct btree_node *n_ondisk = NULL;
	struct extent_ptr_decoded pick;
	struct bch_dev *ca;
	struct bio *bio = NULL;
	unsigned offset = 0;
	int ret;

	if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick) <= 0) {
		prt_printf(out, "error getting device to read from: invalid device\n");
		return;
	}

	ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
	if (!ca) {
		prt_printf(out, "error getting device to read from: not online\n");
		return;
	}

	n_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL);
	if (!n_ondisk) {
		prt_printf(out, "memory allocation failure\n");
		goto out;
	}

	bio = bio_alloc_bioset(ca->disk_sb.bdev,
			       buf_pages(n_ondisk, btree_buf_bytes(b)),
			       REQ_OP_READ|REQ_META,
			       GFP_NOFS,
			       &c->btree_bio);
	bio->bi_iter.bi_sector = pick.ptr.offset;
	bch2_bio_map(bio, n_ondisk, btree_buf_bytes(b));

	ret = submit_bio_wait(bio);
	if (ret) {
		prt_printf(out, "IO error reading btree node: %s\n", bch2_err_str(ret));
		goto out;
	}

	while (offset < btree_sectors(c)) {
		struct bset *i;
		struct nonce nonce;
		struct bch_csum csum;
		struct bkey_packed *k;
		unsigned sectors;

		if (!offset) {
			i = &n_ondisk->keys;

			if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
				prt_printf(out, "unknown checksum type at offset %u: %llu\n",
					   offset, BSET_CSUM_TYPE(i));
				goto out;
			}

			nonce = btree_nonce(i, offset << 9);
			csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk);

			if (bch2_crc_cmp(csum, n_ondisk->csum)) {
				prt_printf(out, "invalid checksum\n");
				goto out;
			}

			bset_encrypt(c, i, offset << 9);

			sectors = vstruct_sectors(n_ondisk, c->block_bits);
		} else {
			struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9);

			i = &bne->keys;

			if (i->seq != n_ondisk->keys.seq)
				break;

			if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
				prt_printf(out, "unknown checksum type at offset %u: %llu\n",
					   offset, BSET_CSUM_TYPE(i));
				goto out;
			}

			nonce = btree_nonce(i, offset << 9);
			csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);

			if (bch2_crc_cmp(csum, bne->csum)) {
				prt_printf(out, "invalid checksum\n");
				goto out;
			}

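			/*
			 * Checksum verified: decrypt the bset in place before
			 * dumping it. With encryption enabled bset_encrypt()
			 * applies a symmetric stream cipher, so the same call
			 * decrypts on this read path; otherwise it's a no-op.
			 */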
			bset_encrypt(c, i, offset << 9);

			sectors = vstruct_sectors(bne, c->block_bits);
		}

		prt_printf(out, " offset %u version %u, journal seq %llu\n",
			   offset,
			   le16_to_cpu(i->version),
			   le64_to_cpu(i->journal_seq));
		offset += sectors;

		printbuf_indent_add(out, 4);

		for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) {
			struct bkey u;

			bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u));
			prt_newline(out);
		}

		printbuf_indent_sub(out, 4);
	}
out:
	if (bio)
		bio_put(bio);
	kvfree(n_ondisk);
	percpu_ref_put(&ca->io_ref);
}

#ifdef CONFIG_DEBUG_FS

/* XXX: bch_fs refcounting */

struct dump_iter {
	struct bch_fs	*c;
	enum btree_id	id;
	struct bpos	from;
	struct bpos	prev_node;
	u64		iter;

	struct printbuf	buf;

	char __user	*ubuf;	/* destination user buffer */
	size_t		size;	/* size of requested read */
	ssize_t		ret;	/* bytes read so far */
};

static ssize_t flush_buf(struct dump_iter *i)
{
	if (i->buf.pos) {
		size_t bytes = min_t(size_t, i->buf.pos, i->size);
		int copied = bytes - copy_to_user(i->ubuf, i->buf.buf, bytes);

		i->ret	+= copied;
		i->ubuf	+= copied;
		i->size	-= copied;
		i->buf.pos -= copied;
		memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos);

		if (copied != bytes)
			return -EFAULT;
	}

	return i->size ? 0 : i->ret;
}

static int bch2_dump_open(struct inode *inode, struct file *file)
{
	struct btree_debug *bd = inode->i_private;
	struct dump_iter *i;

	i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
	if (!i)
		return -ENOMEM;

	file->private_data = i;
	i->from	= POS_MIN;
	i->iter	= 0;
	i->c	= container_of(bd, struct bch_fs, btree_debug[bd->id]);
	i->id	= bd->id;
	i->buf	= PRINTBUF;

	return 0;
}

static int bch2_dump_release(struct inode *inode, struct file *file)
{
	struct dump_iter *i = file->private_data;

	printbuf_exit(&i->buf);
	kfree(i);
	return 0;
}

static ssize_t bch2_read_btree(struct file *file, char __user *buf,
			       size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;

	i->ubuf = buf;
	i->size	= size;
	i->ret	= 0;

	return flush_buf(i) ?:
		bch2_trans_run(i->c,
			for_each_btree_key(trans, iter, i->id, i->from,
					   BTREE_ITER_prefetch|
					   BTREE_ITER_all_snapshots, k, ({
				bch2_bkey_val_to_text(&i->buf, i->c, k);
				prt_newline(&i->buf);
				bch2_trans_unlock(trans);
				i->from = bpos_successor(iter.pos);
				flush_buf(i);
			}))) ?:
		i->ret;
}

static const struct file_operations btree_debug_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_read_btree,
};

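/*
 * ->read() for the per-btree "formats" file: walks whole btree nodes
 * rather than keys, printing a description of each node. i->from records
 * where to resume on the next read; once the last node has been printed
 * it is left at SPOS_MAX, which the early bpos_eq() check turns into EOF.
 */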
static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
				       size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct btree_trans *trans;
	struct btree_iter iter;
	struct btree *b;
	ssize_t ret;

	i->ubuf = buf;
	i->size	= size;
	i->ret	= 0;

	ret = flush_buf(i);
	if (ret)
		return ret;

	if (bpos_eq(SPOS_MAX, i->from))
		return i->ret;

	trans = bch2_trans_get(i->c);
retry:
	bch2_trans_begin(trans);

	for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) {
		bch2_btree_node_to_text(&i->buf, i->c, b);
		i->from = !bpos_eq(SPOS_MAX, b->key.k.p)
			? bpos_successor(b->key.k.p)
			: b->key.k.p;

		ret = drop_locks_do(trans, flush_buf(i));
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;

	bch2_trans_put(trans);

	if (!ret)
		ret = flush_buf(i);

	return ret ?: i->ret;
}

static const struct file_operations btree_format_debug_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_read_btree_formats,
};

static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
				       size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;

	i->ubuf = buf;
	i->size	= size;
	i->ret	= 0;

	return flush_buf(i) ?:
		bch2_trans_run(i->c,
			for_each_btree_key(trans, iter, i->id, i->from,
					   BTREE_ITER_prefetch|
					   BTREE_ITER_all_snapshots, k, ({
				struct btree_path_level *l =
					&btree_iter_path(trans, &iter)->l[0];
				struct bkey_packed *_k =
					bch2_btree_node_iter_peek(&l->iter, l->b);

				if (bpos_gt(l->b->key.k.p, i->prev_node)) {
					bch2_btree_node_to_text(&i->buf, i->c, l->b);
					i->prev_node = l->b->key.k.p;
				}

				bch2_bfloat_to_text(&i->buf, l->b, _k);
				bch2_trans_unlock(trans);
				i->from = bpos_successor(iter.pos);
				flush_buf(i);
			}))) ?:
		i->ret;
}

static const struct file_operations bfloat_failed_debug_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_read_bfloat_failed,
};

static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
					   struct btree *b)
{
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 32);

	prt_printf(out, "%px btree=%s l=%u\n", b, bch2_btree_id_str(b->c.btree_id), b->c.level);

	printbuf_indent_add(out, 2);

	bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
	prt_newline(out);

	prt_printf(out, "flags:\t");
	prt_bitflags(out, bch2_btree_node_flags, b->flags);
	prt_newline(out);

	prt_printf(out, "pcpu read locks:\t%u\n", b->c.lock.readers != NULL);
	prt_printf(out, "written:\t%u\n", b->written);
	prt_printf(out, "writes blocked:\t%u\n", !list_empty_careful(&b->write_blocked));
	prt_printf(out, "will make reachable:\t%lx\n", b->will_make_reachable);

	prt_printf(out, "journal pin %px:\t%llu\n",
		   &b->writes[0].journal, b->writes[0].journal.seq);
	prt_printf(out, "journal pin %px:\t%llu\n",
		   &b->writes[1].journal, b->writes[1].journal.seq);

	printbuf_indent_sub(out, 2);
}

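/*
 * Walk the btree node cache hash table one bucket at a time under RCU;
 * i->iter tracks the bucket index so the walk can resume across read()
 * calls. buf.atomic is bumped so printbuf reallocation won't sleep inside
 * the RCU read-side critical section (allocation failure is checked after).
 */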
static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
					    size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	bool done = false;
	ssize_t ret = 0;

	i->ubuf = buf;
	i->size	= size;
	i->ret	= 0;

	do {
		struct bucket_table *tbl;
		struct rhash_head *pos;
		struct btree *b;

		ret = flush_buf(i);
		if (ret)
			return ret;

		rcu_read_lock();
		i->buf.atomic++;
		tbl = rht_dereference_rcu(c->btree_cache.table.tbl,
					  &c->btree_cache.table);
		if (i->iter < tbl->size) {
			rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
				bch2_cached_btree_node_to_text(&i->buf, c, b);
			i->iter++;
		} else {
			done = true;
		}
		--i->buf.atomic;
		rcu_read_unlock();
	} while (!done);

	if (i->buf.allocation_failure)
		ret = -ENOMEM;

	if (!ret)
		ret = flush_buf(i);

	return ret ?: i->ret;
}

static const struct file_operations cached_btree_nodes_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_cached_btree_nodes_read,
};

typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r);

static void list_sort(struct list_head *head, list_cmp_fn cmp)
{
	struct list_head *pos;

	list_for_each(pos, head)
		while (!list_is_last(pos, head) &&
		       cmp(pos, pos->next) > 0) {
			struct list_head *pos2, *next = pos->next;

			list_del(next);
			list_for_each(pos2, head)
				if (cmp(next, pos2) < 0)
					goto pos_found;
			BUG();
pos_found:
			list_add_tail(next, pos2);
		}
}

static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r)
{
	return cmp_int(l, r);
}

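/*
 * Transactions are sorted by address so that i->iter (the address of the
 * last transaction emitted) is a stable resume point: the seqmutex is
 * dropped while generating output, and if it can't be relocked at the same
 * sequence number the walk restarts from the first transaction above i->iter.
 */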
static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
					    size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	struct btree_trans *trans;
	ssize_t ret = 0;

	i->ubuf = buf;
	i->size	= size;
	i->ret	= 0;
restart:
	seqmutex_lock(&c->btree_trans_lock);
	list_sort(&c->btree_trans_list, list_ptr_order_cmp);

	list_for_each_entry(trans, &c->btree_trans_list, list) {
		/* <=, not <: skip entries already emitted before a restart */
		if ((ulong) trans <= i->iter)
			continue;

		i->iter = (ulong) trans;

		if (!closure_get_not_zero(&trans->ref))
			continue;

		u32 seq = seqmutex_unlock(&c->btree_trans_lock);

		bch2_btree_trans_to_text(&i->buf, trans);

		prt_printf(&i->buf, "backtrace:\n");
		printbuf_indent_add(&i->buf, 2);
		bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
		printbuf_indent_sub(&i->buf, 2);
		prt_newline(&i->buf);

		closure_put(&trans->ref);

		ret = flush_buf(i);
		if (ret)
			goto unlocked;

		if (!seqmutex_relock(&c->btree_trans_lock, seq))
			goto restart;
	}
	seqmutex_unlock(&c->btree_trans_lock);
unlocked:
	if (i->buf.allocation_failure)
		ret = -ENOMEM;

	if (!ret)
		ret = flush_buf(i);

	return ret ?: i->ret;
}

static const struct file_operations btree_transactions_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_btree_transactions_read,
};

static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
				      size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	bool done = false;
	int err;

	i->ubuf = buf;
	i->size	= size;
	i->ret	= 0;

	while (1) {
		err = flush_buf(i);
		if (err)
			return err;

		if (!i->size)
			break;

		if (done)
			break;

		done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
		i->iter++;
	}

	if (i->buf.allocation_failure)
		return -ENOMEM;

	return i->ret;
}

static const struct file_operations journal_pins_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_journal_pins_read,
};

static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
				       size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	int err;

	i->ubuf = buf;
	i->size	= size;
	i->ret	= 0;

	if (!i->iter) {
		bch2_btree_updates_to_text(&i->buf, c);
		i->iter++;
	}

	err = flush_buf(i);
	if (err)
		return err;

	if (i->buf.allocation_failure)
		return -ENOMEM;

	return i->ret;
}

static const struct file_operations btree_updates_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_btree_updates_read,
};

static int btree_transaction_stats_open(struct inode *inode, struct file *file)
{
	struct bch_fs *c = inode->i_private;
	struct dump_iter *i;

	i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
	if (!i)
		return -ENOMEM;

	i->iter	= 1;
	i->c	= c;
	i->buf	= PRINTBUF;
	file->private_data = i;

	return 0;
}

static int btree_transaction_stats_release(struct inode *inode, struct file *file)
{
	struct dump_iter *i = file->private_data;

	printbuf_exit(&i->buf);
	kfree(i);

	return 0;
}

static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
					    size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	int err;

	i->ubuf = buf;
	i->size	= size;
	i->ret	= 0;

	while (1) {
		struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];

		err = flush_buf(i);
		if (err)
			return err;

		if (!i->size)
			break;

		if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) ||
		    !bch2_btree_transaction_fns[i->iter])
			break;

		prt_printf(&i->buf, "%s:\n", bch2_btree_transaction_fns[i->iter]);
		printbuf_indent_add(&i->buf, 2);

		mutex_lock(&s->lock);

		prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem);
		prt_printf(&i->buf, "Transaction duration:\n");

		printbuf_indent_add(&i->buf, 2);
		bch2_time_stats_to_text(&i->buf, &s->duration);
		printbuf_indent_sub(&i->buf, 2);

		if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
			prt_printf(&i->buf, "Lock hold times:\n");

			printbuf_indent_add(&i->buf, 2);
			bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
			printbuf_indent_sub(&i->buf, 2);
		}

		if (s->max_paths_text) {
			prt_printf(&i->buf, "Maximum allocated btree paths (%u):\n", s->nr_max_paths);

			printbuf_indent_add(&i->buf, 2);
			prt_str_indented(&i->buf, s->max_paths_text);
			printbuf_indent_sub(&i->buf, 2);
		}

		mutex_unlock(&s->lock);

		printbuf_indent_sub(&i->buf, 2);
		prt_newline(&i->buf);
		i->iter++;
	}

	if (i->buf.allocation_failure)
		return -ENOMEM;

	return i->ret;
}

static const struct file_operations btree_transaction_stats_op = {
	.owner		= THIS_MODULE,
	.open		= btree_transaction_stats_open,
	.release	= btree_transaction_stats_release,
	.read		= btree_transaction_stats_read,
};

/* walk btree transactions until we find a deadlock and print it */
static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct btree_trans *trans;
	pid_t iter = 0;
restart:
	seqmutex_lock(&c->btree_trans_lock);
	list_for_each_entry(trans, &c->btree_trans_list, list) {
		struct task_struct *task = READ_ONCE(trans->locking_wait.task);

		if (!task || task->pid <= iter)
			continue;

		iter = task->pid;

		if (!closure_get_not_zero(&trans->ref))
			continue;

		u32 seq = seqmutex_unlock(&c->btree_trans_lock);

		bool found = bch2_check_for_deadlock(trans, out) != 0;

		closure_put(&trans->ref);

		if (found)
			return;

		if (!seqmutex_relock(&c->btree_trans_lock, seq))
			goto restart;
	}
	seqmutex_unlock(&c->btree_trans_lock);
}

static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
					size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	ssize_t ret = 0;

	i->ubuf = buf;
	i->size	= size;
	i->ret	= 0;

	if (!i->iter) {
		btree_deadlock_to_text(&i->buf, c);
		i->iter++;
	}

	if (i->buf.allocation_failure)
		ret = -ENOMEM;

	if (!ret)
		ret = flush_buf(i);

	return ret ?: i->ret;
}

static const struct file_operations btree_deadlock_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_btree_deadlock_read,
};

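/*
 * debugfs layout created below (assuming debugfs is mounted at the usual
 * /sys/kernel/debug):
 *
 *   bcachefs/<fs-uuid>/{cached_btree_nodes,btree_transactions,journal_pins,
 *                       btree_updates,btree_transaction_stats,btree_deadlock}
 *   bcachefs/<fs-uuid>/btrees/<btree>/{keys,formats,bfloat-failed}
 */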
void bch2_fs_debug_exit(struct bch_fs *c)
{
	if (!IS_ERR_OR_NULL(c->fs_debug_dir))
		debugfs_remove_recursive(c->fs_debug_dir);
}

static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd)
{
	struct dentry *d;

	d = debugfs_create_dir(bch2_btree_id_str(bd->id), c->btree_debug_dir);

	debugfs_create_file("keys", 0400, d, bd, &btree_debug_ops);

	debugfs_create_file("formats", 0400, d, bd, &btree_format_debug_ops);

	debugfs_create_file("bfloat-failed", 0400, d, bd,
			    &bfloat_failed_debug_ops);
}

void bch2_fs_debug_init(struct bch_fs *c)
{
	struct btree_debug *bd;
	char name[100];

	if (IS_ERR_OR_NULL(bch_debug))
		return;

	snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
	c->fs_debug_dir = debugfs_create_dir(name, bch_debug);
	if (IS_ERR_OR_NULL(c->fs_debug_dir))
		return;

	debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir,
			    c->btree_debug, &cached_btree_nodes_ops);

	debugfs_create_file("btree_transactions", 0400, c->fs_debug_dir,
			    c->btree_debug, &btree_transactions_ops);

	debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
			    c->btree_debug, &journal_pins_ops);

	debugfs_create_file("btree_updates", 0400, c->fs_debug_dir,
			    c->btree_debug, &btree_updates_ops);

	debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
			    c, &btree_transaction_stats_op);

	debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir,
			    c->btree_debug, &btree_deadlock_ops);

	c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
	if (IS_ERR_OR_NULL(c->btree_debug_dir))
		return;

	for (bd = c->btree_debug;
	     bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
	     bd++) {
		bd->id = bd - c->btree_debug;
		bch2_fs_debug_btree_init(c, bd);
	}
}

#endif

void bch2_debug_exit(void)
{
	if (!IS_ERR_OR_NULL(bch_debug))
		debugfs_remove_recursive(bch_debug);
}

int __init bch2_debug_init(void)
{
	bch_debug = debugfs_create_dir("bcachefs", NULL);
	return 0;
}