// SPDX-License-Identifier: GPL-2.0
/*
 * Assorted bcachefs debug code
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcachefs.h"
#include "alloc_foreground.h"
#include "async_objs.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
#include "data_update.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
#include "fsck.h"
#include "inode.h"
#include "journal_reclaim.h"
#include "super.h"

#include <linux/console.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/seq_file.h>

static struct dentry *bch_debug;

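/*
 * Read back a single replica of a btree node and compare it against the
 * in-memory copy: the data is read into the shared verify_data scratch node,
 * re-sorted via bch2_btree_node_read_done(), and the resulting bset is
 * compared key by key with @b. Returns true if a mismatch was found.
 */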
static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
				      struct extent_ptr_decoded pick)
{
	struct btree *v = c->verify_data;
	struct btree_node *n_ondisk = c->verify_ondisk;
	struct btree_node *n_sorted = c->verify_data->data;
	struct bset *sorted, *inmemory = &b->data->keys;
	struct bio *bio;
	bool failed = false;

	struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
				BCH_DEV_READ_REF_btree_verify_replicas);
	if (!ca)
		return false;

	bio = bio_alloc_bioset(ca->disk_sb.bdev,
			       buf_pages(n_sorted, btree_buf_bytes(b)),
			       REQ_OP_READ|REQ_META,
			       GFP_NOFS,
			       &c->btree_bio);
	bio->bi_iter.bi_sector = pick.ptr.offset;
	bch2_bio_map(bio, n_sorted, btree_buf_bytes(b));

	submit_bio_wait(bio);

	bio_put(bio);
	enumerated_ref_put(&ca->io_ref[READ],
			   BCH_DEV_READ_REF_btree_verify_replicas);

	memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));

	v->written = 0;
	if (bch2_btree_node_read_done(c, ca, v, NULL, NULL))
		return false;

	n_sorted = c->verify_data->data;
	sorted = &n_sorted->keys;

	if (inmemory->u64s != sorted->u64s ||
	    memcmp(inmemory->start,
		   sorted->start,
		   vstruct_end(inmemory) - (void *) inmemory->start)) {
		unsigned offset = 0, sectors;
		struct bset *i;
		unsigned j;

		console_lock();

		printk(KERN_ERR "*** in memory:\n");
		bch2_dump_bset(c, b, inmemory, 0);

		printk(KERN_ERR "*** read back in:\n");
		bch2_dump_bset(c, v, sorted, 0);

		while (offset < v->written) {
			if (!offset) {
				i = &n_ondisk->keys;
				sectors = vstruct_blocks(n_ondisk, c->block_bits) <<
					c->block_bits;
			} else {
				struct btree_node_entry *bne =
					(void *) n_ondisk + (offset << 9);
				i = &bne->keys;

				sectors = vstruct_blocks(bne, c->block_bits) <<
					c->block_bits;
			}

			printk(KERN_ERR "*** on disk block %u:\n", offset);
			bch2_dump_bset(c, b, i, offset);

			offset += sectors;
		}

		for (j = 0; j < le16_to_cpu(inmemory->u64s); j++)
			if (inmemory->_data[j] != sorted->_data[j])
				break;

		console_unlock();
		bch_err(c, "verify failed at key %u", j);

		failed = true;
	}

	if (v->written != b->written) {
		bch_err(c, "written wrong: expected %u, got %u",
			b->written, v->written);
		failed = true;
	}

	return failed;
}

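/*
 * Verify the in-memory btree node @b against every replica on disk; on
 * mismatch this calls bch2_fs_fatal_error(). The per-filesystem
 * verify_ondisk/verify_data scratch buffers are allocated on first use.
 * No-op when the nochanges option is set.
 */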
void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
{
	struct bkey_ptrs_c ptrs;
	struct extent_ptr_decoded p;
	const union bch_extent_entry *entry;
	struct btree *v;
	struct bset *inmemory = &b->data->keys;
	struct bkey_packed *k;
	bool failed = false;

	if (c->opts.nochanges)
		return;

	bch2_btree_node_io_lock(b);
	mutex_lock(&c->verify_lock);

	if (!c->verify_ondisk) {
		c->verify_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL);
		if (!c->verify_ondisk)
			goto out;
	}

	if (!c->verify_data) {
		c->verify_data = __bch2_btree_node_mem_alloc(c);
		if (!c->verify_data)
			goto out;

		list_del_init(&c->verify_data->list);
	}

	BUG_ON(b->nsets != 1);

	for (k = inmemory->start; k != vstruct_last(inmemory); k = bkey_p_next(k))
		if (k->type == KEY_TYPE_btree_ptr_v2)
			((struct bch_btree_ptr_v2 *) bkeyp_val(&b->format, k))->mem_ptr = 0;

	v = c->verify_data;
	bkey_copy(&v->key, &b->key);
	v->c.level = b->c.level;
	v->c.btree_id = b->c.btree_id;
	bch2_btree_keys_init(v);

	ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key));
	bkey_for_each_ptr_decode(&b->key.k, ptrs, p, entry)
		failed |= bch2_btree_verify_replica(c, b, p);

	if (failed) {
		struct printbuf buf = PRINTBUF;

		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
		bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf);
		printbuf_exit(&buf);
	}
out:
	mutex_unlock(&c->verify_lock);
	bch2_btree_node_io_unlock(b);
}

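/*
 * Print the on-disk contents of a btree node: read one replica, then walk
 * each bset, verifying its checksum and running it through bset_encrypt()
 * before dumping the keys to @out.
 */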
void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
				    const struct btree *b)
{
	struct btree_node *n_ondisk = NULL;
	struct extent_ptr_decoded pick;
	struct bch_dev *ca;
	struct bio *bio = NULL;
	unsigned offset = 0;
	int ret;

	if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), NULL, &pick, -1) <= 0) {
		prt_printf(out, "error getting device to read from: invalid device\n");
		return;
	}

	ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ,
				BCH_DEV_READ_REF_btree_node_ondisk_to_text);
	if (!ca) {
		prt_printf(out, "error getting device to read from: not online\n");
		return;
	}

	n_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL);
	if (!n_ondisk) {
		prt_printf(out, "memory allocation failure\n");
		goto out;
	}

	bio = bio_alloc_bioset(ca->disk_sb.bdev,
			       buf_pages(n_ondisk, btree_buf_bytes(b)),
			       REQ_OP_READ|REQ_META,
			       GFP_NOFS,
			       &c->btree_bio);
	bio->bi_iter.bi_sector = pick.ptr.offset;
	bch2_bio_map(bio, n_ondisk, btree_buf_bytes(b));

	ret = submit_bio_wait(bio);
	if (ret) {
		prt_printf(out, "IO error reading btree node: %s\n", bch2_err_str(ret));
		goto out;
	}

	while (offset < btree_sectors(c)) {
		struct bset *i;
		struct nonce nonce;
		struct bch_csum csum;
		struct bkey_packed *k;
		unsigned sectors;

		if (!offset) {
			i = &n_ondisk->keys;

			if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
				prt_printf(out, "unknown checksum type at offset %u: %llu\n",
					   offset, BSET_CSUM_TYPE(i));
				goto out;
			}

			nonce = btree_nonce(i, offset << 9);
			csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, n_ondisk);

			if (bch2_crc_cmp(csum, n_ondisk->csum)) {
				prt_printf(out, "invalid checksum\n");
				goto out;
			}

			bset_encrypt(c, i, offset << 9);

			sectors = vstruct_sectors(n_ondisk, c->block_bits);
		} else {
			struct btree_node_entry *bne = (void *) n_ondisk + (offset << 9);

			i = &bne->keys;

			if (i->seq != n_ondisk->keys.seq)
				break;

			if (!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i))) {
				prt_printf(out, "unknown checksum type at offset %u: %llu\n",
					   offset, BSET_CSUM_TYPE(i));
				goto out;
			}

			nonce = btree_nonce(i, offset << 9);
			csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);

			if (bch2_crc_cmp(csum, bne->csum)) {
				prt_printf(out, "invalid checksum");
				goto out;
			}

			bset_encrypt(c, i, offset << 9);

			sectors = vstruct_sectors(bne, c->block_bits);
		}

		prt_printf(out, " offset %u version %u, journal seq %llu\n",
			   offset,
			   le16_to_cpu(i->version),
			   le64_to_cpu(i->journal_seq));
		offset += sectors;

		printbuf_indent_add(out, 4);

		for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) {
			struct bkey u;

			bch2_bkey_val_to_text(out, c, bkey_disassemble(b, k, &u));
			prt_newline(out);
		}

		printbuf_indent_sub(out, 4);
	}
out:
	if (bio)
		bio_put(bio);
	kvfree(n_ondisk);
	enumerated_ref_put(&ca->io_ref[READ],
			   BCH_DEV_READ_REF_btree_node_ondisk_to_text);
}

#ifdef CONFIG_DEBUG_FS

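/*
 * Copy as much buffered output as fits to userspace and shift the remainder
 * to the front of the printbuf. Returns 0 if there is still room in the user
 * buffer, the number of bytes copied so far once it is full, or -EFAULT if
 * the copy to userspace failed.
 */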
ssize_t bch2_debugfs_flush_buf(struct dump_iter *i)
{
	if (i->buf.pos) {
		size_t bytes = min_t(size_t, i->buf.pos, i->size);
		int copied = bytes - copy_to_user(i->ubuf, i->buf.buf, bytes);

		i->ret += copied;
		i->ubuf += copied;
		i->size -= copied;
		i->buf.pos -= copied;
		memmove(i->buf.buf, i->buf.buf + copied, i->buf.pos);

		if (i->buf.last_newline >= copied)
			i->buf.last_newline -= copied;
		if (i->buf.last_field >= copied)
			i->buf.last_field -= copied;

		if (copied != bytes)
			return -EFAULT;
	}

	return i->size ? 0 : i->ret;
}

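/*
 * Common open/release for the per-btree debugfs files: allocate a dump_iter
 * that tracks the caller's position (i->from) across reads.
 */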
static int bch2_dump_open(struct inode *inode, struct file *file)
{
	struct btree_debug *bd = inode->i_private;
	struct dump_iter *i;

	i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
	if (!i)
		return -ENOMEM;

	file->private_data = i;
	i->from = POS_MIN;
	i->iter = 0;
	i->c	= container_of(bd, struct bch_fs, btree_debug[bd->id]);
	i->id	= bd->id;
	i->buf	= PRINTBUF;

	return 0;
}

int bch2_dump_release(struct inode *inode, struct file *file)
{
	struct dump_iter *i = file->private_data;

	printbuf_exit(&i->buf);
	kfree(i);
	return 0;
}

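/* Dump every key in a btree, one bkey per line (the "keys" debugfs file). */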
static ssize_t bch2_read_btree(struct file *file, char __user *buf,
			       size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;

	i->ubuf = buf;
	i->size = size;
	i->ret	= 0;

	return bch2_debugfs_flush_buf(i) ?:
		bch2_trans_run(i->c,
			for_each_btree_key(trans, iter, i->id, i->from,
					   BTREE_ITER_prefetch|
					   BTREE_ITER_all_snapshots, k, ({
				bch2_bkey_val_to_text(&i->buf, i->c, k);
				prt_newline(&i->buf);
				bch2_trans_unlock(trans);
				i->from = bpos_successor(iter.pos);
				bch2_debugfs_flush_buf(i);
			}))) ?:
		i->ret;
}

static const struct file_operations btree_debug_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_read_btree,
};

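/*
 * Dump per-node information (bch2_btree_node_to_text()) for every node in a
 * btree (the "formats" debugfs file).
 */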
static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
				       size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;

	i->ubuf = buf;
	i->size = size;
	i->ret	= 0;

	ssize_t ret = bch2_debugfs_flush_buf(i);
	if (ret)
		return ret;

	if (bpos_eq(SPOS_MAX, i->from))
		return i->ret;

	return bch2_trans_run(i->c,
		for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({
			bch2_btree_node_to_text(&i->buf, i->c, b);
			i->from = !bpos_eq(SPOS_MAX, b->key.k.p)
				? bpos_successor(b->key.k.p)
				: b->key.k.p;

			drop_locks_do(trans, bch2_debugfs_flush_buf(i));
		}))) ?: i->ret;
}

static const struct file_operations btree_format_debug_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_read_btree_formats,
};

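/*
 * Dump auxiliary search tree (bkey_float) information for each key, backing
 * the "bfloat-failed" debugfs file.
 */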
static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
				       size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;

	i->ubuf = buf;
	i->size = size;
	i->ret	= 0;

	return bch2_debugfs_flush_buf(i) ?:
		bch2_trans_run(i->c,
			for_each_btree_key(trans, iter, i->id, i->from,
					   BTREE_ITER_prefetch|
					   BTREE_ITER_all_snapshots, k, ({
				struct btree_path_level *l =
					&btree_iter_path(trans, &iter)->l[0];
				struct bkey_packed *_k =
					bch2_btree_node_iter_peek(&l->iter, l->b);

				if (bpos_gt(l->b->key.k.p, i->prev_node)) {
					bch2_btree_node_to_text(&i->buf, i->c, l->b);
					i->prev_node = l->b->key.k.p;
				}

				bch2_bfloat_to_text(&i->buf, l->b, _k);
				bch2_trans_unlock(trans);
				i->from = bpos_successor(iter.pos);
				bch2_debugfs_flush_buf(i);
			}))) ?:
		i->ret;
}

static const struct file_operations bfloat_failed_debug_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_read_bfloat_failed,
};

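/* Print a summary of one cached (in-memory) btree node: key, flags, journal pins. */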
static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
					   struct btree *b)
{
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 32);

	prt_printf(out, "%px ", b);
	bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level);
	prt_printf(out, "\n");

	printbuf_indent_add(out, 2);

	bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
	prt_newline(out);

	prt_printf(out, "flags:\t");
	prt_bitflags(out, bch2_btree_node_flags, b->flags);
	prt_newline(out);

	prt_printf(out, "pcpu read locks:\t%u\n", b->c.lock.readers != NULL);
	prt_printf(out, "written:\t%u\n", b->written);
	prt_printf(out, "writes blocked:\t%u\n", !list_empty_careful(&b->write_blocked));
	prt_printf(out, "will make reachable:\t%lx\n", b->will_make_reachable);

	prt_printf(out, "journal pin %px:\t%llu\n",
		   &b->writes[0].journal, b->writes[0].journal.seq);
	prt_printf(out, "journal pin %px:\t%llu\n",
		   &b->writes[1].journal, b->writes[1].journal.seq);

	prt_printf(out, "ob:\t%u\n", b->ob.nr);

	printbuf_indent_sub(out, 2);
}

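/*
 * Walk the btree node cache hash table under RCU, one bucket per iteration,
 * dumping every cached node (the "cached_btree_nodes" debugfs file).
 */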
static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
					    size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	bool done = false;
	ssize_t ret = 0;

	i->ubuf = buf;
	i->size = size;
	i->ret	= 0;

	do {
		ret = bch2_debugfs_flush_buf(i);
		if (ret)
			return ret;

		i->buf.atomic++;
		scoped_guard(rcu) {
			struct bucket_table *tbl =
				rht_dereference_rcu(c->btree_cache.table.tbl,
						    &c->btree_cache.table);
			if (i->iter < tbl->size) {
				struct rhash_head *pos;
				struct btree *b;

				rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash)
					bch2_cached_btree_node_to_text(&i->buf, c, b);
				i->iter++;
			} else {
				done = true;
			}
		}
		--i->buf.atomic;
	} while (!done);

	if (i->buf.allocation_failure)
		ret = -ENOMEM;

	if (!ret)
		ret = bch2_debugfs_flush_buf(i);

	return ret ?: i->ret;
}

static const struct file_operations cached_btree_nodes_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_cached_btree_nodes_read,
};

typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r);

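/*
 * Simple O(n^2) in-place list sort; used here to order btree_trans_list by
 * pointer so iteration can be resumed after dropping btree_trans_lock.
 */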
static void list_sort(struct list_head *head, list_cmp_fn cmp)
{
	struct list_head *pos;

	list_for_each(pos, head)
		while (!list_is_last(pos, head) &&
		       cmp(pos, pos->next) > 0) {
			struct list_head *pos2, *next = pos->next;

			list_del(next);
			list_for_each(pos2, head)
				if (cmp(next, pos2) < 0)
					goto pos_found;
			BUG();
pos_found:
			list_add_tail(next, pos2);
		}
}

static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r)
{
	return cmp_int(l, r);
}

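/*
 * Dump every active btree_trans along with a backtrace of its owning task
 * (the "btree_transactions" debugfs file). The transaction list is kept
 * sorted by pointer so iteration can restart safely after the seqmutex is
 * dropped to flush output.
 */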
static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
					    size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	struct btree_trans *trans;
	ssize_t ret = 0;

	i->ubuf = buf;
	i->size = size;
	i->ret	= 0;
restart:
	seqmutex_lock(&c->btree_trans_lock);
	list_sort(&c->btree_trans_list, list_ptr_order_cmp);

	list_for_each_entry(trans, &c->btree_trans_list, list) {
		if ((ulong) trans <= i->iter)
			continue;

		i->iter = (ulong) trans;

		if (!closure_get_not_zero(&trans->ref))
			continue;

		u32 seq = seqmutex_unlock(&c->btree_trans_lock);

		bch2_btree_trans_to_text(&i->buf, trans);

		prt_printf(&i->buf, "backtrace:\n");
		printbuf_indent_add(&i->buf, 2);
		bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
		printbuf_indent_sub(&i->buf, 2);
		prt_newline(&i->buf);

		closure_put(&trans->ref);

		ret = bch2_debugfs_flush_buf(i);
		if (ret)
			goto unlocked;

		if (!seqmutex_relock(&c->btree_trans_lock, seq))
			goto restart;
	}
	seqmutex_unlock(&c->btree_trans_lock);
unlocked:
	if (i->buf.allocation_failure)
		ret = -ENOMEM;

	if (!ret)
		ret = bch2_debugfs_flush_buf(i);

	return ret ?: i->ret;
}

static const struct file_operations btree_transactions_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_btree_transactions_read,
};

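/* Dump journal pins, one journal sequence number per iteration (the "journal_pins" file). */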
static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
				      size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	bool done = false;
	int err;

	i->ubuf = buf;
	i->size = size;
	i->ret	= 0;

	while (1) {
		err = bch2_debugfs_flush_buf(i);
		if (err)
			return err;

		if (!i->size)
			break;

		if (done)
			break;

		done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
		i->iter++;
	}

	if (i->buf.allocation_failure)
		return -ENOMEM;

	return i->ret;
}

static const struct file_operations journal_pins_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_journal_pins_read,
};

static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
				       size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	int err;

	i->ubuf = buf;
	i->size = size;
	i->ret	= 0;

	if (!i->iter) {
		bch2_btree_updates_to_text(&i->buf, c);
		i->iter++;
	}

	err = bch2_debugfs_flush_buf(i);
	if (err)
		return err;

	if (i->buf.allocation_failure)
		return -ENOMEM;

	return i->ret;
}

static const struct file_operations btree_updates_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_btree_updates_read,
};

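/*
 * "btree_transaction_stats": per transaction call site statistics, covering
 * memory usage, duration, lock hold times and maximum allocated btree paths.
 */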
static int btree_transaction_stats_open(struct inode *inode, struct file *file)
{
	struct bch_fs *c = inode->i_private;
	struct dump_iter *i;

	i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
	if (!i)
		return -ENOMEM;

	i->iter = 1;
	i->c	= c;
	i->buf	= PRINTBUF;
	file->private_data = i;

	return 0;
}

static int btree_transaction_stats_release(struct inode *inode, struct file *file)
{
	struct dump_iter *i = file->private_data;

	printbuf_exit(&i->buf);
	kfree(i);

	return 0;
}

static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf,
					    size_t size, loff_t *ppos)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	int err;

	i->ubuf = buf;
	i->size = size;
	i->ret	= 0;

	while (1) {
		struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];

		err = bch2_debugfs_flush_buf(i);
		if (err)
			return err;

		if (!i->size)
			break;

		if (i->iter == ARRAY_SIZE(bch2_btree_transaction_fns) ||
		    !bch2_btree_transaction_fns[i->iter])
			break;

		prt_printf(&i->buf, "%s:\n", bch2_btree_transaction_fns[i->iter]);
		printbuf_indent_add(&i->buf, 2);

		mutex_lock(&s->lock);

		prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem);
#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE
		printbuf_indent_add(&i->buf, 2);
		bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace);
		printbuf_indent_sub(&i->buf, 2);
#endif

		prt_printf(&i->buf, "Transaction duration:\n");

		printbuf_indent_add(&i->buf, 2);
		bch2_time_stats_to_text(&i->buf, &s->duration);
		printbuf_indent_sub(&i->buf, 2);

		if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
			prt_printf(&i->buf, "Lock hold times:\n");

			printbuf_indent_add(&i->buf, 2);
			bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
			printbuf_indent_sub(&i->buf, 2);
		}

		if (s->max_paths_text) {
			prt_printf(&i->buf, "Maximum allocated btree paths (%u):\n", s->nr_max_paths);

			printbuf_indent_add(&i->buf, 2);
			prt_str_indented(&i->buf, s->max_paths_text);
			printbuf_indent_sub(&i->buf, 2);
		}

		mutex_unlock(&s->lock);

		printbuf_indent_sub(&i->buf, 2);
		prt_newline(&i->buf);
		i->iter++;
	}

	if (i->buf.allocation_failure)
		return -ENOMEM;

	return i->ret;
}

static const struct file_operations btree_transaction_stats_op = {
	.owner		= THIS_MODULE,
	.open		= btree_transaction_stats_open,
	.release	= btree_transaction_stats_release,
	.read		= btree_transaction_stats_read,
};

/* walk btree transactions until we find a deadlock and print it */
static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
{
	struct btree_trans *trans;
	ulong iter = 0;
restart:
	seqmutex_lock(&c->btree_trans_lock);
	list_sort(&c->btree_trans_list, list_ptr_order_cmp);

	list_for_each_entry(trans, &c->btree_trans_list, list) {
		if ((ulong) trans <= iter)
			continue;

		iter = (ulong) trans;

		if (!closure_get_not_zero(&trans->ref))
			continue;

		u32 seq = seqmutex_unlock(&c->btree_trans_lock);

		bool found = bch2_check_for_deadlock(trans, out) != 0;

		closure_put(&trans->ref);

		if (found)
			return;

		if (!seqmutex_relock(&c->btree_trans_lock, seq))
			goto restart;
	}
	seqmutex_unlock(&c->btree_trans_lock);
}

typedef void (*fs_to_text_fn)(struct printbuf *, struct bch_fs *);

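/*
 * Helper for debugfs files whose entire contents come from a single
 * fs_to_text_fn call: generate the output once, then drain the printbuf
 * across subsequent reads.
 */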
static ssize_t bch2_simple_print(struct file *file, char __user *buf,
				 size_t size, loff_t *ppos,
				 fs_to_text_fn fn)
{
	struct dump_iter *i = file->private_data;
	struct bch_fs *c = i->c;
	ssize_t ret = 0;

	i->ubuf = buf;
	i->size = size;
	i->ret	= 0;

	if (!i->iter) {
		fn(&i->buf, c);
		i->iter++;
	}

	if (i->buf.allocation_failure)
		ret = -ENOMEM;

	if (!ret)
		ret = bch2_debugfs_flush_buf(i);

	return ret ?: i->ret;
}

static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
					size_t size, loff_t *ppos)
{
	return bch2_simple_print(file, buf, size, ppos, btree_deadlock_to_text);
}

static const struct file_operations btree_deadlock_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_btree_deadlock_read,
};

static ssize_t bch2_write_points_read(struct file *file, char __user *buf,
				      size_t size, loff_t *ppos)
{
	return bch2_simple_print(file, buf, size, ppos, bch2_write_points_to_text);
}

static const struct file_operations write_points_ops = {
	.owner		= THIS_MODULE,
	.open		= bch2_dump_open,
	.release	= bch2_dump_release,
	.read		= bch2_write_points_read,
};

void bch2_fs_debug_exit(struct bch_fs *c)
{
	if (!IS_ERR_OR_NULL(c->fs_debug_dir))
		debugfs_remove_recursive(c->fs_debug_dir);
}

static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd)
{
	struct dentry *d;

	d = debugfs_create_dir(bch2_btree_id_str(bd->id), c->btree_debug_dir);

	debugfs_create_file("keys", 0400, d, bd, &btree_debug_ops);

	debugfs_create_file("formats", 0400, d, bd, &btree_format_debug_ops);

	debugfs_create_file("bfloat-failed", 0400, d, bd,
			    &bfloat_failed_debug_ops);
}

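/*
 * Create the per-filesystem debugfs hierarchy: the top level directory
 * (named by UUID for multi device filesystems, otherwise by c->name), the
 * assorted debug files above, and a "btrees" subdirectory with
 * keys/formats/bfloat-failed files for each btree.
 */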
void bch2_fs_debug_init(struct bch_fs *c)
{
	struct btree_debug *bd;
	char name[100];

	if (IS_ERR_OR_NULL(bch_debug))
		return;

	if (c->sb.multi_device)
		snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
	else
		strscpy(name, c->name, sizeof(name));

	c->fs_debug_dir = debugfs_create_dir(name, bch_debug);
	if (IS_ERR_OR_NULL(c->fs_debug_dir))
		return;

	debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir,
			    c->btree_debug, &cached_btree_nodes_ops);

	debugfs_create_file("btree_transactions", 0400, c->fs_debug_dir,
			    c->btree_debug, &btree_transactions_ops);

	debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
			    c->btree_debug, &journal_pins_ops);

	debugfs_create_file("btree_updates", 0400, c->fs_debug_dir,
			    c->btree_debug, &btree_updates_ops);

	debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
			    c, &btree_transaction_stats_op);

	debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir,
			    c->btree_debug, &btree_deadlock_ops);

	debugfs_create_file("write_points", 0400, c->fs_debug_dir,
			    c->btree_debug, &write_points_ops);

	bch2_fs_async_obj_debugfs_init(c);

	c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
	if (IS_ERR_OR_NULL(c->btree_debug_dir))
		return;

	for (bd = c->btree_debug;
	     bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
	     bd++) {
		bd->id = bd - c->btree_debug;
		bch2_fs_debug_btree_init(c, bd);
	}
}

#endif

void bch2_debug_exit(void)
{
	if (!IS_ERR_OR_NULL(bch_debug))
		debugfs_remove_recursive(bch_debug);
}

int __init bch2_debug_init(void)
{
	bch_debug = debugfs_create_dir("bcachefs", NULL);
	return 0;
}