// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bbpos.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "checksum.h"
#include "disk_accounting.h"
#include "error.h"
#include "progress.h"
#include "recovery_passes.h"

#include <linux/mm.h>

static int bch2_bucket_bitmap_set(struct bch_dev *, struct bucket_bitmap *, u64);

static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
{
	return (struct bbpos) {
		.btree	= bp.btree_id,
		.pos	= bp.pos,
	};
}

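/*
 * Validate a backpointer key: check that the level is within bounds and
 * that the key doesn't reference BCH_SB_MEMBER_INVALID as its device.
 */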
int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
			      struct bkey_validate_context from)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
	int ret = 0;

	bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH,
			 c, backpointer_level_bad,
			 "backpointer level bad: %u > %u",
			 bp.v->level, BTREE_MAX_DEPTH);

	bkey_fsck_err_on(bp.k->p.inode == BCH_SB_MEMBER_INVALID,
			 c, backpointer_dev_bad,
			 "backpointer for BCH_SB_MEMBER_INVALID");
fsck_err:
	return ret;
}

void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

	struct bch_dev *ca;
	u32 bucket_offset;
	struct bpos bucket;
	scoped_guard(rcu) {
		ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
		if (ca)
			bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset);
	}

	if (ca)
		prt_printf(out, "bucket=%llu:%llu:%u ", bucket.inode, bucket.offset, bucket_offset);
	else
		prt_printf(out, "sector=%llu:%llu ", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT);

	bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level);
	prt_str(out, " data_type=");
	bch2_prt_data_type(out, bp.v->data_type);
	prt_printf(out, " suboffset=%u len=%u gen=%u pos=",
		   (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   bp.v->bucket_len,
		   bp.v->bucket_gen);
	bch2_bpos_to_text(out, bp.v->pos);
}

void bch2_backpointer_swab(struct bkey_s k)
{
	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

	bp.v->bucket_len = swab32(bp.v->bucket_len);
	bch2_bpos_swab(&bp.v->pos);
}

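/*
 * Check whether @bp matches one of the pointers in extent @k: reconstruct
 * the backpointer each pointer should have with bch2_extent_ptr_to_bp()
 * and compare both position and value.
 */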
static bool extent_matches_bp(struct bch_fs *c,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c k,
			      struct bkey_s_c_backpointer bp)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bkey_i_backpointer bp2;
		bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp2);

		if (bpos_eq(bp.k->p, bp2.k.p) &&
		    !memcmp(bp.v, &bp2.v, sizeof(bp2.v)))
			return true;
	}

	return false;
}

static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bkey_s_c orig_k,
					struct bkey_i_backpointer *new_bp,
					struct bkey_s_c found_bp,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	bool will_check = c->recovery.passes_to_run &
		BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
	int ret = 0;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, found_bp);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
	} else if (!will_check) {
		prt_printf(&buf, "backpointer not found when deleting\n");
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, found_bp);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
	}

	if (!will_check && __bch2_inconsistent_error(c, &buf))
		ret = bch_err_throw(c, erofs_unfixed_errors);

	bch_err(c, "%s", buf.buf);
	printbuf_exit(&buf);
	return ret;
}

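/*
 * Slow path for modifying a backpointer when the write buffer can't be
 * used: look up the existing key directly, report an inconsistency if it
 * doesn't match what we expect, then insert (or delete, by writing a
 * whiteout) via a regular btree update.
 */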
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
					      struct bkey_s_c orig_k,
					      struct bkey_i_backpointer *bp,
					      bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
					       bp->k.p,
					       BTREE_ITER_intent|
					       BTREE_ITER_slots|
					       BTREE_ITER_with_updates);
	int ret = bkey_err(k);
	if (ret)
		return ret;

	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) {
		ret = backpointer_mod_err(trans, orig_k, bp, k, insert);
		if (ret)
			goto err;
	}

	if (!insert) {
		bp->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&bp->k, 0);
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}

static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos)
{
	return (!static_branch_unlikely(&bch2_backpointers_no_use_write_buffer)
		? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos)
		: bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?:
		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}

static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans,
						struct bkey_s_c visiting_k,
						struct bkey_buf *last_flushed)
{
	return !static_branch_unlikely(&bch2_backpointers_no_use_write_buffer)
		? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed)
		: 0;
}

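/*
 * Called when a backpointer doesn't match the extent or btree node it
 * claims to point to. With the write buffer in use this can simply be a
 * stale backpointer, so flush first and only report and delete if the
 * mismatch persists.
 */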
static int backpointer_target_not_found(struct btree_trans *trans,
					struct bkey_s_c_backpointer bp,
					struct bkey_s_c target_k,
					struct bkey_buf *last_flushed,
					bool commit)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	ret = last_flushed
		? bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed)
		: 0;
	if (ret)
		return ret;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n",
		   bp.v->level ? "btree node" : "extent");
	bch2_bkey_val_to_text(&buf, c, bp.s_c);

	prt_newline(&buf);
	bch2_bkey_val_to_text(&buf, c, target_k);

	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry)
		if (p.ptr.dev == bp.k->p.inode) {
			prt_newline(&buf);
			struct bkey_i_backpointer bp2;
			bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2);
			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i));
		}

	if (fsck_err(trans, backpointer_to_missing_ptr,
		     "%s", buf.buf)) {
		ret = bch2_backpointer_del(trans, bp.k->p);
		if (ret || !commit)
			goto out;

		/*
		 * Normally, on transaction commit from inside a transaction,
		 * we'll return -BCH_ERR_transaction_restart_nested, since a
		 * transaction commit invalidates pointers given out by peek().
		 *
		 * However, since we're updating a write buffer btree, if we
		 * return a transaction restart and loop we won't see that the
		 * backpointer has been deleted without an additional write
		 * buffer flush - and those are expensive.
		 *
		 * So we're relying on the caller immediately advancing to the
		 * next backpointer and starting a new transaction immediately
		 * after backpointer_get_key() returns NULL:
		 */
		ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
	}
out:
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans,
						 struct bkey_s_c_backpointer bp,
						 struct btree_iter *iter,
						 struct bkey_buf *last_flushed,
						 bool commit)
{
	struct bch_fs *c = trans->c;

	BUG_ON(!bp.v->level);

	bch2_trans_node_iter_init(trans, iter,
				  bp.v->btree_id,
				  bp.v->pos,
				  0,
				  bp.v->level - 1,
				  0);
	struct btree *b = bch2_btree_iter_peek_node(trans, iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

	BUG_ON(b->c.level != bp.v->level - 1);

	if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
			      bkey_i_to_s_c(&b->key), bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		b = ERR_PTR(bch_err_throw(c, backpointer_to_overwritten_btree_node));
	} else {
		int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key),
						       last_flushed, commit);
		b = ret ? ERR_PTR(ret) : NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}

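/*
 * Look up the extent (or, for interior levels, the btree node key) that a
 * backpointer points to. Returns bkey_s_c_null if the target is gone and
 * the backpointer was (or is about to be) deleted.
 */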
static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans,
						  struct bkey_s_c_backpointer bp,
						  struct btree_iter *iter,
						  unsigned iter_flags,
						  struct bkey_buf *last_flushed,
						  bool commit)
{
	struct bch_fs *c = trans->c;

	if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c)))
		return bkey_s_c_null;

	bch2_trans_node_iter_init(trans, iter,
				  bp.v->btree_id,
				  bp.v->pos,
				  0,
				  bp.v->level,
				  iter_flags);
	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
	if (bkey_err(k)) {
		bch2_trans_iter_exit(trans, iter);
		return k;
	}

	/*
	 * peek_slot() doesn't normally return NULL - except when we ask for a
	 * key at a btree level that doesn't exist.
	 *
	 * We may want to revisit this and change peek_slot():
	 */
	if (!k.k) {
		bkey_init(&iter->k);
		iter->k.p = bp.v->pos;
		k.k = &iter->k;
	}

	if (k.k &&
	    extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
		return k;

	bch2_trans_iter_exit(trans, iter);

	if (!bp.v->level) {
		int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit);
		return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
	} else {
		struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit);
		if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
			return bkey_s_c_null;
		if (IS_ERR_OR_NULL(b))
			return ((struct bkey_s_c) { .k = ERR_CAST(b) });

		return bkey_i_to_s_c(&b->key);
	}
}

struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct bkey_s_c_backpointer bp,
					struct btree_iter *iter,
					struct bkey_buf *last_flushed)
{
	return __bch2_backpointer_get_node(trans, bp, iter, last_flushed, true);
}

struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct bkey_s_c_backpointer bp,
					 struct btree_iter *iter,
					 unsigned iter_flags,
					 struct bkey_buf *last_flushed)
{
	return __bch2_backpointer_get_key(trans, bp, iter, iter_flags, last_flushed, true);
}
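
/*
 * Typical caller pattern (an illustrative sketch modelled on
 * check_one_backpointer() below, not an API contract): resolve the
 * backpointer, treat a missing target as already repaired, and exit the
 * iterator when done:
 *
 *	struct btree_iter iter;
 *	struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0,
 *						     last_flushed);
 *	int ret = bkey_err(k);
 *	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
 *		return 0;
 *	if (ret)
 *		return ret;
 *	bch2_trans_iter_exit(trans, &iter);
 */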

static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k,
						   struct bkey_buf *last_flushed)
{
	if (k.k->type != KEY_TYPE_backpointer)
		return 0;

	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = {};
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	struct bpos bucket;
	if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) {
		ret = bch2_backpointers_maybe_flush(trans, k, last_flushed);
		if (ret)
			goto out;

		if (fsck_err(trans, backpointer_to_missing_device,
			     "backpointer for missing device:\n%s",
			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_backpointer_del(trans, k.k->p);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (alloc_k.k->type != KEY_TYPE_alloc_v4) {
		ret = bch2_backpointers_maybe_flush(trans, k, last_flushed);
		if (ret)
			goto out;

		if (fsck_err(trans, backpointer_to_missing_alloc,
			     "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			     alloc_iter.pos.inode, alloc_iter.pos.offset,
			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_backpointer_del(trans, k.k->p);
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}

/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	struct bkey_buf last_flushed;
	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_backpointers, POS_MIN, 0, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed)));

	bch2_bkey_buf_exit(&last_flushed, c);
	bch_err_fn(c, ret);
	return ret;
}

struct extents_to_bp_state {
	struct bpos	bp_start;
	struct bpos	bp_end;
	struct bkey_buf	last_flushed;
};

static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
			       struct bkey_s_c extent, unsigned dev)
{
	struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bch2_bkey_drop_device(bkey_i_to_s(n), dev);
	return bch2_btree_insert_trans(trans, btree, n, 0);
}

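/*
 * Two extents claim the same space on @dev: read the data and verify the
 * first extent's checksum. Returns 1 (after dropping the bad pointer) if
 * the checksum failed, 0 if it verified or couldn't be checked, or a
 * negative error code.
 */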
static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	if (bkey_is_btree_ptr(extent.k))
		return false;

	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	if (!p.crc.csum_type)
		return false;

	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ,
					BCH_DEV_READ_REF_check_extent_checksums);
	if (!ca)
		return false;

	data_buf = kvmalloc(bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	prt_printf(&buf, "extents pointing to same space, but first extent checksum bad:\n");
	bch2_btree_id_to_text(&buf, btree);
	prt_str(&buf, " ");
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_newline(&buf);
	bch2_btree_id_to_text(&buf, o_btree);
	prt_str(&buf, " ");
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(extent.k->bversion, p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			trans, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	enumerated_ref_put(&ca->io_ref[READ],
			   BCH_DEV_READ_REF_check_extent_checksums);
	printbuf_exit(&buf);
	return ret;
}

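/*
 * Verify that the backpointer @bp computed for extent @orig_k exists in
 * the backpointers btree; backpointers outside the current
 * [bp_start, bp_end] window are skipped. If a different backpointer
 * occupies that slot, disambiguate the two extents (stale pointer,
 * duplicate version, or bad checksum) before deciding which one keeps
 * the bucket.
 */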
static int check_bp_exists(struct btree_trans *trans,
			   struct extents_to_bp_state *s,
			   struct bkey_i_backpointer *bp,
			   struct bkey_s_c orig_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter other_extent_iter = {};
	struct printbuf buf = PRINTBUF;

	if (bpos_lt(bp->k.p, s->bp_start) ||
	    bpos_gt(bp->k.p, s->bp_end))
		return 0;

	struct btree_iter bp_iter;
	struct bkey_s_c bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0);
	int ret = bkey_err(bp_k);
	if (ret)
		goto err;

	if (bp_k.k->type != KEY_TYPE_backpointer ||
	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) {
		ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
		if (ret)
			goto err;

		goto check_existing_bp;
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &other_extent_iter);
	bch2_trans_iter_exit(trans, &bp_iter);
	printbuf_exit(&buf);
	return ret;
check_existing_bp:
	/* Do we have a backpointer for a different extent? */
	if (bp_k.k->type != KEY_TYPE_backpointer)
		goto missing;

	struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k);

	struct bkey_s_c other_extent =
		__bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL, false);
	ret = bkey_err(other_extent);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		ret = 0;
	if (ret)
		goto err;

	if (!other_extent.k)
		goto missing;

	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp->k.p.inode);
	if (ca) {
		struct bkey_ptrs_c other_extent_ptrs = bch2_bkey_ptrs_c(other_extent);
		bkey_for_each_ptr(other_extent_ptrs, ptr)
			if (ptr->dev == bp->k.p.inode &&
			    dev_ptr_stale_rcu(ca, ptr)) {
				rcu_read_unlock();
				ret = drop_dev_and_update(trans, other_bp.v->btree_id,
							  other_extent, bp->k.p.inode);
				if (ret)
					goto err;
				goto out;
			}
	}
	rcu_read_unlock();

	if (bch2_extents_match(orig_k, other_extent)) {
		printbuf_reset(&buf);
		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n");
		bch2_bkey_val_to_text(&buf, c, orig_k);
		prt_newline(&buf);
		bch2_bkey_val_to_text(&buf, c, other_extent);
		bch_err(c, "%s", buf.buf);

		if (other_extent.k->size <= orig_k.k->size) {
			ret = drop_dev_and_update(trans, other_bp.v->btree_id,
						  other_extent, bp->k.p.inode);
			if (ret)
				goto err;
			goto out;
		} else {
			ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bp->k.p.inode);
			if (ret)
				goto err;
			goto missing;
		}
	}

	ret = check_extent_checksum(trans,
				    other_bp.v->btree_id, other_extent,
				    bp->v.btree_id, orig_k,
				    bp->k.p.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		ret = 0;
		goto missing;
	}

	ret = check_extent_checksum(trans, bp->v.btree_id, orig_k,
				    other_bp.v->btree_id, other_extent, bp->k.p.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		ret = 0;
		goto out;
	}

	printbuf_reset(&buf);
	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n", bp->k.p.inode);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_newline(&buf);
	bch2_bkey_val_to_text(&buf, c, other_extent);
	bch_err(c, "%s", buf.buf);
	ret = bch_err_throw(c, fsck_repair_unimplemented);
	goto err;
missing:
	printbuf_reset(&buf);
	prt_str(&buf, "missing backpointer\nfor: ");
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_printf(&buf, "\nwant: ");
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
	prt_printf(&buf, "\ngot: ");
	bch2_bkey_val_to_text(&buf, c, bp_k);

	if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
		ret = bch2_bucket_backpointer_mod(trans, orig_k, bp, true);

	goto out;
}

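/*
 * For each pointer in extent @k whose bucket is flagged in the
 * bucket_backpointer_mismatch bitmap, either verify the backpointer exists
 * (mixed bucket) or just create it (bucket known to have no backpointers
 * at all).
 */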
static int check_extent_to_backpointers(struct btree_trans *trans,
					struct extents_to_bp_state *s,
					enum btree_id btree, unsigned level,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.ptr.dev == BCH_SB_MEMBER_INVALID)
			continue;

		bool empty;
		{
			/* scoped_guard() is a loop, so it breaks continue */
			guard(rcu)();
			struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
			if (!ca)
				continue;

			if (p.ptr.cached && dev_ptr_stale_rcu(ca, &p.ptr))
				continue;

			u64 b = PTR_BUCKET_NR(ca, &p.ptr);
			if (!bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b))
				continue;

			empty = bch2_bucket_bitmap_test(&ca->bucket_backpointer_empty, b);
		}

		struct bkey_i_backpointer bp;
		bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);

		int ret = !empty
			? check_bp_exists(trans, s, &bp, k)
			: bch2_bucket_backpointer_mod(trans, k, &bp, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(trans, &iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	k = bkey_i_to_s_c(&b->key);
	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

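/*
 * Fsck pins btree nodes in the btree node cache to avoid re-reading them;
 * the budget is a percentage of total memory, controlled by the
 * fsck_memory_usage_percent option.
 */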
static u64 mem_may_pin_bytes(struct bch_fs *c)
{
	struct sysinfo i;
	si_meminfo(&i);

	u64 mem_bytes = i.totalram * i.mem_unit;
	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
}

static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
{
	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
}

static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
					u64 btree_leaf_mask,
					u64 btree_interior_mask,
					struct bbpos start, struct bbpos *end)
{
	struct bch_fs *c = trans->c;
	s64 mem_may_pin = mem_may_pin_bytes(c);
	int ret = 0;

	bch2_btree_cache_unpin(c);

	btree_interior_mask |= btree_leaf_mask;

	c->btree_cache.pinned_nodes_mask[0] = btree_leaf_mask;
	c->btree_cache.pinned_nodes_mask[1] = btree_interior_mask;
	c->btree_cache.pinned_nodes_start = start;
	c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;

	for (enum btree_id btree = start.btree;
	     btree < BTREE_ID_NR && !ret;
	     btree++) {
		unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1;

		if (!(BIT_ULL(btree) & btree_leaf_mask) &&
		    !(BIT_ULL(btree) & btree_interior_mask))
			continue;

		ret = __for_each_btree_node(trans, iter, btree,
					    btree == start.btree ? start.pos : POS_MIN,
					    0, depth, BTREE_ITER_prefetch, b, ({
			mem_may_pin -= btree_buf_bytes(b);
			if (mem_may_pin <= 0) {
				c->btree_cache.pinned_nodes_end = *end =
					BBPOS(btree, b->key.k.p);
				break;
			}
			bch2_node_pin(c, b);
			0;
		}));
	}

	return ret;
}

static inline int bch2_fs_going_ro(struct bch_fs *c)
{
	return test_bit(BCH_FS_going_ro, &c->flags)
		? -EROFS
		: 0;
}

static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	struct progress_indicator_state progress;
	int ret = 0;

	bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level,
						  BTREE_ITER_prefetch);

			ret = for_each_btree_key_continue(trans, iter, 0, k, ({
				bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
				bch2_fs_going_ro(c) ?:
				check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
				bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
			}));
			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}

enum alloc_sector_counter {
	ALLOC_dirty,
	ALLOC_cached,
	ALLOC_stripe,
	ALLOC_SECTORS_NR
};

static int data_type_to_alloc_counter(enum bch_data_type t)
{
	switch (t) {
	case BCH_DATA_btree:
	case BCH_DATA_user:
		return ALLOC_dirty;
	case BCH_DATA_cached:
		return ALLOC_cached;
	case BCH_DATA_stripe:
	case BCH_DATA_parity:
		return ALLOC_stripe;
	default:
		return -1;
	}
}

static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);

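/*
 * Sum the sectors accounted for by this bucket's backpointers and compare
 * against the alloc key; on mismatch, flag the bucket in the
 * bucket_backpointer_mismatch (and possibly bucket_backpointer_empty)
 * bitmaps for the extents -> backpointers pass to repair.
 */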
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
					     bool *had_mismatch,
					     struct bkey_buf *last_flushed)
{
	struct bch_fs *c = trans->c;
	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
	bool need_commit = false;

	*had_mismatch = false;

	if (a->data_type == BCH_DATA_sb ||
	    a->data_type == BCH_DATA_journal ||
	    a->data_type == BCH_DATA_parity)
		return 0;

	u32 sectors[ALLOC_SECTORS_NR];
	memset(sectors, 0, sizeof(sectors));

	struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p);
	if (!ca)
		return 0;

	struct btree_iter iter;
	struct bkey_s_c bp_k;
	int ret = 0;
	for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
				bucket_pos_to_bp_start(ca, alloc_k.k->p),
				bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
		if (bp_k.k->type != KEY_TYPE_backpointer)
			continue;

		struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);

		if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen &&
		    (bp.v->bucket_gen != a->gen ||
		     bp.v->pad)) {
			ret = bch2_backpointer_del(trans, bp_k.k->p);
			if (ret)
				break;

			need_commit = true;
			continue;
		}

		if (bp.v->bucket_gen != a->gen)
			continue;

		int alloc_counter = data_type_to_alloc_counter(bp.v->data_type);
		if (alloc_counter < 0)
			continue;

		sectors[alloc_counter] += bp.v->bucket_len;
	}
	bch2_trans_iter_exit(trans, &iter);
	if (ret)
		goto err;

	if (need_commit) {
		ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
		if (ret)
			goto err;
	}

	if (sectors[ALLOC_dirty] != a->dirty_sectors ||
	    sectors[ALLOC_cached] != a->cached_sectors ||
	    sectors[ALLOC_stripe] != a->stripe_sectors) {
		if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
			ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
			if (ret)
				goto err;
		}

		if (sectors[ALLOC_dirty] > a->dirty_sectors ||
		    sectors[ALLOC_cached] > a->cached_sectors ||
		    sectors[ALLOC_stripe] > a->stripe_sectors) {
			ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
				bch_err_throw(c, transaction_restart_nested);
			goto err;
		}

		bool empty = (sectors[ALLOC_dirty] +
			      sectors[ALLOC_stripe] +
			      sectors[ALLOC_cached]) == 0;

		ret = bch2_bucket_bitmap_set(ca, &ca->bucket_backpointer_mismatch,
					     alloc_k.k->p.offset) ?:
			(empty
			 ? bch2_bucket_bitmap_set(ca, &ca->bucket_backpointer_empty,
						  alloc_k.k->p.offset)
			 : 0);

		*had_mismatch = true;
	}
err:
	bch2_dev_put(ca);
	return ret;
}

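/*
 * Decide whether a level-1 backpointers btree node covers any bucket
 * flagged in a device's bucket_backpointer_mismatch bitmap; only such
 * nodes need to be pinned for the repair pass. btree_ptr_v2 keys carry
 * min_key, so the covered range is known; plain btree_ptr keys don't, so
 * we conservatively return true.
 */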
static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
{
	switch (k.k->type) {
	case KEY_TYPE_btree_ptr_v2: {
		bool ret = false;

		guard(rcu)();
		struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key;
		while (pos.inode <= k.k->p.inode) {
			if (pos.inode >= c->sb.nr_devices)
				break;

			struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode);
			if (!ca)
				goto next;

			struct bpos bucket = bp_pos_to_bucket(ca, pos);
			u64 next = ca->mi.nbuckets;

			unsigned long *bitmap = READ_ONCE(ca->bucket_backpointer_mismatch.buckets);
			if (bitmap)
				next = min_t(u64, next,
					     find_next_bit(bitmap, ca->mi.nbuckets, bucket.offset));

			bucket.offset = next;
			if (bucket.offset == ca->mi.nbuckets)
				goto next;

			ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p);
			if (ret)
				break;
next:
			pos = SPOS(pos.inode + 1, 0, 0);
		}

		return ret;
	}
	case KEY_TYPE_btree_ptr:
		return true;
	default:
		return false;
	}
}

static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k,
				  enum btree_id btree, unsigned level)
{
	struct btree_iter iter;
	bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0);
	struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
	int ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	if (b)
		bch2_node_pin(trans->c, b);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans,
						   struct bpos start, struct bpos *end)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	struct bkey_buf tmp;
	bch2_bkey_buf_init(&tmp);

	bch2_btree_cache_unpin(c);

	*end = SPOS_MAX;

	s64 mem_may_pin = mem_may_pin_bytes(c);
	struct btree_iter iter;
	bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
				  0, 1, BTREE_ITER_prefetch);
	ret = for_each_btree_key_continue(trans, iter, 0, k, ({
		if (!backpointer_node_has_missing(c, k))
			continue;

		mem_may_pin -= c->opts.btree_node_size;
		if (mem_may_pin <= 0)
			break;

		bch2_bkey_buf_reassemble(&tmp, c, k);
		struct btree_path *path = btree_iter_path(trans, &iter);

		BUG_ON(path->level != 1);

		bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1);
	}));
	if (ret)
		return ret;

	struct bpos pinned = SPOS_MAX;
	mem_may_pin = mem_may_pin_bytes(c);
	bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
				  0, 1, BTREE_ITER_prefetch);
	ret = for_each_btree_key_continue(trans, iter, 0, k, ({
		if (!backpointer_node_has_missing(c, k))
			continue;

		mem_may_pin -= c->opts.btree_node_size;
		if (mem_may_pin <= 0) {
			*end = pinned;
			break;
		}

		bch2_bkey_buf_reassemble(&tmp, c, k);
		struct btree_path *path = btree_iter_path(trans, &iter);

		BUG_ON(path->level != 1);

		int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1);

		if (!ret2)
			pinned = tmp.k->k.p;

		/* loop body evaluates to ret2, so pin errors are propagated */
		ret2;
	}));

	return ret;
}

int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	int ret = 0;

	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bp_start = POS_MIN };

	bch2_bkey_buf_init(&s.last_flushed);
	bkey_init(&s.last_flushed.k->k);

	ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
				 POS_MIN, BTREE_ITER_prefetch, k, ({
		bool had_mismatch;
		bch2_fs_going_ro(c) ?:
		check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed);
	}));
	if (ret)
		goto err;

	u64 nr_buckets = 0, nr_mismatches = 0;
	for_each_member_device(c, ca) {
		nr_buckets	+= ca->mi.nbuckets;
		nr_mismatches	+= ca->bucket_backpointer_mismatch.nr;
	}

	if (!nr_mismatches)
		goto err;

	bch_info(c, "scanning for missing backpointers in %llu/%llu buckets",
		 nr_mismatches, nr_buckets);

	while (1) {
		ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end);
		if (ret)
			break;

		if ( bpos_eq(s.bp_start, POS_MIN) &&
		    !bpos_eq(s.bp_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(s.bp_start, POS_MIN) ||
		    !bpos_eq(s.bp_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bp_start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, s.bp_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
		if (ret || bpos_eq(s.bp_end, SPOS_MAX))
			break;

		s.bp_start = bpos_successor(s.bp_end);
	}

	for_each_member_device(c, ca) {
		bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch);
		bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty);
	}
err:
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);
	bch2_btree_cache_unpin(c);

	bch_err_fn(c, ret);
	return ret;
}

static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans,
						 struct bpos bucket,
						 bool *had_mismatch,
						 struct bkey_buf *last_flushed)
{
	struct btree_iter alloc_iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &alloc_iter,
					       BTREE_ID_alloc, bucket,
					       BTREE_ITER_cached);
	int ret = bkey_err(k);
	if (ret)
		return ret;

	ret = check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}

int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
					   struct bch_dev *ca, u64 bucket,
					   bool copygc,
					   struct bkey_buf *last_flushed)
{
	struct bch_fs *c = trans->c;
	bool had_mismatch;
	int ret = lockrestart_do(trans,
		check_bucket_backpointer_pos_mismatch(trans, POS(ca->dev_idx, bucket),
						      &had_mismatch, last_flushed));
	if (ret || !had_mismatch)
		return ret;

	u64 nr = ca->bucket_backpointer_mismatch.nr;
	u64 allowed = copygc ? ca->mi.nbuckets >> 7 : 0;

	struct printbuf buf = PRINTBUF;
	__bch2_log_msg_start(ca->name, &buf);

	prt_printf(&buf, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n",
		   bucket, nr, ca->mi.nbuckets);

	bch2_run_explicit_recovery_pass(c, &buf,
			BCH_RECOVERY_PASS_check_extents_to_backpointers,
			nr < allowed ? RUN_RECOVERY_PASS_ratelimit : 0);

	bch2_print_str(c, KERN_ERR, buf.buf);
	printbuf_exit(&buf);
	return 0;
}

/* backpointers -> extents */

static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c bp_k,
				 struct bkey_buf *last_flushed)
{
	if (bp_k.k->type != KEY_TYPE_backpointer)
		return 0;

	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
	struct bbpos pos = bp_to_bbpos(*bp.v);

	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	struct btree_iter iter;
	struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed);
	int ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static int check_bucket_backpointers_to_extents(struct btree_trans *trans,
						struct bch_dev *ca, struct bpos bucket)
{
	u32 restart_count = trans->restart_count;
	struct bkey_buf last_flushed;
	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);

	int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers,
					 bucket_pos_to_bp_start(ca, bucket),
					 bucket_pos_to_bp_end(ca, bucket),
					 0, k,
		check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed)
	);

	bch2_bkey_buf_exit(&last_flushed, trans->c);
	return ret ?: trans_was_restarted(trans, restart_count);
}

static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	struct bch_fs *c = trans->c;
	struct bkey_buf last_flushed;
	struct progress_indicator_state progress;

	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);
	bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));

	int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers,
				     POS_MIN, BTREE_ITER_prefetch, k, ({
		bch2_progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
		check_one_backpointer(trans, start, end, k, &last_flushed);
	}));

	bch2_bkey_buf_exit(&last_flushed, c);
	return ret;
}

int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
	int ret;

	while (1) {
		ret = bch2_get_btree_in_memory_pos(trans,
						   BIT_ULL(BTREE_ID_extents)|
						   BIT_ULL(BTREE_ID_reflink),
						   ~0,
						   start, &end);
		if (ret)
			break;

		if (!bbpos_cmp(start, BBPOS_MIN) &&
		     bbpos_cmp(end, BBPOS_MAX))
			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (bbpos_cmp(start, BBPOS_MIN) ||
		    bbpos_cmp(end, BBPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_backpointers_to_extents(): ");
			bch2_bbpos_to_text(&buf, start);
			prt_str(&buf, "-");
			bch2_bbpos_to_text(&buf, end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
		if (ret || !bbpos_cmp(end, BBPOS_MAX))
			break;

		start = bbpos_successor(end);
	}
	bch2_trans_put(trans);

	bch2_btree_cache_unpin(c);

	bch_err_fn(c, ret);
	return ret;
}

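/*
 * Lazily allocate a device's bucket bitmap on first use and set @bit;
 * b->nr tracks the number of set bits.
 */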
static int bch2_bucket_bitmap_set(struct bch_dev *ca, struct bucket_bitmap *b, u64 bit)
{
	scoped_guard(mutex, &b->lock) {
		if (!b->buckets) {
			b->buckets = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
					      sizeof(unsigned long), GFP_KERNEL);
			if (!b->buckets)
				return bch_err_throw(ca->fs, ENOMEM_backpointer_mismatches_bitmap);
		}

		b->nr += !__test_and_set_bit(bit, b->buckets);
	}

	return 0;
}

int bch2_bucket_bitmap_resize(struct bch_dev *ca, struct bucket_bitmap *b,
			      u64 old_size, u64 new_size)
{
	scoped_guard(mutex, &b->lock) {
		if (!b->buckets)
			return 0;

		unsigned long *n = kvcalloc(BITS_TO_LONGS(new_size),
					    sizeof(unsigned long), GFP_KERNEL);
		if (!n)
			return bch_err_throw(ca->fs, ENOMEM_backpointer_mismatches_bitmap);

		memcpy(n, b->buckets,
		       BITS_TO_LONGS(min(old_size, new_size)) * sizeof(unsigned long));
		kvfree(b->buckets);
		b->buckets = n;
	}

	return 0;
}

void bch2_bucket_bitmap_free(struct bucket_bitmap *b)
{
	mutex_lock(&b->lock);
	kvfree(b->buckets);
	b->buckets = NULL;
	b->nr = 0;
	mutex_unlock(&b->lock);
}