xref: /linux/fs/bcachefs/backpointers.c (revision 8b40a46966d294bc64bad0feb13d3304fde738f2)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bbpos.h"
4 #include "alloc_background.h"
5 #include "backpointers.h"
6 #include "bkey_buf.h"
7 #include "btree_cache.h"
8 #include "btree_update.h"
9 #include "btree_update_interior.h"
10 #include "btree_write_buffer.h"
11 #include "checksum.h"
12 #include "error.h"
13 
14 #include <linux/mm.h>
15 
16 static bool extent_matches_bp(struct bch_fs *c,
17 			      enum btree_id btree_id, unsigned level,
18 			      struct bkey_s_c k,
19 			      struct bpos bucket,
20 			      struct bch_backpointer bp)
21 {
22 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
23 	const union bch_extent_entry *entry;
24 	struct extent_ptr_decoded p;
25 
26 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
27 		struct bpos bucket2;
28 		struct bch_backpointer bp2;
29 
30 		if (p.ptr.cached)
31 			continue;
32 
33 		bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
34 		if (bpos_eq(bucket, bucket2) &&
35 		    !memcmp(&bp, &bp2, sizeof(bp)))
36 			return true;
37 	}
38 
39 	return false;
40 }
41 
/*
 * Key-invalid hook for KEY_TYPE_backpointer: the key's position must
 * round-trip through bp_pos_to_bucket()/bucket_pos_to_bp() using the
 * stored bucket_offset.
 */
int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bkey_invalid_flags flags,
			     struct printbuf *err)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

	/* these will be caught by fsck */
	if (!bch2_dev_exists2(c, bp.k->p.inode))
		return 0;

	struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
	int ret = 0;

	/* bkey_fsck_err_on() sets ret and jumps to fsck_err on failure: */
	bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
			 c, err,
			 backpointer_pos_wrong,
			 "backpointer at wrong pos");
fsck_err:
	return ret;
}
62 
63 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
64 {
65 	prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
66 	       bch2_btree_id_str(bp->btree_id),
67 	       bp->level,
68 	       (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
69 	       (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
70 	       bp->bucket_len);
71 	bch2_bpos_to_text(out, bp->pos);
72 }
73 
74 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
75 {
76 	if (bch2_dev_exists2(c, k.k->p.inode)) {
77 		prt_str(out, "bucket=");
78 		bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
79 		prt_str(out, " ");
80 	}
81 
82 	bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
83 }
84 
85 void bch2_backpointer_swab(struct bkey_s k)
86 {
87 	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
88 
89 	bp.v->bucket_offset	= swab40(bp.v->bucket_offset);
90 	bp.v->bucket_len	= swab32(bp.v->bucket_len);
91 	bch2_bpos_swab(&bp.v->pos);
92 }
93 
/*
 * Report an inconsistency hit while modifying a backpointer: on insert, a
 * backpointer already existed where we wanted to create one; on delete, the
 * backpointer we expected to remove wasn't there.
 *
 * Returns -BCH_ERR_erofs_unfixed_errors (after marking the fs inconsistent)
 * when we're past the extents_to_backpointers repair pass, 0 otherwise -
 * before/at that pass fsck will repair the mismatch itself.
 */
static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bch_backpointer bp,
					struct bkey_s_c bp_k,
					struct bkey_s_c orig_k,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		/* Missing backpointer on delete - only log once fsck can no longer fix it: */
		prt_printf(&buf, "backpointer not found when deleting");
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
	} else {
		return 0;
	}
}
144 
/*
 * Insert (@insert == true) or delete the backpointer @bp for @bucket,
 * updating the backpointers btree directly instead of going through the
 * btree write buffer. Mismatches with the existing key are reported via
 * backpointer_mod_err().
 */
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
				struct bpos bucket,
				struct bch_backpointer bp,
				struct bkey_s_c orig_k,
				bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	struct bkey_i_backpointer *bp_k;
	int ret;

	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
	ret = PTR_ERR_OR_ZERO(bp_k);
	if (ret)
		return ret;

	bkey_backpointer_init(&bp_k->k_i);
	bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
	bp_k->v = bp;

	/* Deletion is expressed as a whiteout with no value: */
	if (!insert) {
		bp_k->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&bp_k->k, 0);
	}

	k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
			       bp_k->k.p,
			       BTREE_ITER_INTENT|
			       BTREE_ITER_SLOTS|
			       BTREE_ITER_WITH_UPDATES);
	ret = bkey_err(k);
	if (ret)
		goto err;

	/*
	 * Inserting: the slot should be empty;
	 * deleting: the slot should hold exactly the backpointer we expect:
	 */
	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
		ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
		if (ret)
			goto err;
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}
193 
/*
 * Find the next backpointer >= *bp_pos:
 */
/*
 * Advance *bp_pos to the next backpointer key for @bucket and copy its
 * value into *bp; sets *bp_pos to SPOS_MAX when the bucket has no more
 * backpointers. If @gen >= 0, the bucket's alloc key must still have that
 * generation, otherwise its backpointers are treated as stale.
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
			      struct bpos bucket, int gen,
			      struct bpos *bp_pos,
			      struct bch_backpointer *bp,
			      unsigned iter_flags)
{
	struct bch_fs *c = trans->c;
	/* First backpointer position belonging to the *next* bucket: */
	struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
	struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
	struct bkey_s_c k;
	int ret = 0;

	if (bpos_ge(*bp_pos, bp_end_pos))
		goto done;

	/* Check the bucket generation still matches, when requested: */
	if (gen >= 0) {
		k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				       bucket, BTREE_ITER_CACHED|iter_flags);
		ret = bkey_err(k);
		if (ret)
			goto out;

		if (k.k->type != KEY_TYPE_alloc_v4 ||
		    bkey_s_c_to_alloc_v4(k).v->gen != gen)
			goto done;
	}

	/* Clamp the search start to the beginning of this bucket's range: */
	*bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));

	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
				     *bp_pos, iter_flags, k, ret) {
		if (bpos_ge(k.k->p, bp_end_pos))
			break;

		*bp_pos = k.k->p;
		*bp = *bkey_s_c_to_backpointer(k).v;
		goto out;
	}
done:
	*bp_pos = SPOS_MAX;
out:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}
242 
/*
 * Complain about a backpointer whose target extent or btree node couldn't
 * be found. Only an error when backpointer updates bypass the btree write
 * buffer; otherwise the backpointer may simply have been deleted already.
 */
static void backpointer_not_found(struct btree_trans *trans,
				  struct bpos bp_pos,
				  struct bch_backpointer bp,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	if (likely(!bch2_backpointers_no_use_write_buffer))
		return;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
		   bp.level ? "btree node" : "extent");
	prt_printf(&buf, "bucket: ");
	bch2_bpos_to_text(&buf, bucket);
	prt_printf(&buf, "\n  ");

	prt_printf(&buf, "backpointer pos: ");
	bch2_bpos_to_text(&buf, bp_pos);
	prt_printf(&buf, "\n  ");

	bch2_backpointer_to_text(&buf, &bp);
	prt_printf(&buf, "\n  ");
	bch2_bkey_val_to_text(&buf, c, k);
	/*
	 * At or past the extents <-> backpointers fsck passes just log;
	 * before them, flag the filesystem as inconsistent:
	 */
	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}
280 
/*
 * Look up the key a backpointer points to, returning it with @iter
 * positioned on it. For interior backpointers (bp.level != 0) this is the
 * target btree node's key. Returns bkey_s_c_null if the target wasn't
 * found; on error the iterator has already been exited.
 */
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bpos bp_pos,
					 struct bch_backpointer bp,
					 unsigned iter_flags)
{
	if (likely(!bp.level)) {
		struct bch_fs *c = trans->c;
		struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
		struct bkey_s_c k;

		bch2_trans_node_iter_init(trans, iter,
					  bp.btree_id,
					  bp.pos,
					  0, 0,
					  iter_flags);
		k = bch2_btree_iter_peek_slot(iter);
		if (bkey_err(k)) {
			bch2_trans_iter_exit(trans, iter);
			return k;
		}

		/* Only a match if the extent still points back at our bucket: */
		if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
			return k;

		bch2_trans_iter_exit(trans, iter);
		backpointer_not_found(trans, bp_pos, bp, k);
		return bkey_s_c_null;
	} else {
		/* Interior backpointer: resolve to a node, return its key: */
		struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);

		if (IS_ERR_OR_NULL(b)) {
			bch2_trans_iter_exit(trans, iter);
			return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
		}
		return bkey_i_to_s_c(&b->key);
	}
}
319 
/*
 * Look up the btree node an interior (bp.level != 0) backpointer points
 * to. Returns the node with @iter positioned on it, NULL if not found, or
 * an ERR_PTR - in particular -BCH_ERR_backpointer_to_overwritten_btree_node
 * if the node is being replaced by an in-flight interior update.
 */
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bp_pos,
					struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
	struct btree *b;

	BUG_ON(!bp.level);

	/*
	 * Iterate at bp.level - 1 so that peek_node() returns the node at
	 * bp.level whose key we want to check:
	 */
	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  bp.level - 1,
				  0);
	b = bch2_btree_iter_peek_node(iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

	BUG_ON(b->c.level != bp.level - 1);

	if (extent_matches_bp(c, bp.btree_id, bp.level,
			      bkey_i_to_s_c(&b->key),
			      bucket, bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		/* Node is being rewritten; caller should retry later: */
		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
		b = NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}
358 
/*
 * Check a single backpointer key: delete it if it refers to a nonexistent
 * device or to a bucket with no alloc_v4 key.
 */
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
			backpointer_to_missing_device,
			"backpointer for missing device:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}

	/* Find the alloc key for the bucket this backpointer belongs to: */
	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				     bp_pos_to_bucket(c, k.k->p), 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
			backpointer_to_missing_alloc,
			"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			alloc_iter.pos.inode, alloc_iter.pos.offset,
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}
396 
/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	/* One transaction commit per backpointer key checked/repaired: */
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		  bch2_check_btree_backpointer(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}
408 
409 static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
410 {
411 	return bpos_eq(l.k->p, r.k->p) &&
412 		bkey_bytes(l.k) == bkey_bytes(r.k) &&
413 		!memcmp(l.v, r.v, bkey_val_bytes(l.k));
414 }
415 
/*
 * State for one pass of check_extents_to_backpointers: only backpointers
 * for buckets in [bucket_start, bucket_end] are checked this pass, so the
 * relevant slice of the backpointers btree can be pinned in memory.
 */
struct extents_to_bp_state {
	struct bpos	bucket_start;	/* first bucket covered by this pass */
	struct bpos	bucket_end;	/* last bucket covered (inclusive) */
	struct bkey_buf last_flushed;	/* last key we flushed the write buffer for */
};
421 
422 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
423 			       struct bkey_s_c extent, unsigned dev)
424 {
425 	struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
426 	int ret = PTR_ERR_OR_ZERO(n);
427 	if (ret)
428 		return ret;
429 
430 	bch2_bkey_drop_device(bkey_i_to_s(n), dev);
431 	return bch2_btree_insert_trans(trans, btree, n, 0);
432 }
433 
/*
 * Two extents on different btrees claim the same space on device @dev:
 * read the data @extent points to and verify its checksum. If the
 * checksum is bad, drop @extent's pointer to @dev.
 *
 * Returns: > 0 if the checksum was bad and the pointer was dropped,
 * 0 if the checksum matched (or couldn't be checked), negative on error.
 */
static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	/* Btree node pointers don't carry extent checksums we can verify here: */
	if (bkey_is_btree_ptr(extent.k))
		return false;

	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	/* No checksum type means nothing to verify: */
	if (!p.crc.csum_type)
		return false;

	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
	if (!bch2_dev_get_ioref(ca, READ))
		return false;

	data_buf = kvmalloc(bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	/* Read the (compressed) data directly from the device: */
	bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
	prt_printf(&buf, "\n  %s ", bch2_btree_id_str(btree));
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_printf(&buf, "\n  %s ", bch2_btree_id_str(o_btree));
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(extent.k->version, p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
	/* On a bad checksum, drop the pointer and report "dropped" via ret = 1: */
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			c, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
	return ret;
}
499 
500 static int check_bp_exists(struct btree_trans *trans,
501 			   struct extents_to_bp_state *s,
502 			   struct bpos bucket,
503 			   struct bch_backpointer bp,
504 			   struct bkey_s_c orig_k)
505 {
506 	struct bch_fs *c = trans->c;
507 	struct btree_iter bp_iter = {};
508 	struct btree_iter other_extent_iter = {};
509 	struct printbuf buf = PRINTBUF;
510 	struct bkey_s_c bp_k;
511 	struct bkey_buf tmp;
512 	int ret;
513 
514 	bch2_bkey_buf_init(&tmp);
515 
516 	if (!bch2_dev_bucket_exists(c, bucket)) {
517 		prt_str(&buf, "extent for nonexistent device:bucket ");
518 		bch2_bpos_to_text(&buf, bucket);
519 		prt_str(&buf, "\n  ");
520 		bch2_bkey_val_to_text(&buf, c, orig_k);
521 		bch_err(c, "%s", buf.buf);
522 		return -BCH_ERR_fsck_repair_unimplemented;
523 	}
524 
525 	if (bpos_lt(bucket, s->bucket_start) ||
526 	    bpos_gt(bucket, s->bucket_end))
527 		return 0;
528 
529 	bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
530 				  bucket_pos_to_bp(c, bucket, bp.bucket_offset),
531 				  0);
532 	ret = bkey_err(bp_k);
533 	if (ret)
534 		goto err;
535 
536 	if (bp_k.k->type != KEY_TYPE_backpointer ||
537 	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
538 		bch2_bkey_buf_reassemble(&tmp, c, orig_k);
539 
540 		if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
541 			if (bp.level) {
542 				bch2_trans_unlock(trans);
543 				bch2_btree_interior_updates_flush(c);
544 			}
545 
546 			ret = bch2_btree_write_buffer_flush_sync(trans);
547 			if (ret)
548 				goto err;
549 
550 			bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
551 			ret = -BCH_ERR_transaction_restart_write_buffer_flush;
552 			goto out;
553 		}
554 
555 		goto check_existing_bp;
556 	}
557 out:
558 err:
559 fsck_err:
560 	bch2_trans_iter_exit(trans, &other_extent_iter);
561 	bch2_trans_iter_exit(trans, &bp_iter);
562 	bch2_bkey_buf_exit(&tmp, c);
563 	printbuf_exit(&buf);
564 	return ret;
565 check_existing_bp:
566 	/* Do we have a backpointer for a different extent? */
567 	if (bp_k.k->type != KEY_TYPE_backpointer)
568 		goto missing;
569 
570 	struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
571 
572 	struct bkey_s_c other_extent =
573 		bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
574 	ret = bkey_err(other_extent);
575 	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
576 		ret = 0;
577 	if (ret)
578 		goto err;
579 
580 	if (!other_extent.k)
581 		goto missing;
582 
583 	if (bch2_extents_match(orig_k, other_extent)) {
584 		printbuf_reset(&buf);
585 		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n  ");
586 		bch2_bkey_val_to_text(&buf, c, orig_k);
587 		prt_str(&buf, "\n  ");
588 		bch2_bkey_val_to_text(&buf, c, other_extent);
589 		bch_err(c, "%s", buf.buf);
590 
591 		if (other_extent.k->size <= orig_k.k->size) {
592 			ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
593 			if (ret)
594 				goto err;
595 			goto out;
596 		} else {
597 			ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
598 			if (ret)
599 				goto err;
600 			goto missing;
601 		}
602 	}
603 
604 	ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
605 	if (ret < 0)
606 		goto err;
607 	if (ret) {
608 		ret = 0;
609 		goto missing;
610 	}
611 
612 	ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
613 	if (ret < 0)
614 		goto err;
615 	if (ret) {
616 		ret = 0;
617 		goto out;
618 	}
619 
620 	printbuf_reset(&buf);
621 	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n  ", bucket.inode);
622 	bch2_bkey_val_to_text(&buf, c, orig_k);
623 	prt_str(&buf, "\n  ");
624 	bch2_bkey_val_to_text(&buf, c, other_extent);
625 	bch_err(c, "%s", buf.buf);
626 	ret = -BCH_ERR_fsck_repair_unimplemented;
627 	goto err;
628 missing:
629 	printbuf_reset(&buf);
630 	prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
631 	       bch2_btree_id_str(bp.btree_id), bp.level);
632 	bch2_bkey_val_to_text(&buf, c, orig_k);
633 	prt_printf(&buf, "\n  got:   ");
634 	bch2_bkey_val_to_text(&buf, c, bp_k);
635 
636 	struct bkey_i_backpointer n_bp_k;
637 	bkey_backpointer_init(&n_bp_k.k_i);
638 	n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
639 	n_bp_k.v = bp;
640 	prt_printf(&buf, "\n  want:  ");
641 	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
642 
643 	if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
644 		ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
645 
646 	goto out;
647 }
648 
649 static int check_extent_to_backpointers(struct btree_trans *trans,
650 					struct extents_to_bp_state *s,
651 					enum btree_id btree, unsigned level,
652 					struct bkey_s_c k)
653 {
654 	struct bch_fs *c = trans->c;
655 	struct bkey_ptrs_c ptrs;
656 	const union bch_extent_entry *entry;
657 	struct extent_ptr_decoded p;
658 	int ret;
659 
660 	ptrs = bch2_bkey_ptrs_c(k);
661 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
662 		struct bpos bucket_pos;
663 		struct bch_backpointer bp;
664 
665 		if (p.ptr.cached)
666 			continue;
667 
668 		bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);
669 
670 		ret = check_bp_exists(trans, s, bucket_pos, bp, k);
671 		if (ret)
672 			return ret;
673 	}
674 
675 	return 0;
676 }
677 
/*
 * Check backpointers for a btree root's key - roots aren't visited by the
 * normal node walk, so they're handled here. *level is set to the root's
 * level so the caller knows where to start walking down from.
 */
static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	/* The root may have changed out from under us - retry if so: */
	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	/* The root's key lives at level + 1 (i.e. in struct bch_fs, not a node): */
	k = bkey_i_to_s_c(&b->key);
	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
709 
710 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
711 {
712 	return (struct bbpos) {
713 		.btree	= bp.btree_id,
714 		.pos	= bp.pos,
715 	};
716 }
717 
718 static u64 mem_may_pin_bytes(struct bch_fs *c)
719 {
720 	struct sysinfo i;
721 	si_meminfo(&i);
722 
723 	u64 mem_bytes = i.totalram * i.mem_unit;
724 	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
725 }
726 
727 static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
728 {
729 	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
730 }
731 
732 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
733 					u64 btree_leaf_mask,
734 					u64 btree_interior_mask,
735 					struct bbpos start, struct bbpos *end)
736 {
737 	struct bch_fs *c = trans->c;
738 	s64 mem_may_pin = mem_may_pin_bytes(c);
739 	int ret = 0;
740 
741 	btree_interior_mask |= btree_leaf_mask;
742 
743 	c->btree_cache.pinned_nodes_leaf_mask		= btree_leaf_mask;
744 	c->btree_cache.pinned_nodes_interior_mask	= btree_interior_mask;
745 	c->btree_cache.pinned_nodes_start		= start;
746 	c->btree_cache.pinned_nodes_end			= *end = BBPOS_MAX;
747 
748 	for (enum btree_id btree = start.btree;
749 	     btree < BTREE_ID_NR && !ret;
750 	     btree++) {
751 		unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
752 		struct btree_iter iter;
753 		struct btree *b;
754 
755 		if (!((1U << btree) & btree_leaf_mask) &&
756 		    !((1U << btree) & btree_interior_mask))
757 			continue;
758 
759 		__for_each_btree_node(trans, iter, btree,
760 				      btree == start.btree ? start.pos : POS_MIN,
761 				      0, depth, BTREE_ITER_PREFETCH, b, ret) {
762 			mem_may_pin -= btree_buf_bytes(b);
763 			if (mem_may_pin <= 0) {
764 				c->btree_cache.pinned_nodes_end = *end =
765 					BBPOS(btree, b->key.k.p);
766 				bch2_trans_iter_exit(trans, &iter);
767 				return 0;
768 			}
769 		}
770 		bch2_trans_iter_exit(trans, &iter);
771 	}
772 
773 	return ret;
774 }
775 
/*
 * One pass of extents -> backpointers checking: walk every btree from the
 * root down to the leaves (or down to level 1 for btrees whose leaf keys
 * don't carry pointers), checking that each key's pointers have matching
 * backpointers in this pass's bucket range.
 */
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		/* The root's key is checked separately, and gives us the top level: */
		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
						  level,
						  BTREE_ITER_PREFETCH);
			while (1) {
				bch2_trans_begin(trans);

				struct bkey_s_c k = bch2_btree_iter_peek(&iter);
				if (!k.k)
					break;
				ret = bkey_err(k) ?:
					check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
					bch2_trans_commit(trans, NULL, NULL,
							  BCH_TRANS_COMMIT_no_enospc);
				/* Transaction restarts just mean "retry this key": */
				if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
					ret = 0;
					continue;
				}
				if (ret)
					break;
				if (bpos_eq(iter.pos, SPOS_MAX))
					break;
				bch2_btree_iter_advance(&iter);
			}
			bch2_trans_iter_exit(trans, &iter);

			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}
829 
/*
 * Fsck pass: verify that every extent/btree node pointer has a matching
 * backpointer. Runs in one or more passes, each covering as much of the
 * backpointers btree as can be pinned in memory at once.
 */
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bucket_start = POS_MIN };
	int ret;

	bch2_bkey_buf_init(&s.last_flushed);
	bkey_init(&s.last_flushed.k->k);

	while (1) {
		struct bbpos end;
		/* Pin as much of the backpointers btree as fits in memory: */
		ret = bch2_get_btree_in_memory_pos(trans,
				BIT_ULL(BTREE_ID_backpointers),
				BIT_ULL(BTREE_ID_backpointers),
				BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
		if (ret)
			break;

		s.bucket_end = end.pos;

		if ( bpos_eq(s.bucket_start, POS_MIN) &&
		    !bpos_eq(s.bucket_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(s.bucket_start, POS_MIN) ||
		    !bpos_eq(s.bucket_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bucket_start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, s.bucket_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
		if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
			break;

		/* Next pass starts right after this pass's last bucket: */
		s.bucket_start = bpos_successor(s.bucket_end);
	}
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);

	/* Unpin the nodes we pinned above: */
	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}
883 
/*
 * Check that a single backpointer (within [start, end]) points at a real
 * extent or btree node; delete it if not. A missing target triggers one
 * write-buffer flush + transaction restart per backpointer before we
 * conclude it's really dangling.
 */
static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c_backpointer bp,
				 struct bpos *last_flushed_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bbpos pos = bp_to_bbpos(*bp.v);
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;

	/* Only backpointers whose targets fall in this pass's range: */
	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
	ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	/* Target missing - flush the write buffer once and retry this key: */
	if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
		*last_flushed_pos = bp.k->p;
		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
			-BCH_ERR_transaction_restart_write_buffer_flush;
		goto out;
	}

	if (fsck_err_on(!k.k, c,
			backpointer_to_missing_ptr,
			"backpointer for missing %s\n  %s",
			bp.v->level ? "btree node" : "extent",
			(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
		ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}
929 
/*
 * One pass of backpointers -> extents checking: walk every backpointer
 * key and verify the ones whose targets fall within [start, end].
 */
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	/* Tracks the last pos we flushed the write buffer for (see check_one_backpointer()): */
	struct bpos last_flushed_pos = SPOS_MAX;

	return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
				  POS_MIN, BTREE_ITER_PREFETCH, k,
				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_one_backpointer(trans, start, end,
				      bkey_s_c_to_backpointer(k),
				      &last_flushed_pos));
}
943 
944 int bch2_check_backpointers_to_extents(struct bch_fs *c)
945 {
946 	struct btree_trans *trans = bch2_trans_get(c);
947 	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
948 	int ret;
949 
950 	while (1) {
951 		ret = bch2_get_btree_in_memory_pos(trans,
952 						   (1U << BTREE_ID_extents)|
953 						   (1U << BTREE_ID_reflink),
954 						   ~0,
955 						   start, &end);
956 		if (ret)
957 			break;
958 
959 		if (!bbpos_cmp(start, BBPOS_MIN) &&
960 		    bbpos_cmp(end, BBPOS_MAX))
961 			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
962 				    __func__, btree_nodes_fit_in_ram(c));
963 
964 		if (bbpos_cmp(start, BBPOS_MIN) ||
965 		    bbpos_cmp(end, BBPOS_MAX)) {
966 			struct printbuf buf = PRINTBUF;
967 
968 			prt_str(&buf, "check_backpointers_to_extents(): ");
969 			bch2_bbpos_to_text(&buf, start);
970 			prt_str(&buf, "-");
971 			bch2_bbpos_to_text(&buf, end);
972 
973 			bch_verbose(c, "%s", buf.buf);
974 			printbuf_exit(&buf);
975 		}
976 
977 		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
978 		if (ret || !bbpos_cmp(end, BBPOS_MAX))
979 			break;
980 
981 		start = bbpos_successor(end);
982 	}
983 	bch2_trans_put(trans);
984 
985 	c->btree_cache.pinned_nodes_leaf_mask = 0;
986 	c->btree_cache.pinned_nodes_interior_mask = 0;
987 
988 	bch_err_fn(c, ret);
989 	return ret;
990 }
991