// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bbpos.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "error.h"

#include <linux/mm.h>

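/*
 * Verify that an extent or btree node key really does reference the given
 * bucket with the given backpointer: recompute the backpointer for each
 * non-cached pointer and compare.
 */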
static bool extent_matches_bp(struct bch_fs *c,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c k,
			      struct bpos bucket,
			      struct bch_backpointer bp)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket2;
		struct bch_backpointer bp2;

		if (p.ptr.cached)
			continue;

		bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
				      &bucket2, &bp2);
		if (bpos_eq(bucket, bucket2) &&
		    !memcmp(&bp, &bp2, sizeof(bp)))
			return true;
	}

	return false;
}

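/*
 * A backpointer key must be positioned at exactly the btree position that
 * encodes the bucket and bucket_offset stored in its value:
 */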
int bch2_backpointer_invalid(const struct bch_fs *c, struct bkey_s_c k,
			     enum bkey_invalid_flags flags,
			     struct printbuf *err)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
	struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);

	if (!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset))) {
		prt_str(err, "backpointer at wrong pos");
		return -BCH_ERR_invalid_bkey;
	}

	return 0;
}

void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
{
	prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
	       bch2_btree_ids[bp->btree_id],
	       bp->level,
	       (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
	       (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
	       bp->bucket_len);
	bch2_bpos_to_text(out, bp->pos);
}

void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	prt_str(out, "bucket=");
	bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
	prt_str(out, " ");

	bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
}

void bch2_backpointer_swab(struct bkey_s k)
{
	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

	bp.v->bucket_offset	= swab32(bp.v->bucket_offset);
	bp.v->bucket_len	= swab32(bp.v->bucket_len);
	bch2_bpos_swab(&bp.v->pos);
}

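/*
 * Log a mismatch when modifying the backpointers btree: an existing
 * backpointer where we expected an empty slot on insert, or a missing/wrong
 * backpointer on delete. Only a hard error (-EIO) once the
 * extents_to_backpointers fsck pass has completed:
 */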
static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bch_backpointer bp,
					struct bkey_s_c bp_k,
					struct bkey_s_c orig_k,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		prt_printf(&buf, "backpointer not found when deleting");
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		bch2_inconsistent_error(c);
		return -EIO;
	} else {
		return 0;
	}
}

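/*
 * Update the backpointers btree directly, without going through the btree
 * write buffer: checks that the slot is empty (when inserting) or holds the
 * backpointer we expect (when deleting) before applying the update.
 */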
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
				struct bkey_i_backpointer *bp_k,
				struct bch_backpointer bp,
				struct bkey_s_c orig_k,
				bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	int ret;

	k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
			       bp_k->k.p,
			       BTREE_ITER_INTENT|
			       BTREE_ITER_SLOTS|
			       BTREE_ITER_WITH_UPDATES);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
		ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
		if (ret)
			goto err;
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}

/*
 * Find the next backpointer >= *bp_pos, within the given bucket:
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
			      struct bpos bucket, int gen,
			      struct bpos *bp_pos,
			      struct bch_backpointer *bp,
			      unsigned iter_flags)
{
	struct bch_fs *c = trans->c;
	struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
	struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
	struct bkey_s_c k;
	int ret = 0;

	if (bpos_ge(*bp_pos, bp_end_pos))
		goto done;

	if (gen >= 0) {
		k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				       bucket, BTREE_ITER_CACHED|iter_flags);
		ret = bkey_err(k);
		if (ret)
			goto out;

		if (k.k->type != KEY_TYPE_alloc_v4 ||
		    bkey_s_c_to_alloc_v4(k).v->gen != gen)
			goto done;
	}

	*bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));

	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
				     *bp_pos, iter_flags, k, ret) {
		if (bpos_ge(k.k->p, bp_end_pos))
			break;

		*bp_pos = k.k->p;
		*bp = *bkey_s_c_to_backpointer(k).v;
		goto out;
	}
done:
	*bp_pos = SPOS_MAX;
out:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}

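/*
 * Complain about a backpointer that doesn't match the extent or btree node
 * it points to. This is a no-op unless bch2_backpointers_no_use_write_buffer
 * is set: with the write buffer in use, a mismatch may just mean the
 * backpointer update hasn't been flushed yet. A ratelimited error if the
 * extents_to_backpointers fsck pass is running or has run, a transaction
 * inconsistency otherwise:
 */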
static void backpointer_not_found(struct btree_trans *trans,
				  struct bpos bp_pos,
				  struct bch_backpointer bp,
				  struct bkey_s_c k,
				  const char *thing_it_points_to)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);

	if (likely(!bch2_backpointers_no_use_write_buffer))
		return;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
		   thing_it_points_to);
	prt_printf(&buf, "bucket: ");
	bch2_bpos_to_text(&buf, bucket);
	prt_printf(&buf, "\n  ");

	prt_printf(&buf, "backpointer pos: ");
	bch2_bpos_to_text(&buf, bp_pos);
	prt_printf(&buf, "\n  ");

	bch2_backpointer_to_text(&buf, &bp);
	prt_printf(&buf, "\n  ");
	bch2_bkey_val_to_text(&buf, c, k);
	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}

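/*
 * Look up the extent or btree node key that a backpointer points to, with an
 * iterator pointing at it. Returns bkey_s_c_null if the backpointer doesn't
 * match what it points to - with the btree write buffer in use, that just
 * means the backpointer entry is stale:
 */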
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bpos bp_pos,
					 struct bch_backpointer bp,
					 unsigned iter_flags)
{
	struct bch_fs *c = trans->c;
	struct btree_root *r = bch2_btree_id_root(c, bp.btree_id);
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
	struct bkey_s_c k;

	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  min(bp.level, r->level),
				  iter_flags);
	k = bch2_btree_iter_peek_slot(iter);
	if (bkey_err(k)) {
		bch2_trans_iter_exit(trans, iter);
		return k;
	}

	if (bp.level == r->level + 1)
		k = bkey_i_to_s_c(&r->key);

	if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
		return k;

	bch2_trans_iter_exit(trans, iter);

	if (unlikely(bch2_backpointers_no_use_write_buffer)) {
		if (bp.level) {
			struct btree *b;

			/*
			 * If a backpointer for a btree node wasn't found, it may be
			 * because it was overwritten by a new btree node that hasn't
			 * been written out yet - backpointer_get_node() checks for
			 * this:
			 */
			b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
			if (!IS_ERR_OR_NULL(b))
				return bkey_i_to_s_c(&b->key);

			bch2_trans_iter_exit(trans, iter);

			if (IS_ERR(b))
				return bkey_s_c_err(PTR_ERR(b));
			return bkey_s_c_null;
		}

		backpointer_not_found(trans, bp_pos, bp, k, "extent");
	}

	return bkey_s_c_null;
}

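/*
 * Look up the btree node a backpointer points to. Returns
 * -BCH_ERR_backpointer_to_overwritten_btree_node if the node found at that
 * position is a new node that hasn't been written out yet, NULL if no
 * matching node was found:
 */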
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bp_pos,
					struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
	struct btree *b;

	BUG_ON(!bp.level);

	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  bp.level - 1,
				  0);
	b = bch2_btree_iter_peek_node(iter);
	if (IS_ERR(b))
		goto err;

	if (b && extent_matches_bp(c, bp.btree_id, bp.level,
				   bkey_i_to_s_c(&b->key),
				   bucket, bp))
		return b;

	if (b && btree_node_will_make_reachable(b)) {
		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		backpointer_not_found(trans, bp_pos, bp,
				      bkey_i_to_s_c(&b->key), "btree node");
		b = NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}

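/*
 * Check a single backpointer key, deleting it if it points to a device that
 * doesn't exist or to a bucket with no alloc_v4 key:
 */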
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
			"backpointer for missing device:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				     bp_pos_to_bucket(c, k.k->p), 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
			"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			alloc_iter.pos.inode, alloc_iter.pos.offset,
			(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}

/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
		  bch2_check_btree_backpointer(trans, &iter, k)));
	if (ret)
		bch_err_fn(c, ret);
	return ret;
}

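/*
 * Level and position of the last extent we flushed the btree write buffer
 * for, so that each extent only triggers one flush:
 */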
struct bpos_level {
	unsigned	level;
	struct bpos	pos;
};

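/*
 * Check that the backpointer for the given extent pointer exists. If it's
 * missing, flush the btree write buffer once (the backpointer may simply not
 * have been flushed yet) and retry; if it's still missing after the flush,
 * report it via fsck_err() and (re)create it:
 */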
static int check_bp_exists(struct btree_trans *trans,
			   struct bpos bucket,
			   struct bch_backpointer bp,
			   struct bkey_s_c orig_k,
			   struct bpos bucket_start,
			   struct bpos bucket_end,
			   struct bpos_level *last_flushed)
{
	struct bch_fs *c = trans->c;
	struct btree_iter bp_iter = { NULL };
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c bp_k;
	int ret;

	if (bpos_lt(bucket, bucket_start) ||
	    bpos_gt(bucket, bucket_end))
		return 0;

	if (!bch2_dev_bucket_exists(c, bucket))
		goto missing;

	bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
				  bucket_pos_to_bp(c, bucket, bp.bucket_offset),
				  0);
	ret = bkey_err(bp_k);
	if (ret)
		goto err;

	if (bp_k.k->type != KEY_TYPE_backpointer ||
	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
		if (last_flushed->level != bp.level ||
		    !bpos_eq(last_flushed->pos, orig_k.k->p)) {
			last_flushed->level = bp.level;
			last_flushed->pos = orig_k.k->p;

			ret = bch2_btree_write_buffer_flush_sync(trans) ?:
				-BCH_ERR_transaction_restart_write_buffer_flush;
			goto out;
		}
		goto missing;
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &bp_iter);
	printbuf_exit(&buf);
	return ret;
missing:
	prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
	       bch2_btree_ids[bp.btree_id], bp.level);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_printf(&buf, "\nbp pos ");
	bch2_bpos_to_text(&buf, bp_iter.pos);

	if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers ||
	    c->opts.reconstruct_alloc ||
	    fsck_err(c, "%s", buf.buf))
		ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);

	goto out;
}

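/*
 * Check that every non-cached pointer in the key at the iterator's current
 * position has a backpointer; buckets outside [bucket_start, bucket_end] are
 * skipped, as they belong to a different pass:
 */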
static int check_extent_to_backpointers(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bucket_start,
					struct bpos bucket_end,
					struct bpos_level *last_flushed)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct bkey_s_c k;
	int ret;

	k = bch2_btree_iter_peek_all_levels(iter);
	ret = bkey_err(k);
	if (ret)
		return ret;
	if (!k.k)
		return 0;

	ptrs = bch2_bkey_ptrs_c(k);
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket_pos;
		struct bch_backpointer bp;

		if (p.ptr.cached)
			continue;

		bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
				      k, p, &bucket_pos, &bp);

		ret = check_bp_exists(trans, bucket_pos, bp, k,
				      bucket_start, bucket_end,
				      last_flushed);
		if (ret)
			return ret;
	}

	return 0;
}

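/*
 * The btree root's key isn't a key within any btree node, so it isn't visited
 * by the all-levels iterator above - check its pointers' backpointers
 * separately:
 */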
static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    enum btree_id btree_id,
					    struct bpos bucket_start,
					    struct bpos bucket_end,
					    struct bpos_level *last_flushed)
{
	struct bch_fs *c = trans->c;
	struct btree_root *r = bch2_btree_id_root(c, btree_id);
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	struct bkey_ptrs_c ptrs;
	struct extent_ptr_decoded p;
	const union bch_extent_entry *entry;
	int ret;

	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, r->level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	BUG_ON(b != btree_node_root(c, b));

	k = bkey_i_to_s_c(&b->key);
	ptrs = bch2_bkey_ptrs_c(k);
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket_pos;
		struct bch_backpointer bp;

		if (p.ptr.cached)
			continue;

		bch2_extent_ptr_to_bp(c, iter.btree_id, b->c.level + 1,
				      k, p, &bucket_pos, &bp);

		ret = check_bp_exists(trans, bucket_pos, bp, k,
				      bucket_start, bucket_end,
				      last_flushed);
		if (ret)
			goto err;
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
{
	return (struct bbpos) {
		.btree	= bp.btree_id,
		.pos	= bp.pos,
	};
}

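/*
 * Rough estimate of the number of btree nodes that fit in half of system
 * RAM; used to size the windows for the in-memory passes below:
 */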
static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
{
	struct sysinfo i;
	u64 mem_bytes;

	si_meminfo(&i);
	mem_bytes = i.totalram * i.mem_unit;
	return div_u64(mem_bytes >> 1, btree_bytes(c));
}

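/*
 * Find the range [start, *end] of btree positions whose nodes fit within the
 * budget from btree_nodes_fit_in_ram(): leaf nodes are counted for btrees in
 * btree_leaf_mask, interior nodes for those in btree_interior_mask.
 */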
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
					unsigned btree_leaf_mask,
					unsigned btree_interior_mask,
					struct bbpos start, struct bbpos *end)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
	enum btree_id btree;
	int ret = 0;

	for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
		unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;

		if (!((1U << btree) & btree_leaf_mask) &&
		    !((1U << btree) & btree_interior_mask))
			continue;

		bch2_trans_node_iter_init(trans, &iter, btree,
					  btree == start.btree ? start.pos : POS_MIN,
					  0, depth, 0);
		/*
		 * for_each_btree_key_continue() doesn't check the return value
		 * from bch2_btree_iter_advance(), which is needed when
		 * iterating over interior nodes where we'll see keys at
		 * SPOS_MAX:
		 */
		do {
			k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
			ret = bkey_err(k);
			if (!k.k || ret)
				break;

			--btree_nodes;
			if (!btree_nodes) {
				*end = BBPOS(btree, k.k->p);
				bch2_trans_iter_exit(trans, &iter);
				return 0;
			}
		} while (bch2_btree_iter_advance(&iter));
		bch2_trans_iter_exit(trans, &iter);
	}

	*end = BBPOS_MAX;
	return ret;
}

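/*
 * One pass of the extents -> backpointers check, verifying backpointers for
 * buckets in [bucket_start, bucket_end]: walk keys at every level of every
 * btree, plus the btree roots, and check that each pointer has a matching
 * backpointer.
 */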
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct bpos bucket_start,
						   struct bpos bucket_end)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	enum btree_id btree_id;
	struct bpos_level last_flushed = { UINT_MAX, POS_MIN };
	int ret = 0;

	for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) {
		unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
					  depth,
					  BTREE_ITER_ALL_LEVELS|
					  BTREE_ITER_PREFETCH);

		do {
			ret = commit_do(trans, NULL, NULL,
					BTREE_INSERT_LAZY_RW|
					BTREE_INSERT_NOFAIL,
					check_extent_to_backpointers(trans, &iter,
								bucket_start, bucket_end,
								&last_flushed));
			if (ret)
				break;
		} while (!bch2_btree_iter_advance(&iter));

		bch2_trans_iter_exit(trans, &iter);

		if (ret)
			break;

		ret = commit_do(trans, NULL, NULL,
				BTREE_INSERT_LAZY_RW|
				BTREE_INSERT_NOFAIL,
				check_btree_root_to_backpointers(trans, btree_id,
							bucket_start, bucket_end,
							&last_flushed));
		if (ret)
			break;
	}
	return ret;
}

static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
					 struct bpos bucket)
{
	return bch2_dev_exists2(c, bucket.inode)
		? bucket_pos_to_bp(c, bucket, 0)
		: bucket;
}

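/*
 * Walk the alloc and backpointers btrees in lockstep, counting nodes from
 * both against the in-memory budget, to find the largest bucket range
 * [start, *end] whose alloc and backpointer info fit in memory together:
 */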
static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
					struct bpos start, struct bpos *end)
{
	struct btree_iter alloc_iter;
	struct btree_iter bp_iter;
	struct bkey_s_c alloc_k, bp_k;
	size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
	bool alloc_end = false, bp_end = false;
	int ret = 0;

	bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
				  start, 0, 1, 0);
	bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
				  bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
	while (1) {
		alloc_k = !alloc_end
			? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
			: bkey_s_c_null;
		bp_k = !bp_end
			? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
			: bkey_s_c_null;

		ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
		if ((!alloc_k.k && !bp_k.k) || ret) {
			*end = SPOS_MAX;
			break;
		}

		--btree_nodes;
		if (!btree_nodes) {
			*end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
			break;
		}

		if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
		    bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
			if (!bch2_btree_iter_advance(&alloc_iter))
				alloc_end = true;
		} else {
			if (!bch2_btree_iter_advance(&bp_iter))
				bp_end = true;
		}
	}
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}

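/*
 * Check extents (and btree node pointers) for missing backpointers, in
 * multiple passes if the alloc info doesn't fit in memory all at once:
 */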
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bpos start = POS_MIN, end;
	int ret;

	while (1) {
		ret = bch2_get_alloc_in_memory_pos(trans, start, &end);
		if (ret)
			break;

		if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, start, end);
		if (ret || bpos_eq(end, SPOS_MAX))
			break;

		start = bpos_successor(end);
	}
	bch2_trans_put(trans);

	if (ret)
		bch_err_fn(c, ret);
	return ret;
}

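/*
 * Check that a backpointer points to a live extent or btree node: if the
 * target is missing, flush the btree write buffer and retry once before
 * deleting the backpointer.
 */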
static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c_backpointer bp,
				 struct bpos *last_flushed_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bbpos pos = bp_to_bbpos(*bp.v);
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;

	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
	ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
		*last_flushed_pos = bp.k->p;
		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
			-BCH_ERR_transaction_restart_write_buffer_flush;
		goto out;
	}

	if (fsck_err_on(!k.k, c,
			"backpointer for missing extent\n  %s",
			(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
		ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}

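/*
 * One pass of the backpointers -> extents check, covering backpointers whose
 * targets lie within [start, end]:
 */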
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bpos last_flushed_pos = SPOS_MAX;

	return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
				  POS_MIN, BTREE_ITER_PREFETCH, k,
				  NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
		check_one_backpointer(trans, start, end,
				      bkey_s_c_to_backpointer(k),
				      &last_flushed_pos));
}

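/*
 * Check for backpointers to missing extents, in multiple passes if the
 * extents and reflink btrees don't fit in memory all at once:
 */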
int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
	int ret;

	while (1) {
		ret = bch2_get_btree_in_memory_pos(trans,
						   (1U << BTREE_ID_extents)|
						   (1U << BTREE_ID_reflink),
						   ~0,
						   start, &end);
		if (ret)
			break;

		if (!bbpos_cmp(start, BBPOS_MIN) &&
		    bbpos_cmp(end, BBPOS_MAX))
			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (bbpos_cmp(start, BBPOS_MIN) ||
		    bbpos_cmp(end, BBPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_backpointers_to_extents(): ");
			bch2_bbpos_to_text(&buf, start);
			prt_str(&buf, "-");
			bch2_bbpos_to_text(&buf, end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
		if (ret || !bbpos_cmp(end, BBPOS_MAX))
			break;

		start = bbpos_successor(end);
	}
	bch2_trans_put(trans);

	if (ret)
		bch_err_fn(c, ret);
	return ret;
}