xref: /linux/fs/bcachefs/backpointers.c (revision 4cde72fead4cebb5b6b2fe9425904c2064739184)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bbpos.h"
4 #include "alloc_background.h"
5 #include "backpointers.h"
6 #include "btree_cache.h"
7 #include "btree_update.h"
8 #include "btree_update_interior.h"
9 #include "btree_write_buffer.h"
10 #include "error.h"
11 
12 #include <linux/mm.h>
13 
14 static bool extent_matches_bp(struct bch_fs *c,
15 			      enum btree_id btree_id, unsigned level,
16 			      struct bkey_s_c k,
17 			      struct bpos bucket,
18 			      struct bch_backpointer bp)
19 {
20 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
21 	const union bch_extent_entry *entry;
22 	struct extent_ptr_decoded p;
23 
24 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
25 		struct bpos bucket2;
26 		struct bch_backpointer bp2;
27 
28 		if (p.ptr.cached)
29 			continue;
30 
31 		bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
32 				      &bucket2, &bp2);
33 		if (bpos_eq(bucket, bucket2) &&
34 		    !memcmp(&bp, &bp2, sizeof(bp)))
35 			return true;
36 	}
37 
38 	return false;
39 }
40 
41 int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
42 			     enum bkey_invalid_flags flags,
43 			     struct printbuf *err)
44 {
45 	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
46 	struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
47 	int ret = 0;
48 
49 	bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
50 			 c, err,
51 			 backpointer_pos_wrong,
52 			 "backpointer at wrong pos");
53 fsck_err:
54 	return ret;
55 }
56 
57 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
58 {
59 	prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
60 	       bch2_btree_id_str(bp->btree_id),
61 	       bp->level,
62 	       (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
63 	       (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
64 	       bp->bucket_len);
65 	bch2_bpos_to_text(out, bp->pos);
66 }
67 
68 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
69 {
70 	prt_str(out, "bucket=");
71 	bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
72 	prt_str(out, " ");
73 
74 	bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
75 }
76 
77 void bch2_backpointer_swab(struct bkey_s k)
78 {
79 	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
80 
81 	bp.v->bucket_offset	= swab40(bp.v->bucket_offset);
82 	bp.v->bucket_len	= swab32(bp.v->bucket_len);
83 	bch2_bpos_swab(&bp.v->pos);
84 }
85 
86 static noinline int backpointer_mod_err(struct btree_trans *trans,
87 					struct bch_backpointer bp,
88 					struct bkey_s_c bp_k,
89 					struct bkey_s_c orig_k,
90 					bool insert)
91 {
92 	struct bch_fs *c = trans->c;
93 	struct printbuf buf = PRINTBUF;
94 
95 	if (insert) {
96 		prt_printf(&buf, "existing backpointer found when inserting ");
97 		bch2_backpointer_to_text(&buf, &bp);
98 		prt_newline(&buf);
99 		printbuf_indent_add(&buf, 2);
100 
101 		prt_printf(&buf, "found ");
102 		bch2_bkey_val_to_text(&buf, c, bp_k);
103 		prt_newline(&buf);
104 
105 		prt_printf(&buf, "for ");
106 		bch2_bkey_val_to_text(&buf, c, orig_k);
107 
108 		bch_err(c, "%s", buf.buf);
109 	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
110 		prt_printf(&buf, "backpointer not found when deleting");
111 		prt_newline(&buf);
112 		printbuf_indent_add(&buf, 2);
113 
114 		prt_printf(&buf, "searching for ");
115 		bch2_backpointer_to_text(&buf, &bp);
116 		prt_newline(&buf);
117 
118 		prt_printf(&buf, "got ");
119 		bch2_bkey_val_to_text(&buf, c, bp_k);
120 		prt_newline(&buf);
121 
122 		prt_printf(&buf, "for ");
123 		bch2_bkey_val_to_text(&buf, c, orig_k);
124 
125 		bch_err(c, "%s", buf.buf);
126 	}
127 
128 	printbuf_exit(&buf);
129 
130 	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
131 		bch2_inconsistent_error(c);
132 		return -EIO;
133 	} else {
134 		return 0;
135 	}
136 }
137 
138 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
139 				struct bkey_i_backpointer *bp_k,
140 				struct bch_backpointer bp,
141 				struct bkey_s_c orig_k,
142 				bool insert)
143 {
144 	struct btree_iter bp_iter;
145 	struct bkey_s_c k;
146 	int ret;
147 
148 	k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
149 			       bp_k->k.p,
150 			       BTREE_ITER_INTENT|
151 			       BTREE_ITER_SLOTS|
152 			       BTREE_ITER_WITH_UPDATES);
153 	ret = bkey_err(k);
154 	if (ret)
155 		goto err;
156 
157 	if (insert
158 	    ? k.k->type
159 	    : (k.k->type != KEY_TYPE_backpointer ||
160 	       memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
161 		ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
162 		if (ret)
163 			goto err;
164 	}
165 
166 	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
167 err:
168 	bch2_trans_iter_exit(trans, &bp_iter);
169 	return ret;
170 }
171 
172 /*
173  * Find the next backpointer >= *bp_offset:
174  */
175 int bch2_get_next_backpointer(struct btree_trans *trans,
176 			      struct bpos bucket, int gen,
177 			      struct bpos *bp_pos,
178 			      struct bch_backpointer *bp,
179 			      unsigned iter_flags)
180 {
181 	struct bch_fs *c = trans->c;
182 	struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
183 	struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
184 	struct bkey_s_c k;
185 	int ret = 0;
186 
187 	if (bpos_ge(*bp_pos, bp_end_pos))
188 		goto done;
189 
190 	if (gen >= 0) {
191 		k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
192 				       bucket, BTREE_ITER_CACHED|iter_flags);
193 		ret = bkey_err(k);
194 		if (ret)
195 			goto out;
196 
197 		if (k.k->type != KEY_TYPE_alloc_v4 ||
198 		    bkey_s_c_to_alloc_v4(k).v->gen != gen)
199 			goto done;
200 	}
201 
202 	*bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));
203 
204 	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
205 				     *bp_pos, iter_flags, k, ret) {
206 		if (bpos_ge(k.k->p, bp_end_pos))
207 			break;
208 
209 		*bp_pos = k.k->p;
210 		*bp = *bkey_s_c_to_backpointer(k).v;
211 		goto out;
212 	}
213 done:
214 	*bp_pos = SPOS_MAX;
215 out:
216 	bch2_trans_iter_exit(trans, &bp_iter);
217 	bch2_trans_iter_exit(trans, &alloc_iter);
218 	return ret;
219 }
220 
221 static void backpointer_not_found(struct btree_trans *trans,
222 				  struct bpos bp_pos,
223 				  struct bch_backpointer bp,
224 				  struct bkey_s_c k)
225 {
226 	struct bch_fs *c = trans->c;
227 	struct printbuf buf = PRINTBUF;
228 	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
229 
230 	/*
231 	 * If we're using the btree write buffer, the backpointer we were
232 	 * looking at may have already been deleted - failure to find what it
233 	 * pointed to is not an error:
234 	 */
235 	if (likely(!bch2_backpointers_no_use_write_buffer))
236 		return;
237 
238 	prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
239 		   bp.level ? "btree node" : "extent");
240 	prt_printf(&buf, "bucket: ");
241 	bch2_bpos_to_text(&buf, bucket);
242 	prt_printf(&buf, "\n  ");
243 
244 	prt_printf(&buf, "backpointer pos: ");
245 	bch2_bpos_to_text(&buf, bp_pos);
246 	prt_printf(&buf, "\n  ");
247 
248 	bch2_backpointer_to_text(&buf, &bp);
249 	prt_printf(&buf, "\n  ");
250 	bch2_bkey_val_to_text(&buf, c, k);
251 	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
252 		bch_err_ratelimited(c, "%s", buf.buf);
253 	else
254 		bch2_trans_inconsistent(trans, "%s", buf.buf);
255 
256 	printbuf_exit(&buf);
257 }
258 
259 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
260 					 struct btree_iter *iter,
261 					 struct bpos bp_pos,
262 					 struct bch_backpointer bp,
263 					 unsigned iter_flags)
264 {
265 	if (likely(!bp.level)) {
266 		struct bch_fs *c = trans->c;
267 		struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
268 		struct bkey_s_c k;
269 
270 		bch2_trans_node_iter_init(trans, iter,
271 					  bp.btree_id,
272 					  bp.pos,
273 					  0, 0,
274 					  iter_flags);
275 		k = bch2_btree_iter_peek_slot(iter);
276 		if (bkey_err(k)) {
277 			bch2_trans_iter_exit(trans, iter);
278 			return k;
279 		}
280 
281 		if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
282 			return k;
283 
284 		bch2_trans_iter_exit(trans, iter);
285 		backpointer_not_found(trans, bp_pos, bp, k);
286 		return bkey_s_c_null;
287 	} else {
288 		struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);
289 
290 		if (IS_ERR_OR_NULL(b)) {
291 			bch2_trans_iter_exit(trans, iter);
292 			return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
293 		}
294 		return bkey_i_to_s_c(&b->key);
295 	}
296 }
297 
298 struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
299 					struct btree_iter *iter,
300 					struct bpos bp_pos,
301 					struct bch_backpointer bp)
302 {
303 	struct bch_fs *c = trans->c;
304 	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
305 	struct btree *b;
306 
307 	BUG_ON(!bp.level);
308 
309 	bch2_trans_node_iter_init(trans, iter,
310 				  bp.btree_id,
311 				  bp.pos,
312 				  0,
313 				  bp.level - 1,
314 				  0);
315 	b = bch2_btree_iter_peek_node(iter);
316 	if (IS_ERR_OR_NULL(b))
317 		goto err;
318 
319 	BUG_ON(b->c.level != bp.level - 1);
320 
321 	if (extent_matches_bp(c, bp.btree_id, bp.level,
322 			      bkey_i_to_s_c(&b->key),
323 			      bucket, bp))
324 		return b;
325 
326 	if (btree_node_will_make_reachable(b)) {
327 		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
328 	} else {
329 		backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
330 		b = NULL;
331 	}
332 err:
333 	bch2_trans_iter_exit(trans, iter);
334 	return b;
335 }
336 
337 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
338 					struct bkey_s_c k)
339 {
340 	struct bch_fs *c = trans->c;
341 	struct btree_iter alloc_iter = { NULL };
342 	struct bkey_s_c alloc_k;
343 	struct printbuf buf = PRINTBUF;
344 	int ret = 0;
345 
346 	if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
347 			backpointer_to_missing_device,
348 			"backpointer for missing device:\n%s",
349 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
350 		ret = bch2_btree_delete_at(trans, bp_iter, 0);
351 		goto out;
352 	}
353 
354 	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
355 				     bp_pos_to_bucket(c, k.k->p), 0);
356 	ret = bkey_err(alloc_k);
357 	if (ret)
358 		goto out;
359 
360 	if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
361 			backpointer_to_missing_alloc,
362 			"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
363 			alloc_iter.pos.inode, alloc_iter.pos.offset,
364 			(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
365 		ret = bch2_btree_delete_at(trans, bp_iter, 0);
366 		goto out;
367 	}
368 out:
369 fsck_err:
370 	bch2_trans_iter_exit(trans, &alloc_iter);
371 	printbuf_exit(&buf);
372 	return ret;
373 }
374 
375 /* verify that every backpointer has a corresponding alloc key */
376 int bch2_check_btree_backpointers(struct bch_fs *c)
377 {
378 	struct btree_iter iter;
379 	struct bkey_s_c k;
380 	int ret;
381 
382 	ret = bch2_trans_run(c,
383 		for_each_btree_key_commit(trans, iter,
384 			BTREE_ID_backpointers, POS_MIN, 0, k,
385 			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
386 		  bch2_check_btree_backpointer(trans, &iter, k)));
387 	if (ret)
388 		bch_err_fn(c, ret);
389 	return ret;
390 }
391 
392 struct bpos_level {
393 	unsigned	level;
394 	struct bpos	pos;
395 };
396 
397 static int check_bp_exists(struct btree_trans *trans,
398 			   struct bpos bucket,
399 			   struct bch_backpointer bp,
400 			   struct bkey_s_c orig_k,
401 			   struct bpos bucket_start,
402 			   struct bpos bucket_end,
403 			   struct bpos_level *last_flushed)
404 {
405 	struct bch_fs *c = trans->c;
406 	struct btree_iter bp_iter = { NULL };
407 	struct printbuf buf = PRINTBUF;
408 	struct bkey_s_c bp_k;
409 	int ret;
410 
411 	if (bpos_lt(bucket, bucket_start) ||
412 	    bpos_gt(bucket, bucket_end))
413 		return 0;
414 
415 	if (!bch2_dev_bucket_exists(c, bucket))
416 		goto missing;
417 
418 	bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
419 				  bucket_pos_to_bp(c, bucket, bp.bucket_offset),
420 				  0);
421 	ret = bkey_err(bp_k);
422 	if (ret)
423 		goto err;
424 
425 	if (bp_k.k->type != KEY_TYPE_backpointer ||
426 	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
427 		if (last_flushed->level != bp.level ||
428 		    !bpos_eq(last_flushed->pos, orig_k.k->p)) {
429 			last_flushed->level = bp.level;
430 			last_flushed->pos = orig_k.k->p;
431 
432 			ret = bch2_btree_write_buffer_flush_sync(trans) ?:
433 				-BCH_ERR_transaction_restart_write_buffer_flush;
434 			goto out;
435 		}
436 		goto missing;
437 	}
438 out:
439 err:
440 fsck_err:
441 	bch2_trans_iter_exit(trans, &bp_iter);
442 	printbuf_exit(&buf);
443 	return ret;
444 missing:
445 	prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
446 	       bch2_btree_id_str(bp.btree_id), bp.level);
447 	bch2_bkey_val_to_text(&buf, c, orig_k);
448 	prt_printf(&buf, "\nbp pos ");
449 	bch2_bpos_to_text(&buf, bp_iter.pos);
450 
451 	if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers ||
452 	    c->opts.reconstruct_alloc ||
453 	    fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
454 		ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
455 
456 	goto out;
457 }
458 
459 static int check_extent_to_backpointers(struct btree_trans *trans,
460 					struct btree_iter *iter,
461 					struct bpos bucket_start,
462 					struct bpos bucket_end,
463 					struct bpos_level *last_flushed)
464 {
465 	struct bch_fs *c = trans->c;
466 	struct bkey_ptrs_c ptrs;
467 	const union bch_extent_entry *entry;
468 	struct extent_ptr_decoded p;
469 	struct bkey_s_c k;
470 	int ret;
471 
472 	k = bch2_btree_iter_peek_all_levels(iter);
473 	ret = bkey_err(k);
474 	if (ret)
475 		return ret;
476 	if (!k.k)
477 		return 0;
478 
479 	ptrs = bch2_bkey_ptrs_c(k);
480 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
481 		struct bpos bucket_pos;
482 		struct bch_backpointer bp;
483 
484 		if (p.ptr.cached)
485 			continue;
486 
487 		bch2_extent_ptr_to_bp(c, iter->btree_id, iter->path->level,
488 				      k, p, &bucket_pos, &bp);
489 
490 		ret = check_bp_exists(trans, bucket_pos, bp, k,
491 				      bucket_start, bucket_end,
492 				      last_flushed);
493 		if (ret)
494 			return ret;
495 	}
496 
497 	return 0;
498 }
499 
500 static int check_btree_root_to_backpointers(struct btree_trans *trans,
501 					    enum btree_id btree_id,
502 					    struct bpos bucket_start,
503 					    struct bpos bucket_end,
504 					    struct bpos_level *last_flushed)
505 {
506 	struct bch_fs *c = trans->c;
507 	struct btree_root *r = bch2_btree_id_root(c, btree_id);
508 	struct btree_iter iter;
509 	struct btree *b;
510 	struct bkey_s_c k;
511 	struct bkey_ptrs_c ptrs;
512 	struct extent_ptr_decoded p;
513 	const union bch_extent_entry *entry;
514 	int ret;
515 
516 	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, r->level, 0);
517 	b = bch2_btree_iter_peek_node(&iter);
518 	ret = PTR_ERR_OR_ZERO(b);
519 	if (ret)
520 		goto err;
521 
522 	BUG_ON(b != btree_node_root(c, b));
523 
524 	k = bkey_i_to_s_c(&b->key);
525 	ptrs = bch2_bkey_ptrs_c(k);
526 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
527 		struct bpos bucket_pos;
528 		struct bch_backpointer bp;
529 
530 		if (p.ptr.cached)
531 			continue;
532 
533 		bch2_extent_ptr_to_bp(c, iter.btree_id, b->c.level + 1,
534 				      k, p, &bucket_pos, &bp);
535 
536 		ret = check_bp_exists(trans, bucket_pos, bp, k,
537 				      bucket_start, bucket_end,
538 				      last_flushed);
539 		if (ret)
540 			goto err;
541 	}
542 err:
543 	bch2_trans_iter_exit(trans, &iter);
544 	return ret;
545 }
546 
547 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
548 {
549 	return (struct bbpos) {
550 		.btree	= bp.btree_id,
551 		.pos	= bp.pos,
552 	};
553 }
554 
555 static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
556 {
557 	struct sysinfo i;
558 	u64 mem_bytes;
559 
560 	si_meminfo(&i);
561 	mem_bytes = i.totalram * i.mem_unit;
562 	return div_u64(mem_bytes >> 1, btree_bytes(c));
563 }
564 
565 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
566 					unsigned btree_leaf_mask,
567 					unsigned btree_interior_mask,
568 					struct bbpos start, struct bbpos *end)
569 {
570 	struct btree_iter iter;
571 	struct bkey_s_c k;
572 	size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
573 	enum btree_id btree;
574 	int ret = 0;
575 
576 	for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
577 		unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
578 
579 		if (!((1U << btree) & btree_leaf_mask) &&
580 		    !((1U << btree) & btree_interior_mask))
581 			continue;
582 
583 		bch2_trans_node_iter_init(trans, &iter, btree,
584 					  btree == start.btree ? start.pos : POS_MIN,
585 					  0, depth, 0);
586 		/*
587 		 * for_each_btree_key_contineu() doesn't check the return value
588 		 * from bch2_btree_iter_advance(), which is needed when
589 		 * iterating over interior nodes where we'll see keys at
590 		 * SPOS_MAX:
591 		 */
592 		do {
593 			k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
594 			ret = bkey_err(k);
595 			if (!k.k || ret)
596 				break;
597 
598 			--btree_nodes;
599 			if (!btree_nodes) {
600 				*end = BBPOS(btree, k.k->p);
601 				bch2_trans_iter_exit(trans, &iter);
602 				return 0;
603 			}
604 		} while (bch2_btree_iter_advance(&iter));
605 		bch2_trans_iter_exit(trans, &iter);
606 	}
607 
608 	*end = BBPOS_MAX;
609 	return ret;
610 }
611 
612 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
613 						   struct bpos bucket_start,
614 						   struct bpos bucket_end)
615 {
616 	struct bch_fs *c = trans->c;
617 	struct btree_iter iter;
618 	enum btree_id btree_id;
619 	struct bpos_level last_flushed = { UINT_MAX, POS_MIN };
620 	int ret = 0;
621 
622 	for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) {
623 		unsigned depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
624 
625 		bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
626 					  depth,
627 					  BTREE_ITER_ALL_LEVELS|
628 					  BTREE_ITER_PREFETCH);
629 
630 		do {
631 			ret = commit_do(trans, NULL, NULL,
632 					BTREE_INSERT_LAZY_RW|
633 					BTREE_INSERT_NOFAIL,
634 					check_extent_to_backpointers(trans, &iter,
635 								bucket_start, bucket_end,
636 								&last_flushed));
637 			if (ret)
638 				break;
639 		} while (!bch2_btree_iter_advance(&iter));
640 
641 		bch2_trans_iter_exit(trans, &iter);
642 
643 		if (ret)
644 			break;
645 
646 		ret = commit_do(trans, NULL, NULL,
647 				BTREE_INSERT_LAZY_RW|
648 				BTREE_INSERT_NOFAIL,
649 				check_btree_root_to_backpointers(trans, btree_id,
650 							bucket_start, bucket_end,
651 							&last_flushed));
652 		if (ret)
653 			break;
654 	}
655 	return ret;
656 }
657 
658 static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
659 					 struct bpos bucket)
660 {
661 	return bch2_dev_exists2(c, bucket.inode)
662 		? bucket_pos_to_bp(c, bucket, 0)
663 		: bucket;
664 }
665 
666 static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
667 					struct bpos start, struct bpos *end)
668 {
669 	struct btree_iter alloc_iter;
670 	struct btree_iter bp_iter;
671 	struct bkey_s_c alloc_k, bp_k;
672 	size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
673 	bool alloc_end = false, bp_end = false;
674 	int ret = 0;
675 
676 	bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
677 				  start, 0, 1, 0);
678 	bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
679 				  bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
680 	while (1) {
681 		alloc_k = !alloc_end
682 			? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
683 			: bkey_s_c_null;
684 		bp_k = !bp_end
685 			? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
686 			: bkey_s_c_null;
687 
688 		ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
689 		if ((!alloc_k.k && !bp_k.k) || ret) {
690 			*end = SPOS_MAX;
691 			break;
692 		}
693 
694 		--btree_nodes;
695 		if (!btree_nodes) {
696 			*end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
697 			break;
698 		}
699 
700 		if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
701 		    bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
702 			if (!bch2_btree_iter_advance(&alloc_iter))
703 				alloc_end = true;
704 		} else {
705 			if (!bch2_btree_iter_advance(&bp_iter))
706 				bp_end = true;
707 		}
708 	}
709 	bch2_trans_iter_exit(trans, &bp_iter);
710 	bch2_trans_iter_exit(trans, &alloc_iter);
711 	return ret;
712 }
713 
714 int bch2_check_extents_to_backpointers(struct bch_fs *c)
715 {
716 	struct btree_trans *trans = bch2_trans_get(c);
717 	struct bpos start = POS_MIN, end;
718 	int ret;
719 
720 	while (1) {
721 		ret = bch2_get_alloc_in_memory_pos(trans, start, &end);
722 		if (ret)
723 			break;
724 
725 		if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX))
726 			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
727 				    __func__, btree_nodes_fit_in_ram(c));
728 
729 		if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) {
730 			struct printbuf buf = PRINTBUF;
731 
732 			prt_str(&buf, "check_extents_to_backpointers(): ");
733 			bch2_bpos_to_text(&buf, start);
734 			prt_str(&buf, "-");
735 			bch2_bpos_to_text(&buf, end);
736 
737 			bch_verbose(c, "%s", buf.buf);
738 			printbuf_exit(&buf);
739 		}
740 
741 		ret = bch2_check_extents_to_backpointers_pass(trans, start, end);
742 		if (ret || bpos_eq(end, SPOS_MAX))
743 			break;
744 
745 		start = bpos_successor(end);
746 	}
747 	bch2_trans_put(trans);
748 
749 	if (ret)
750 		bch_err_fn(c, ret);
751 	return ret;
752 }
753 
754 static int check_one_backpointer(struct btree_trans *trans,
755 				 struct bbpos start,
756 				 struct bbpos end,
757 				 struct bkey_s_c_backpointer bp,
758 				 struct bpos *last_flushed_pos)
759 {
760 	struct bch_fs *c = trans->c;
761 	struct btree_iter iter;
762 	struct bbpos pos = bp_to_bbpos(*bp.v);
763 	struct bkey_s_c k;
764 	struct printbuf buf = PRINTBUF;
765 	int ret;
766 
767 	if (bbpos_cmp(pos, start) < 0 ||
768 	    bbpos_cmp(pos, end) > 0)
769 		return 0;
770 
771 	k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
772 	ret = bkey_err(k);
773 	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
774 		return 0;
775 	if (ret)
776 		return ret;
777 
778 	if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
779 		*last_flushed_pos = bp.k->p;
780 		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
781 			-BCH_ERR_transaction_restart_write_buffer_flush;
782 		goto out;
783 	}
784 
785 	if (fsck_err_on(!k.k, c,
786 			backpointer_to_missing_ptr,
787 			"backpointer for missing %s\n  %s",
788 			bp.v->level ? "btree node" : "extent",
789 			(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
790 		ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
791 		goto out;
792 	}
793 out:
794 fsck_err:
795 	bch2_trans_iter_exit(trans, &iter);
796 	printbuf_exit(&buf);
797 	return ret;
798 }
799 
800 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
801 						   struct bbpos start,
802 						   struct bbpos end)
803 {
804 	struct btree_iter iter;
805 	struct bkey_s_c k;
806 	struct bpos last_flushed_pos = SPOS_MAX;
807 
808 	return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
809 				  POS_MIN, BTREE_ITER_PREFETCH, k,
810 				  NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
811 		check_one_backpointer(trans, start, end,
812 				      bkey_s_c_to_backpointer(k),
813 				      &last_flushed_pos));
814 }
815 
816 int bch2_check_backpointers_to_extents(struct bch_fs *c)
817 {
818 	struct btree_trans *trans = bch2_trans_get(c);
819 	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
820 	int ret;
821 
822 	while (1) {
823 		ret = bch2_get_btree_in_memory_pos(trans,
824 						   (1U << BTREE_ID_extents)|
825 						   (1U << BTREE_ID_reflink),
826 						   ~0,
827 						   start, &end);
828 		if (ret)
829 			break;
830 
831 		if (!bbpos_cmp(start, BBPOS_MIN) &&
832 		    bbpos_cmp(end, BBPOS_MAX))
833 			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
834 				    __func__, btree_nodes_fit_in_ram(c));
835 
836 		if (bbpos_cmp(start, BBPOS_MIN) ||
837 		    bbpos_cmp(end, BBPOS_MAX)) {
838 			struct printbuf buf = PRINTBUF;
839 
840 			prt_str(&buf, "check_backpointers_to_extents(): ");
841 			bch2_bbpos_to_text(&buf, start);
842 			prt_str(&buf, "-");
843 			bch2_bbpos_to_text(&buf, end);
844 
845 			bch_verbose(c, "%s", buf.buf);
846 			printbuf_exit(&buf);
847 		}
848 
849 		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
850 		if (ret || !bbpos_cmp(end, BBPOS_MAX))
851 			break;
852 
853 		start = bbpos_successor(end);
854 	}
855 	bch2_trans_put(trans);
856 
857 	if (ret)
858 		bch_err_fn(c, ret);
859 	return ret;
860 }
861