xref: /linux/fs/bcachefs/backpointers.c (revision 64ed7d8190611c96744fd2b89afe6aeb3054902b)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bbpos.h"
4 #include "alloc_background.h"
5 #include "backpointers.h"
6 #include "bkey_buf.h"
7 #include "btree_cache.h"
8 #include "btree_update.h"
9 #include "btree_update_interior.h"
10 #include "btree_write_buffer.h"
11 #include "checksum.h"
12 #include "error.h"
13 
14 #include <linux/mm.h>
15 
16 static bool extent_matches_bp(struct bch_fs *c,
17 			      enum btree_id btree_id, unsigned level,
18 			      struct bkey_s_c k,
19 			      struct bpos bucket,
20 			      struct bch_backpointer bp)
21 {
22 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
23 	const union bch_extent_entry *entry;
24 	struct extent_ptr_decoded p;
25 
26 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
27 		struct bpos bucket2;
28 		struct bch_backpointer bp2;
29 
30 		if (p.ptr.cached)
31 			continue;
32 
33 		bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
34 		if (bpos_eq(bucket, bucket2) &&
35 		    !memcmp(&bp, &bp2, sizeof(bp)))
36 			return true;
37 	}
38 
39 	return false;
40 }
41 
/*
 * Validate a KEY_TYPE_backpointer key: the key's position must be consistent
 * with the bucket_offset stored in the value.
 */
int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bkey_invalid_flags flags,
			     struct printbuf *err)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

	/* these will be caught by fsck */
	if (!bch2_dev_exists2(c, bp.k->p.inode))
		return 0;

	struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
	struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
	int ret = 0;

	/*
	 * The stored bucket_offset (scaled down by the max extent compress
	 * ratio) must fit within the bucket, and re-encoding (bucket,
	 * bucket_offset) must give back exactly this key's position:
	 */
	bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
			 !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
			 c, err,
			 backpointer_bucket_offset_wrong,
			 "backpointer bucket_offset wrong");
fsck_err:
	return ret;
}
64 
65 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
66 {
67 	prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
68 	       bch2_btree_id_str(bp->btree_id),
69 	       bp->level,
70 	       (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
71 	       (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
72 	       bp->bucket_len);
73 	bch2_bpos_to_text(out, bp->pos);
74 }
75 
76 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
77 {
78 	if (bch2_dev_exists2(c, k.k->p.inode)) {
79 		prt_str(out, "bucket=");
80 		bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p));
81 		prt_str(out, " ");
82 	}
83 
84 	bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
85 }
86 
/* Byte-swap a backpointer value, for filesystems of foreign endianness */
void bch2_backpointer_swab(struct bkey_s k)
{
	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

	/* bucket_offset is a 40 bit field, hence swab40: */
	bp.v->bucket_offset	= swab40(bp.v->bucket_offset);
	bp.v->bucket_len	= swab32(bp.v->bucket_len);
	bch2_bpos_swab(&bp.v->pos);
}
95 
/*
 * Report an inconsistency hit while modifying a backpointer: either an
 * existing backpointer was found where we were about to insert one, or no
 * matching backpointer was found on delete.
 *
 * Missing-on-delete is only logged (and only treated as an error) once the
 * extents_to_backpointers recovery pass has run, since before that pass
 * missing backpointers are expected and will be created by fsck.
 */
static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bch_backpointer bp,
					struct bkey_s_c bp_k,
					struct bkey_s_c orig_k,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		prt_printf(&buf, "backpointer not found when deleting");
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);

	/* Only a hard error after fsck has verified extents <-> backpointers: */
	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
	} else {
		return 0;
	}
}
146 
/*
 * Insert or delete a backpointer key directly, without going through the
 * btree write buffer - used when backpointer write buffering is disabled.
 *
 * Verifies the expected current contents of the slot first: on insert the
 * slot must be empty, on delete it must hold a backpointer matching @bp;
 * mismatches are reported via backpointer_mod_err().
 */
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
				struct bpos bucket,
				struct bch_backpointer bp,
				struct bkey_s_c orig_k,
				bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	struct bkey_i_backpointer *bp_k;
	int ret;

	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
	ret = PTR_ERR_OR_ZERO(bp_k);
	if (ret)
		return ret;

	bkey_backpointer_init(&bp_k->k_i);
	bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
	bp_k->v = bp;

	/* deleting: write a whiteout instead of a backpointer */
	if (!insert) {
		bp_k->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&bp_k->k, 0);
	}

	k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
			       bp_k->k.p,
			       BTREE_ITER_INTENT|
			       BTREE_ITER_SLOTS|
			       BTREE_ITER_WITH_UPDATES);
	ret = bkey_err(k);
	if (ret)
		goto err;

	/*
	 * Sanity check the slot: empty for insert, matching backpointer for
	 * delete - anything else is an inconsistency:
	 */
	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
		ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
		if (ret)
			goto err;
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}
195 
/*
 * Find the next backpointer >= *bp_pos within @bucket: returns its position
 * in *bp_pos and its value in *bp; *bp_pos is set to SPOS_MAX when there are
 * no more backpointers for this bucket.
 *
 * If @gen is non-negative, the bucket's current generation must match it, or
 * we bail out as if there were no backpointers.
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
			      struct bpos bucket, int gen,
			      struct bpos *bp_pos,
			      struct bch_backpointer *bp,
			      unsigned iter_flags)
{
	struct bch_fs *c = trans->c;
	/* first position belonging to the next bucket: */
	struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0);
	struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
	struct bkey_s_c k;
	int ret = 0;

	if (bpos_ge(*bp_pos, bp_end_pos))
		goto done;

	if (gen >= 0) {
		k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				       bucket, BTREE_ITER_CACHED|iter_flags);
		ret = bkey_err(k);
		if (ret)
			goto out;

		/* stale generation - bucket has since been reused: */
		if (k.k->type != KEY_TYPE_alloc_v4 ||
		    bkey_s_c_to_alloc_v4(k).v->gen != gen)
			goto done;
	}

	/* clamp the search to positions within this bucket: */
	*bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0));

	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
				     *bp_pos, iter_flags, k, ret) {
		if (bpos_ge(k.k->p, bp_end_pos))
			break;

		*bp_pos = k.k->p;
		*bp = *bkey_s_c_to_backpointer(k).v;
		goto out;
	}
done:
	*bp_pos = SPOS_MAX;
out:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}
244 
/*
 * Complain about a backpointer whose target doesn't match what it points to.
 * Only an error when the write buffer is disabled for backpointers (an
 * uncommon debug mode); otherwise the backpointer may simply have been
 * deleted and not yet flushed.
 */
static void backpointer_not_found(struct btree_trans *trans,
				  struct bpos bp_pos,
				  struct bch_backpointer bp,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	if (likely(!bch2_backpointers_no_use_write_buffer))
		return;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
		   bp.level ? "btree node" : "extent");
	prt_printf(&buf, "bucket: ");
	bch2_bpos_to_text(&buf, bucket);
	prt_printf(&buf, "\n  ");

	prt_printf(&buf, "backpointer pos: ");
	bch2_bpos_to_text(&buf, bp_pos);
	prt_printf(&buf, "\n  ");

	bch2_backpointer_to_text(&buf, &bp);
	prt_printf(&buf, "\n  ");
	bch2_bkey_val_to_text(&buf, c, k);
	/*
	 * Before the backpointers fsck passes run this is only worth a
	 * ratelimited log message, not a filesystem-inconsistent event:
	 */
	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}
282 
/*
 * Look up the key a backpointer points to, with @iter pointing at it on
 * success; for backpointers to btree nodes (bp.level != 0) this is the
 * node's key. Returns bkey_s_c_null if the target no longer matches the
 * backpointer.
 */
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bpos bp_pos,
					 struct bch_backpointer bp,
					 unsigned iter_flags)
{
	if (likely(!bp.level)) {
		/* backpointer to a leaf extent: */
		struct bch_fs *c = trans->c;
		struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
		struct bkey_s_c k;

		bch2_trans_node_iter_init(trans, iter,
					  bp.btree_id,
					  bp.pos,
					  0, 0,
					  iter_flags);
		k = bch2_btree_iter_peek_slot(iter);
		if (bkey_err(k)) {
			bch2_trans_iter_exit(trans, iter);
			return k;
		}

		/* verify the extent still generates this backpointer: */
		if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
			return k;

		bch2_trans_iter_exit(trans, iter);
		backpointer_not_found(trans, bp_pos, bp, k);
		return bkey_s_c_null;
	} else {
		/* backpointer to a btree node - return the node's key: */
		struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);

		if (IS_ERR_OR_NULL(b)) {
			bch2_trans_iter_exit(trans, iter);
			return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
		}
		return bkey_i_to_s_c(&b->key);
	}
}
321 
/*
 * Look up the btree node a backpointer points to - for backpointers to btree
 * nodes only (bp.level must be nonzero).
 *
 * Returns NULL if the node no longer matches the backpointer, or
 * -BCH_ERR_backpointer_to_overwritten_btree_node if the node is part of an
 * interior update still in flight (btree_node_will_make_reachable()).
 */
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bp_pos,
					struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;
	struct bpos bucket = bp_pos_to_bucket(c, bp_pos);
	struct btree *b;

	BUG_ON(!bp.level);

	/*
	 * A node's key is owned by its parent, so the backpointer's level is
	 * one above the node itself - iterate at bp.level - 1:
	 */
	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  bp.level - 1,
				  0);
	b = bch2_btree_iter_peek_node(iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

	BUG_ON(b->c.level != bp.level - 1);

	if (extent_matches_bp(c, bp.btree_id, bp.level,
			      bkey_i_to_s_c(&b->key),
			      bucket, bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
		b = NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}
360 
/*
 * fsck a single backpointer key: delete it if it refers to a nonexistent
 * device or to a bucket that has no alloc key.
 */
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
			backpointer_to_missing_device,
			"backpointer for missing device:\n%s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				     bp_pos_to_bucket(c, k.k->p), 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	/* every backpointer's bucket must have an alloc_v4 key: */
	if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
			backpointer_to_missing_alloc,
			"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			alloc_iter.pos.inode, alloc_iter.pos.offset,
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}
398 
/*
 * fsck pass: verify that every backpointer has a corresponding alloc key,
 * committing deletions of bad backpointers as we go.
 */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		  bch2_check_btree_backpointer(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}
410 
411 static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
412 {
413 	return bpos_eq(l.k->p, r.k->p) &&
414 		bkey_bytes(l.k) == bkey_bytes(r.k) &&
415 		!memcmp(l.v, r.v, bkey_val_bytes(l.k));
416 }
417 
/* State for one extents -> backpointers fsck pass */
struct extents_to_bp_state {
	struct bpos	bucket_start;	/* first bucket covered by this pass */
	struct bpos	bucket_end;	/* last bucket covered by this pass */
	struct bkey_buf last_flushed;	/* last key we flushed the write buffer for */
};
423 
424 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
425 			       struct bkey_s_c extent, unsigned dev)
426 {
427 	struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
428 	int ret = PTR_ERR_OR_ZERO(n);
429 	if (ret)
430 		return ret;
431 
432 	bch2_bkey_drop_device(bkey_i_to_s(n), dev);
433 	return bch2_btree_insert_trans(trans, btree, n, 0);
434 }
435 
/*
 * Re-read @extent's data from @dev and verify its checksum - used when two
 * extents claim the same space on disk, to decide which one is stale.
 *
 * Returns > 0 if the checksum was bad and @dev was dropped from @extent
 * (with user approval via fsck_err), 0 if the checksum matched or could not
 * be checked (btree pointer, no checksum, device unavailable), negative on
 * error.
 */
static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	if (bkey_is_btree_ptr(extent.k))
		return false;

	/* find the pointer on @dev - caller guarantees it exists: */
	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	if (!p.crc.csum_type)
		return false;

	/* compressed_size is in sectors: */
	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
	if (!bch2_dev_get_ioref(ca, READ))
		return false;

	data_buf = kvmalloc(bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	/* build the error message before checksumming, in case we need it: */
	prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
	prt_printf(&buf, "\n  %s ", bch2_btree_id_str(btree));
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_printf(&buf, "\n  %s ", bch2_btree_id_str(o_btree));
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(extent.k->version, p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			c, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
	return ret;
}
501 
502 static int check_bp_exists(struct btree_trans *trans,
503 			   struct extents_to_bp_state *s,
504 			   struct bpos bucket,
505 			   struct bch_backpointer bp,
506 			   struct bkey_s_c orig_k)
507 {
508 	struct bch_fs *c = trans->c;
509 	struct btree_iter bp_iter = {};
510 	struct btree_iter other_extent_iter = {};
511 	struct printbuf buf = PRINTBUF;
512 	struct bkey_s_c bp_k;
513 	struct bkey_buf tmp;
514 	int ret;
515 
516 	bch2_bkey_buf_init(&tmp);
517 
518 	if (!bch2_dev_bucket_exists(c, bucket)) {
519 		prt_str(&buf, "extent for nonexistent device:bucket ");
520 		bch2_bpos_to_text(&buf, bucket);
521 		prt_str(&buf, "\n  ");
522 		bch2_bkey_val_to_text(&buf, c, orig_k);
523 		bch_err(c, "%s", buf.buf);
524 		return -BCH_ERR_fsck_repair_unimplemented;
525 	}
526 
527 	if (bpos_lt(bucket, s->bucket_start) ||
528 	    bpos_gt(bucket, s->bucket_end))
529 		return 0;
530 
531 	bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
532 				  bucket_pos_to_bp(c, bucket, bp.bucket_offset),
533 				  0);
534 	ret = bkey_err(bp_k);
535 	if (ret)
536 		goto err;
537 
538 	if (bp_k.k->type != KEY_TYPE_backpointer ||
539 	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
540 		bch2_bkey_buf_reassemble(&tmp, c, orig_k);
541 
542 		if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
543 			if (bp.level) {
544 				bch2_trans_unlock(trans);
545 				bch2_btree_interior_updates_flush(c);
546 			}
547 
548 			ret = bch2_btree_write_buffer_flush_sync(trans);
549 			if (ret)
550 				goto err;
551 
552 			bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
553 			ret = -BCH_ERR_transaction_restart_write_buffer_flush;
554 			goto out;
555 		}
556 
557 		goto check_existing_bp;
558 	}
559 out:
560 err:
561 fsck_err:
562 	bch2_trans_iter_exit(trans, &other_extent_iter);
563 	bch2_trans_iter_exit(trans, &bp_iter);
564 	bch2_bkey_buf_exit(&tmp, c);
565 	printbuf_exit(&buf);
566 	return ret;
567 check_existing_bp:
568 	/* Do we have a backpointer for a different extent? */
569 	if (bp_k.k->type != KEY_TYPE_backpointer)
570 		goto missing;
571 
572 	struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
573 
574 	struct bkey_s_c other_extent =
575 		bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
576 	ret = bkey_err(other_extent);
577 	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
578 		ret = 0;
579 	if (ret)
580 		goto err;
581 
582 	if (!other_extent.k)
583 		goto missing;
584 
585 	if (bch2_extents_match(orig_k, other_extent)) {
586 		printbuf_reset(&buf);
587 		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n  ");
588 		bch2_bkey_val_to_text(&buf, c, orig_k);
589 		prt_str(&buf, "\n  ");
590 		bch2_bkey_val_to_text(&buf, c, other_extent);
591 		bch_err(c, "%s", buf.buf);
592 
593 		if (other_extent.k->size <= orig_k.k->size) {
594 			ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
595 			if (ret)
596 				goto err;
597 			goto out;
598 		} else {
599 			ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
600 			if (ret)
601 				goto err;
602 			goto missing;
603 		}
604 	}
605 
606 	ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
607 	if (ret < 0)
608 		goto err;
609 	if (ret) {
610 		ret = 0;
611 		goto missing;
612 	}
613 
614 	ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
615 	if (ret < 0)
616 		goto err;
617 	if (ret) {
618 		ret = 0;
619 		goto out;
620 	}
621 
622 	printbuf_reset(&buf);
623 	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n  ", bucket.inode);
624 	bch2_bkey_val_to_text(&buf, c, orig_k);
625 	prt_str(&buf, "\n  ");
626 	bch2_bkey_val_to_text(&buf, c, other_extent);
627 	bch_err(c, "%s", buf.buf);
628 	ret = -BCH_ERR_fsck_repair_unimplemented;
629 	goto err;
630 missing:
631 	printbuf_reset(&buf);
632 	prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
633 	       bch2_btree_id_str(bp.btree_id), bp.level);
634 	bch2_bkey_val_to_text(&buf, c, orig_k);
635 	prt_printf(&buf, "\n  got:   ");
636 	bch2_bkey_val_to_text(&buf, c, bp_k);
637 
638 	struct bkey_i_backpointer n_bp_k;
639 	bkey_backpointer_init(&n_bp_k.k_i);
640 	n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
641 	n_bp_k.v = bp;
642 	prt_printf(&buf, "\n  want:  ");
643 	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
644 
645 	if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
646 		ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
647 
648 	goto out;
649 }
650 
651 static int check_extent_to_backpointers(struct btree_trans *trans,
652 					struct extents_to_bp_state *s,
653 					enum btree_id btree, unsigned level,
654 					struct bkey_s_c k)
655 {
656 	struct bch_fs *c = trans->c;
657 	struct bkey_ptrs_c ptrs;
658 	const union bch_extent_entry *entry;
659 	struct extent_ptr_decoded p;
660 	int ret;
661 
662 	ptrs = bch2_bkey_ptrs_c(k);
663 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
664 		struct bpos bucket_pos;
665 		struct bch_backpointer bp;
666 
667 		if (p.ptr.cached)
668 			continue;
669 
670 		bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);
671 
672 		ret = check_bp_exists(trans, s, bucket_pos, bp, k);
673 		if (ret)
674 			return ret;
675 	}
676 
677 	return 0;
678 }
679 
/*
 * Check the backpointers for a btree root's key, and report the root's level
 * in *level so the caller can then walk the rest of the tree top-down.
 */
static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	/* the root may have changed while we were getting to it - retry: */
	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	/* the root's key's backpointer lives at one level above the root: */
	k = bkey_i_to_s_c(&b->key);
	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
711 
712 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
713 {
714 	return (struct bbpos) {
715 		.btree	= bp.btree_id,
716 		.pos	= bp.pos,
717 	};
718 }
719 
/*
 * Max bytes of btree nodes we may pin in memory for fsck:
 * fsck_memory_usage_percent of total RAM.
 */
static u64 mem_may_pin_bytes(struct bch_fs *c)
{
	struct sysinfo i;
	si_meminfo(&i);

	u64 mem_bytes = i.totalram * i.mem_unit;
	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
}
728 
729 static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
730 {
731 	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
732 }
733 
734 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
735 					u64 btree_leaf_mask,
736 					u64 btree_interior_mask,
737 					struct bbpos start, struct bbpos *end)
738 {
739 	struct bch_fs *c = trans->c;
740 	s64 mem_may_pin = mem_may_pin_bytes(c);
741 	int ret = 0;
742 
743 	btree_interior_mask |= btree_leaf_mask;
744 
745 	c->btree_cache.pinned_nodes_leaf_mask		= btree_leaf_mask;
746 	c->btree_cache.pinned_nodes_interior_mask	= btree_interior_mask;
747 	c->btree_cache.pinned_nodes_start		= start;
748 	c->btree_cache.pinned_nodes_end			= *end = BBPOS_MAX;
749 
750 	for (enum btree_id btree = start.btree;
751 	     btree < BTREE_ID_NR && !ret;
752 	     btree++) {
753 		unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
754 		struct btree_iter iter;
755 		struct btree *b;
756 
757 		if (!((1U << btree) & btree_leaf_mask) &&
758 		    !((1U << btree) & btree_interior_mask))
759 			continue;
760 
761 		__for_each_btree_node(trans, iter, btree,
762 				      btree == start.btree ? start.pos : POS_MIN,
763 				      0, depth, BTREE_ITER_PREFETCH, b, ret) {
764 			mem_may_pin -= btree_buf_bytes(b);
765 			if (mem_may_pin <= 0) {
766 				c->btree_cache.pinned_nodes_end = *end =
767 					BBPOS(btree, b->key.k.p);
768 				bch2_trans_iter_exit(trans, &iter);
769 				return 0;
770 			}
771 		}
772 		bch2_trans_iter_exit(trans, &iter);
773 	}
774 
775 	return ret;
776 }
777 
/*
 * One extents -> backpointers pass: walk every alive btree, level by level
 * from the root down, checking each key's backpointers within the bucket
 * range given in @s.
 */
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		/* btrees whose leaf keys don't hold pointers: stop at level 1 */
		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
						  level,
						  BTREE_ITER_PREFETCH);
			while (1) {
				bch2_trans_begin(trans);

				struct bkey_s_c k = bch2_btree_iter_peek(&iter);
				if (!k.k)
					break;
				ret = bkey_err(k) ?:
					check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
					bch2_trans_commit(trans, NULL, NULL,
							  BCH_TRANS_COMMIT_no_enospc);
				/* on transaction restart, retry the same key: */
				if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
					ret = 0;
					continue;
				}
				if (ret)
					break;
				if (bpos_eq(iter.pos, SPOS_MAX))
					break;
				bch2_btree_iter_advance(&iter);
			}
			bch2_trans_iter_exit(trans, &iter);

			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}
831 
/*
 * fsck pass: verify that every extent (and btree node key) has backpointers.
 * Runs in multiple passes if the backpointers btree doesn't fit in memory,
 * pinning one bucket range's worth at a time.
 */
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bucket_start = POS_MIN };
	int ret;

	bch2_bkey_buf_init(&s.last_flushed);
	bkey_init(&s.last_flushed.k->k);

	while (1) {
		struct bbpos end;
		/* pin as much of the backpointers btree as fits in memory: */
		ret = bch2_get_btree_in_memory_pos(trans,
				BIT_ULL(BTREE_ID_backpointers),
				BIT_ULL(BTREE_ID_backpointers),
				BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
		if (ret)
			break;

		s.bucket_end = end.pos;

		if ( bpos_eq(s.bucket_start, POS_MIN) &&
		    !bpos_eq(s.bucket_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(s.bucket_start, POS_MIN) ||
		    !bpos_eq(s.bucket_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bucket_start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, s.bucket_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
		if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
			break;

		/* next pass starts where this one left off: */
		s.bucket_start = bpos_successor(s.bucket_end);
	}
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);

	/* unpin everything we pinned during the passes: */
	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}
885 
/*
 * Check that a single backpointer points to an existing extent/btree node,
 * deleting it if not. The target may just not have been flushed from the
 * write buffer yet, so we flush and restart once per backpointer position
 * before declaring it missing.
 */
static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c_backpointer bp,
				 struct bpos *last_flushed_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bbpos pos = bp_to_bbpos(*bp.v);
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;

	/* only check targets within this pass's pinned range: */
	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
	ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	/* maybe not flushed yet - flush once per position, then restart: */
	if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
		*last_flushed_pos = bp.k->p;
		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
			-BCH_ERR_transaction_restart_write_buffer_flush;
		goto out;
	}

	if (fsck_err_on(!k.k, c,
			backpointer_to_missing_ptr,
			"backpointer for missing %s\n  %s",
			bp.v->level ? "btree node" : "extent",
			(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
		ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}
931 
/*
 * One backpointers -> extents pass: check every backpointer whose target
 * falls in [start, end], committing deletions as we go.
 */
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	/* SPOS_MAX: no write buffer flush has happened yet this pass */
	struct bpos last_flushed_pos = SPOS_MAX;

	return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
				  POS_MIN, BTREE_ITER_PREFETCH, k,
				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_one_backpointer(trans, start, end,
				      bkey_s_c_to_backpointer(k),
				      &last_flushed_pos));
}
945 
946 int bch2_check_backpointers_to_extents(struct bch_fs *c)
947 {
948 	struct btree_trans *trans = bch2_trans_get(c);
949 	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
950 	int ret;
951 
952 	while (1) {
953 		ret = bch2_get_btree_in_memory_pos(trans,
954 						   (1U << BTREE_ID_extents)|
955 						   (1U << BTREE_ID_reflink),
956 						   ~0,
957 						   start, &end);
958 		if (ret)
959 			break;
960 
961 		if (!bbpos_cmp(start, BBPOS_MIN) &&
962 		    bbpos_cmp(end, BBPOS_MAX))
963 			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
964 				    __func__, btree_nodes_fit_in_ram(c));
965 
966 		if (bbpos_cmp(start, BBPOS_MIN) ||
967 		    bbpos_cmp(end, BBPOS_MAX)) {
968 			struct printbuf buf = PRINTBUF;
969 
970 			prt_str(&buf, "check_backpointers_to_extents(): ");
971 			bch2_bbpos_to_text(&buf, start);
972 			prt_str(&buf, "-");
973 			bch2_bbpos_to_text(&buf, end);
974 
975 			bch_verbose(c, "%s", buf.buf);
976 			printbuf_exit(&buf);
977 		}
978 
979 		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
980 		if (ret || !bbpos_cmp(end, BBPOS_MAX))
981 			break;
982 
983 		start = bbpos_successor(end);
984 	}
985 	bch2_trans_put(trans);
986 
987 	c->btree_cache.pinned_nodes_leaf_mask = 0;
988 	c->btree_cache.pinned_nodes_interior_mask = 0;
989 
990 	bch_err_fn(c, ret);
991 	return ret;
992 }
993