xref: /linux/fs/bcachefs/backpointers.c (revision 56770e24f678a84a21f21bcc1ae9cbc1364677bd)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "bbpos.h"
4 #include "alloc_background.h"
5 #include "backpointers.h"
6 #include "bkey_buf.h"
7 #include "btree_cache.h"
8 #include "btree_update.h"
9 #include "btree_update_interior.h"
10 #include "btree_write_buffer.h"
11 #include "checksum.h"
12 #include "disk_accounting.h"
13 #include "error.h"
14 #include "progress.h"
15 
16 #include <linux/mm.h>
17 
bch2_backpointer_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)18 int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
19 			      struct bkey_validate_context from)
20 {
21 	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
22 	int ret = 0;
23 
24 	bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH,
25 			 c, backpointer_level_bad,
26 			 "backpointer level bad: %u >= %u",
27 			 bp.v->level, BTREE_MAX_DEPTH);
28 
29 	bkey_fsck_err_on(bp.k->p.inode == BCH_SB_MEMBER_INVALID,
30 			 c, backpointer_dev_bad,
31 			 "backpointer for BCH_SB_MEMBER_INVALID");
32 fsck_err:
33 	return ret;
34 }
35 
bch2_backpointer_to_text(struct printbuf * out,struct bch_fs * c,struct bkey_s_c k)36 void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
37 {
38 	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
39 
40 	rcu_read_lock();
41 	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
42 	if (ca) {
43 		u32 bucket_offset;
44 		struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset);
45 		rcu_read_unlock();
46 		prt_printf(out, "bucket=%llu:%llu:%u ", bucket.inode, bucket.offset, bucket_offset);
47 	} else {
48 		rcu_read_unlock();
49 		prt_printf(out, "sector=%llu:%llu ", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT);
50 	}
51 
52 	bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level);
53 	prt_str(out, " data_type=");
54 	bch2_prt_data_type(out, bp.v->data_type);
55 	prt_printf(out, " suboffset=%u len=%u gen=%u pos=",
56 		   (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
57 		   bp.v->bucket_len,
58 		   bp.v->bucket_gen);
59 	bch2_bpos_to_text(out, bp.v->pos);
60 }
61 
bch2_backpointer_swab(struct bkey_s k)62 void bch2_backpointer_swab(struct bkey_s k)
63 {
64 	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);
65 
66 	bp.v->bucket_len	= swab32(bp.v->bucket_len);
67 	bch2_bpos_swab(&bp.v->pos);
68 }
69 
extent_matches_bp(struct bch_fs * c,enum btree_id btree_id,unsigned level,struct bkey_s_c k,struct bkey_s_c_backpointer bp)70 static bool extent_matches_bp(struct bch_fs *c,
71 			      enum btree_id btree_id, unsigned level,
72 			      struct bkey_s_c k,
73 			      struct bkey_s_c_backpointer bp)
74 {
75 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
76 	const union bch_extent_entry *entry;
77 	struct extent_ptr_decoded p;
78 
79 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
80 		struct bkey_i_backpointer bp2;
81 		bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp2);
82 
83 		if (bpos_eq(bp.k->p, bp2.k.p) &&
84 		    !memcmp(bp.v, &bp2.v, sizeof(bp2.v)))
85 			return true;
86 	}
87 
88 	return false;
89 }
90 
backpointer_mod_err(struct btree_trans * trans,struct bkey_s_c orig_k,struct bkey_i_backpointer * new_bp,struct bkey_s_c found_bp,bool insert)91 static noinline int backpointer_mod_err(struct btree_trans *trans,
92 					struct bkey_s_c orig_k,
93 					struct bkey_i_backpointer *new_bp,
94 					struct bkey_s_c found_bp,
95 					bool insert)
96 {
97 	struct bch_fs *c = trans->c;
98 	struct printbuf buf = PRINTBUF;
99 	int ret = 0;
100 
101 	if (insert) {
102 		prt_printf(&buf, "existing backpointer found when inserting ");
103 		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
104 		prt_newline(&buf);
105 		printbuf_indent_add(&buf, 2);
106 
107 		prt_printf(&buf, "found ");
108 		bch2_bkey_val_to_text(&buf, c, found_bp);
109 		prt_newline(&buf);
110 
111 		prt_printf(&buf, "for ");
112 		bch2_bkey_val_to_text(&buf, c, orig_k);
113 
114 		bch_err(c, "%s", buf.buf);
115 	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
116 		prt_printf(&buf, "backpointer not found when deleting\n");
117 		printbuf_indent_add(&buf, 2);
118 
119 		prt_printf(&buf, "searching for ");
120 		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
121 		prt_newline(&buf);
122 
123 		prt_printf(&buf, "got ");
124 		bch2_bkey_val_to_text(&buf, c, found_bp);
125 		prt_newline(&buf);
126 
127 		prt_printf(&buf, "for ");
128 		bch2_bkey_val_to_text(&buf, c, orig_k);
129 	}
130 
131 	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers &&
132 	    __bch2_inconsistent_error(c, &buf))
133 		ret = -BCH_ERR_erofs_unfixed_errors;
134 
135 	bch_err(c, "%s", buf.buf);
136 	printbuf_exit(&buf);
137 	return ret;
138 }
139 
bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans * trans,struct bkey_s_c orig_k,struct bkey_i_backpointer * bp,bool insert)140 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
141 				struct bkey_s_c orig_k,
142 				struct bkey_i_backpointer *bp,
143 				bool insert)
144 {
145 	struct btree_iter bp_iter;
146 	struct bkey_s_c k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
147 			       bp->k.p,
148 			       BTREE_ITER_intent|
149 			       BTREE_ITER_slots|
150 			       BTREE_ITER_with_updates);
151 	int ret = bkey_err(k);
152 	if (ret)
153 		return ret;
154 
155 	if (insert
156 	    ? k.k->type
157 	    : (k.k->type != KEY_TYPE_backpointer ||
158 	       memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) {
159 		ret = backpointer_mod_err(trans, orig_k, bp, k, insert);
160 		if (ret)
161 			goto err;
162 	}
163 
164 	if (!insert) {
165 		bp->k.type = KEY_TYPE_deleted;
166 		set_bkey_val_u64s(&bp->k, 0);
167 	}
168 
169 	ret = bch2_trans_update(trans, &bp_iter, &bp->k_i, 0);
170 err:
171 	bch2_trans_iter_exit(trans, &bp_iter);
172 	return ret;
173 }
174 
bch2_backpointer_del(struct btree_trans * trans,struct bpos pos)175 static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos)
176 {
177 	return (likely(!bch2_backpointers_no_use_write_buffer)
178 		? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos)
179 		: bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?:
180 		 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
181 }
182 
bch2_backpointers_maybe_flush(struct btree_trans * trans,struct bkey_s_c visiting_k,struct bkey_buf * last_flushed)183 static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans,
184 					 struct bkey_s_c visiting_k,
185 					 struct bkey_buf *last_flushed)
186 {
187 	return likely(!bch2_backpointers_no_use_write_buffer)
188 		? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed)
189 		: 0;
190 }
191 
backpointer_target_not_found(struct btree_trans * trans,struct bkey_s_c_backpointer bp,struct bkey_s_c target_k,struct bkey_buf * last_flushed)192 static int backpointer_target_not_found(struct btree_trans *trans,
193 				  struct bkey_s_c_backpointer bp,
194 				  struct bkey_s_c target_k,
195 				  struct bkey_buf *last_flushed)
196 {
197 	struct bch_fs *c = trans->c;
198 	struct printbuf buf = PRINTBUF;
199 	int ret = 0;
200 
201 	/*
202 	 * If we're using the btree write buffer, the backpointer we were
203 	 * looking at may have already been deleted - failure to find what it
204 	 * pointed to is not an error:
205 	 */
206 	ret = last_flushed
207 		? bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed)
208 		: 0;
209 	if (ret)
210 		return ret;
211 
212 	prt_printf(&buf, "backpointer doesn't match %s it points to:\n",
213 		   bp.v->level ? "btree node" : "extent");
214 	bch2_bkey_val_to_text(&buf, c, bp.s_c);
215 
216 	prt_newline(&buf);
217 	bch2_bkey_val_to_text(&buf, c, target_k);
218 
219 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k);
220 	const union bch_extent_entry *entry;
221 	struct extent_ptr_decoded p;
222 	bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry)
223 		if (p.ptr.dev == bp.k->p.inode) {
224 			prt_newline(&buf);
225 			struct bkey_i_backpointer bp2;
226 			bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2);
227 			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i));
228 		}
229 
230 	if (fsck_err(trans, backpointer_to_missing_ptr,
231 		     "%s", buf.buf))
232 		ret = bch2_backpointer_del(trans, bp.k->p);
233 fsck_err:
234 	printbuf_exit(&buf);
235 	return ret;
236 }
237 
bch2_backpointer_get_key(struct btree_trans * trans,struct bkey_s_c_backpointer bp,struct btree_iter * iter,unsigned iter_flags,struct bkey_buf * last_flushed)238 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
239 					 struct bkey_s_c_backpointer bp,
240 					 struct btree_iter *iter,
241 					 unsigned iter_flags,
242 					 struct bkey_buf *last_flushed)
243 {
244 	struct bch_fs *c = trans->c;
245 
246 	if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c)))
247 		return bkey_s_c_null;
248 
249 	bch2_trans_node_iter_init(trans, iter,
250 				  bp.v->btree_id,
251 				  bp.v->pos,
252 				  0,
253 				  bp.v->level,
254 				  iter_flags);
255 	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
256 	if (bkey_err(k)) {
257 		bch2_trans_iter_exit(trans, iter);
258 		return k;
259 	}
260 
261 	/*
262 	 * peek_slot() doesn't normally return NULL - except when we ask for a
263 	 * key at a btree level that doesn't exist.
264 	 *
265 	 * We may want to revisit this and change peek_slot():
266 	 */
267 	if (!k.k) {
268 		bkey_init(&iter->k);
269 		iter->k.p = bp.v->pos;
270 		k.k = &iter->k;
271 	}
272 
273 	if (k.k &&
274 	    extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
275 		return k;
276 
277 	bch2_trans_iter_exit(trans, iter);
278 
279 	if (!bp.v->level) {
280 		int ret = backpointer_target_not_found(trans, bp, k, last_flushed);
281 		return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
282 	} else {
283 		struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed);
284 		if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
285 			return bkey_s_c_null;
286 		if (IS_ERR_OR_NULL(b))
287 			return ((struct bkey_s_c) { .k = ERR_CAST(b) });
288 
289 		return bkey_i_to_s_c(&b->key);
290 	}
291 }
292 
bch2_backpointer_get_node(struct btree_trans * trans,struct bkey_s_c_backpointer bp,struct btree_iter * iter,struct bkey_buf * last_flushed)293 struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
294 					struct bkey_s_c_backpointer bp,
295 					struct btree_iter *iter,
296 					struct bkey_buf *last_flushed)
297 {
298 	struct bch_fs *c = trans->c;
299 
300 	BUG_ON(!bp.v->level);
301 
302 	bch2_trans_node_iter_init(trans, iter,
303 				  bp.v->btree_id,
304 				  bp.v->pos,
305 				  0,
306 				  bp.v->level - 1,
307 				  0);
308 	struct btree *b = bch2_btree_iter_peek_node(trans, iter);
309 	if (IS_ERR_OR_NULL(b))
310 		goto err;
311 
312 	BUG_ON(b->c.level != bp.v->level - 1);
313 
314 	if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
315 			      bkey_i_to_s_c(&b->key), bp))
316 		return b;
317 
318 	if (btree_node_will_make_reachable(b)) {
319 		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
320 	} else {
321 		int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed);
322 		b = ret ? ERR_PTR(ret) : NULL;
323 	}
324 err:
325 	bch2_trans_iter_exit(trans, iter);
326 	return b;
327 }
328 
bch2_check_backpointer_has_valid_bucket(struct btree_trans * trans,struct bkey_s_c k,struct bkey_buf * last_flushed)329 static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k,
330 						   struct bkey_buf *last_flushed)
331 {
332 	if (k.k->type != KEY_TYPE_backpointer)
333 		return 0;
334 
335 	struct bch_fs *c = trans->c;
336 	struct btree_iter alloc_iter = {};
337 	struct bkey_s_c alloc_k;
338 	struct printbuf buf = PRINTBUF;
339 	int ret = 0;
340 
341 	struct bpos bucket;
342 	if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) {
343 		ret = bch2_backpointers_maybe_flush(trans, k, last_flushed);
344 		if (ret)
345 			goto out;
346 
347 		if (fsck_err(trans, backpointer_to_missing_device,
348 			     "backpointer for missing device:\n%s",
349 			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
350 			ret = bch2_backpointer_del(trans, k.k->p);
351 		goto out;
352 	}
353 
354 	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0);
355 	ret = bkey_err(alloc_k);
356 	if (ret)
357 		goto out;
358 
359 	if (alloc_k.k->type != KEY_TYPE_alloc_v4) {
360 		ret = bch2_backpointers_maybe_flush(trans, k, last_flushed);
361 		if (ret)
362 			goto out;
363 
364 		if (fsck_err(trans, backpointer_to_missing_alloc,
365 			     "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
366 			     alloc_iter.pos.inode, alloc_iter.pos.offset,
367 			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
368 			ret = bch2_backpointer_del(trans, k.k->p);
369 	}
370 out:
371 fsck_err:
372 	bch2_trans_iter_exit(trans, &alloc_iter);
373 	printbuf_exit(&buf);
374 	return ret;
375 }
376 
377 /* verify that every backpointer has a corresponding alloc key */
bch2_check_btree_backpointers(struct bch_fs * c)378 int bch2_check_btree_backpointers(struct bch_fs *c)
379 {
380 	struct bkey_buf last_flushed;
381 	bch2_bkey_buf_init(&last_flushed);
382 	bkey_init(&last_flushed.k->k);
383 
384 	int ret = bch2_trans_run(c,
385 		for_each_btree_key_commit(trans, iter,
386 			BTREE_ID_backpointers, POS_MIN, 0, k,
387 			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
388 		  bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed)));
389 
390 	bch2_bkey_buf_exit(&last_flushed, c);
391 	bch_err_fn(c, ret);
392 	return ret;
393 }
394 
395 struct extents_to_bp_state {
396 	struct bpos	bp_start;
397 	struct bpos	bp_end;
398 	struct bkey_buf last_flushed;
399 };
400 
drop_dev_and_update(struct btree_trans * trans,enum btree_id btree,struct bkey_s_c extent,unsigned dev)401 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
402 			       struct bkey_s_c extent, unsigned dev)
403 {
404 	struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
405 	int ret = PTR_ERR_OR_ZERO(n);
406 	if (ret)
407 		return ret;
408 
409 	bch2_bkey_drop_device(bkey_i_to_s(n), dev);
410 	return bch2_btree_insert_trans(trans, btree, n, 0);
411 }
412 
check_extent_checksum(struct btree_trans * trans,enum btree_id btree,struct bkey_s_c extent,enum btree_id o_btree,struct bkey_s_c extent2,unsigned dev)413 static int check_extent_checksum(struct btree_trans *trans,
414 				 enum btree_id btree, struct bkey_s_c extent,
415 				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
416 {
417 	struct bch_fs *c = trans->c;
418 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
419 	const union bch_extent_entry *entry;
420 	struct extent_ptr_decoded p;
421 	struct printbuf buf = PRINTBUF;
422 	void *data_buf = NULL;
423 	struct bio *bio = NULL;
424 	size_t bytes;
425 	int ret = 0;
426 
427 	if (bkey_is_btree_ptr(extent.k))
428 		return false;
429 
430 	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
431 		if (p.ptr.dev == dev)
432 			goto found;
433 	BUG();
434 found:
435 	if (!p.crc.csum_type)
436 		return false;
437 
438 	bytes = p.crc.compressed_size << 9;
439 
440 	struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ);
441 	if (!ca)
442 		return false;
443 
444 	data_buf = kvmalloc(bytes, GFP_KERNEL);
445 	if (!data_buf) {
446 		ret = -ENOMEM;
447 		goto err;
448 	}
449 
450 	bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
451 	bio->bi_iter.bi_sector = p.ptr.offset;
452 	bch2_bio_map(bio, data_buf, bytes);
453 	ret = submit_bio_wait(bio);
454 	if (ret)
455 		goto err;
456 
457 	prt_printf(&buf, "extents pointing to same space, but first extent checksum bad:\n");
458 	bch2_btree_id_to_text(&buf, btree);
459 	prt_str(&buf, " ");
460 	bch2_bkey_val_to_text(&buf, c, extent);
461 	prt_newline(&buf);
462 	bch2_btree_id_to_text(&buf, o_btree);
463 	prt_str(&buf, " ");
464 	bch2_bkey_val_to_text(&buf, c, extent2);
465 
466 	struct nonce nonce = extent_nonce(extent.k->bversion, p.crc);
467 	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
468 	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
469 			trans, dup_backpointer_to_bad_csum_extent,
470 			"%s", buf.buf))
471 		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
472 fsck_err:
473 err:
474 	if (bio)
475 		bio_put(bio);
476 	kvfree(data_buf);
477 	percpu_ref_put(&ca->io_ref[READ]);
478 	printbuf_exit(&buf);
479 	return ret;
480 }
481 
check_bp_exists(struct btree_trans * trans,struct extents_to_bp_state * s,struct bkey_i_backpointer * bp,struct bkey_s_c orig_k)482 static int check_bp_exists(struct btree_trans *trans,
483 			   struct extents_to_bp_state *s,
484 			   struct bkey_i_backpointer *bp,
485 			   struct bkey_s_c orig_k)
486 {
487 	struct bch_fs *c = trans->c;
488 	struct btree_iter other_extent_iter = {};
489 	struct printbuf buf = PRINTBUF;
490 
491 	if (bpos_lt(bp->k.p, s->bp_start) ||
492 	    bpos_gt(bp->k.p, s->bp_end))
493 		return 0;
494 
495 	struct btree_iter bp_iter;
496 	struct bkey_s_c bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0);
497 	int ret = bkey_err(bp_k);
498 	if (ret)
499 		goto err;
500 
501 	if (bp_k.k->type != KEY_TYPE_backpointer ||
502 	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) {
503 		ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
504 		if (ret)
505 			goto err;
506 
507 		goto check_existing_bp;
508 	}
509 out:
510 err:
511 fsck_err:
512 	bch2_trans_iter_exit(trans, &other_extent_iter);
513 	bch2_trans_iter_exit(trans, &bp_iter);
514 	printbuf_exit(&buf);
515 	return ret;
516 check_existing_bp:
517 	/* Do we have a backpointer for a different extent? */
518 	if (bp_k.k->type != KEY_TYPE_backpointer)
519 		goto missing;
520 
521 	struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k);
522 
523 	struct bkey_s_c other_extent =
524 		bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL);
525 	ret = bkey_err(other_extent);
526 	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
527 		ret = 0;
528 	if (ret)
529 		goto err;
530 
531 	if (!other_extent.k)
532 		goto missing;
533 
534 	rcu_read_lock();
535 	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp->k.p.inode);
536 	if (ca) {
537 		struct bkey_ptrs_c other_extent_ptrs = bch2_bkey_ptrs_c(other_extent);
538 		bkey_for_each_ptr(other_extent_ptrs, ptr)
539 			if (ptr->dev == bp->k.p.inode &&
540 			    dev_ptr_stale_rcu(ca, ptr)) {
541 				ret = drop_dev_and_update(trans, other_bp.v->btree_id,
542 							  other_extent, bp->k.p.inode);
543 				if (ret)
544 					goto err;
545 				goto out;
546 			}
547 	}
548 	rcu_read_unlock();
549 
550 	if (bch2_extents_match(orig_k, other_extent)) {
551 		printbuf_reset(&buf);
552 		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n");
553 		bch2_bkey_val_to_text(&buf, c, orig_k);
554 		prt_newline(&buf);
555 		bch2_bkey_val_to_text(&buf, c, other_extent);
556 		bch_err(c, "%s", buf.buf);
557 
558 		if (other_extent.k->size <= orig_k.k->size) {
559 			ret = drop_dev_and_update(trans, other_bp.v->btree_id,
560 						  other_extent, bp->k.p.inode);
561 			if (ret)
562 				goto err;
563 			goto out;
564 		} else {
565 			ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bp->k.p.inode);
566 			if (ret)
567 				goto err;
568 			goto missing;
569 		}
570 	}
571 
572 	ret = check_extent_checksum(trans,
573 				    other_bp.v->btree_id, other_extent,
574 				    bp->v.btree_id, orig_k,
575 				    bp->k.p.inode);
576 	if (ret < 0)
577 		goto err;
578 	if (ret) {
579 		ret = 0;
580 		goto missing;
581 	}
582 
583 	ret = check_extent_checksum(trans, bp->v.btree_id, orig_k,
584 				    other_bp.v->btree_id, other_extent, bp->k.p.inode);
585 	if (ret < 0)
586 		goto err;
587 	if (ret) {
588 		ret = 0;
589 		goto out;
590 	}
591 
592 	printbuf_reset(&buf);
593 	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n", bp->k.p.inode);
594 	bch2_bkey_val_to_text(&buf, c, orig_k);
595 	prt_newline(&buf);
596 	bch2_bkey_val_to_text(&buf, c, other_extent);
597 	bch_err(c, "%s", buf.buf);
598 	ret = -BCH_ERR_fsck_repair_unimplemented;
599 	goto err;
600 missing:
601 	printbuf_reset(&buf);
602 	prt_str(&buf, "missing backpointer\nfor:  ");
603 	bch2_bkey_val_to_text(&buf, c, orig_k);
604 	prt_printf(&buf, "\nwant: ");
605 	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
606 	prt_printf(&buf, "\ngot:  ");
607 	bch2_bkey_val_to_text(&buf, c, bp_k);
608 
609 	if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
610 		ret = bch2_bucket_backpointer_mod(trans, orig_k, bp, true);
611 
612 	goto out;
613 }
614 
check_extent_to_backpointers(struct btree_trans * trans,struct extents_to_bp_state * s,enum btree_id btree,unsigned level,struct bkey_s_c k)615 static int check_extent_to_backpointers(struct btree_trans *trans,
616 					struct extents_to_bp_state *s,
617 					enum btree_id btree, unsigned level,
618 					struct bkey_s_c k)
619 {
620 	struct bch_fs *c = trans->c;
621 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
622 	const union bch_extent_entry *entry;
623 	struct extent_ptr_decoded p;
624 
625 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
626 		if (p.ptr.dev == BCH_SB_MEMBER_INVALID)
627 			continue;
628 
629 		rcu_read_lock();
630 		struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
631 		bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches);
632 		bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty);
633 
634 		bool stale = p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr));
635 		rcu_read_unlock();
636 
637 		if ((check || empty) && !stale) {
638 			struct bkey_i_backpointer bp;
639 			bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp);
640 
641 			int ret = check
642 				? check_bp_exists(trans, s, &bp, k)
643 				: bch2_bucket_backpointer_mod(trans, k, &bp, true);
644 			if (ret)
645 				return ret;
646 		}
647 	}
648 
649 	return 0;
650 }
651 
check_btree_root_to_backpointers(struct btree_trans * trans,struct extents_to_bp_state * s,enum btree_id btree_id,int * level)652 static int check_btree_root_to_backpointers(struct btree_trans *trans,
653 					    struct extents_to_bp_state *s,
654 					    enum btree_id btree_id,
655 					    int *level)
656 {
657 	struct bch_fs *c = trans->c;
658 	struct btree_iter iter;
659 	struct btree *b;
660 	struct bkey_s_c k;
661 	int ret;
662 retry:
663 	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
664 				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
665 	b = bch2_btree_iter_peek_node(trans, &iter);
666 	ret = PTR_ERR_OR_ZERO(b);
667 	if (ret)
668 		goto err;
669 
670 	if (b != btree_node_root(c, b)) {
671 		bch2_trans_iter_exit(trans, &iter);
672 		goto retry;
673 	}
674 
675 	*level = b->c.level;
676 
677 	k = bkey_i_to_s_c(&b->key);
678 	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
679 err:
680 	bch2_trans_iter_exit(trans, &iter);
681 	return ret;
682 }
683 
bp_to_bbpos(struct bch_backpointer bp)684 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
685 {
686 	return (struct bbpos) {
687 		.btree	= bp.btree_id,
688 		.pos	= bp.pos,
689 	};
690 }
691 
mem_may_pin_bytes(struct bch_fs * c)692 static u64 mem_may_pin_bytes(struct bch_fs *c)
693 {
694 	struct sysinfo i;
695 	si_meminfo(&i);
696 
697 	u64 mem_bytes = i.totalram * i.mem_unit;
698 	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
699 }
700 
btree_nodes_fit_in_ram(struct bch_fs * c)701 static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
702 {
703 	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
704 }
705 
bch2_get_btree_in_memory_pos(struct btree_trans * trans,u64 btree_leaf_mask,u64 btree_interior_mask,struct bbpos start,struct bbpos * end)706 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
707 					u64 btree_leaf_mask,
708 					u64 btree_interior_mask,
709 					struct bbpos start, struct bbpos *end)
710 {
711 	struct bch_fs *c = trans->c;
712 	s64 mem_may_pin = mem_may_pin_bytes(c);
713 	int ret = 0;
714 
715 	bch2_btree_cache_unpin(c);
716 
717 	btree_interior_mask |= btree_leaf_mask;
718 
719 	c->btree_cache.pinned_nodes_mask[0]		= btree_leaf_mask;
720 	c->btree_cache.pinned_nodes_mask[1]		= btree_interior_mask;
721 	c->btree_cache.pinned_nodes_start		= start;
722 	c->btree_cache.pinned_nodes_end			= *end = BBPOS_MAX;
723 
724 	for (enum btree_id btree = start.btree;
725 	     btree < BTREE_ID_NR && !ret;
726 	     btree++) {
727 		unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1;
728 
729 		if (!(BIT_ULL(btree) & btree_leaf_mask) &&
730 		    !(BIT_ULL(btree) & btree_interior_mask))
731 			continue;
732 
733 		ret = __for_each_btree_node(trans, iter, btree,
734 				      btree == start.btree ? start.pos : POS_MIN,
735 				      0, depth, BTREE_ITER_prefetch, b, ({
736 			mem_may_pin -= btree_buf_bytes(b);
737 			if (mem_may_pin <= 0) {
738 				c->btree_cache.pinned_nodes_end = *end =
739 					BBPOS(btree, b->key.k.p);
740 				break;
741 			}
742 			bch2_node_pin(c, b);
743 			0;
744 		}));
745 	}
746 
747 	return ret;
748 }
749 
bch2_check_extents_to_backpointers_pass(struct btree_trans * trans,struct extents_to_bp_state * s)750 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
751 						   struct extents_to_bp_state *s)
752 {
753 	struct bch_fs *c = trans->c;
754 	struct progress_indicator_state progress;
755 	int ret = 0;
756 
757 	bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));
758 
759 	for (enum btree_id btree_id = 0;
760 	     btree_id < btree_id_nr_alive(c);
761 	     btree_id++) {
762 		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
763 
764 		ret = commit_do(trans, NULL, NULL,
765 				BCH_TRANS_COMMIT_no_enospc,
766 				check_btree_root_to_backpointers(trans, s, btree_id, &level));
767 		if (ret)
768 			return ret;
769 
770 		while (level >= depth) {
771 			struct btree_iter iter;
772 			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level,
773 						  BTREE_ITER_prefetch);
774 
775 			ret = for_each_btree_key_continue(trans, iter, 0, k, ({
776 				bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
777 				check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
778 				bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
779 			}));
780 			if (ret)
781 				return ret;
782 
783 			--level;
784 		}
785 	}
786 
787 	return 0;
788 }
789 
790 enum alloc_sector_counter {
791 	ALLOC_dirty,
792 	ALLOC_cached,
793 	ALLOC_stripe,
794 	ALLOC_SECTORS_NR
795 };
796 
data_type_to_alloc_counter(enum bch_data_type t)797 static int data_type_to_alloc_counter(enum bch_data_type t)
798 {
799 	switch (t) {
800 	case BCH_DATA_btree:
801 	case BCH_DATA_user:
802 		return ALLOC_dirty;
803 	case BCH_DATA_cached:
804 		return ALLOC_cached;
805 	case BCH_DATA_stripe:
806 	case BCH_DATA_parity:
807 		return ALLOC_stripe;
808 	default:
809 		return -1;
810 	}
811 }
812 
813 static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);
814 
check_bucket_backpointer_mismatch(struct btree_trans * trans,struct bkey_s_c alloc_k,struct bkey_buf * last_flushed)815 static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
816 					     struct bkey_buf *last_flushed)
817 {
818 	struct bch_fs *c = trans->c;
819 	struct bch_alloc_v4 a_convert;
820 	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
821 	bool need_commit = false;
822 
823 	if (a->data_type == BCH_DATA_sb ||
824 	    a->data_type == BCH_DATA_journal ||
825 	    a->data_type == BCH_DATA_parity)
826 		return 0;
827 
828 	u32 sectors[ALLOC_SECTORS_NR];
829 	memset(sectors, 0, sizeof(sectors));
830 
831 	struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p);
832 	if (!ca)
833 		return 0;
834 
835 	struct btree_iter iter;
836 	struct bkey_s_c bp_k;
837 	int ret = 0;
838 	for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers,
839 				bucket_pos_to_bp_start(ca, alloc_k.k->p),
840 				bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) {
841 		if (bp_k.k->type != KEY_TYPE_backpointer)
842 			continue;
843 
844 		struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
845 
846 		if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen &&
847 		    (bp.v->bucket_gen != a->gen ||
848 		     bp.v->pad)) {
849 			ret = bch2_backpointer_del(trans, bp_k.k->p);
850 			if (ret)
851 				break;
852 
853 			need_commit = true;
854 			continue;
855 		}
856 
857 		if (bp.v->bucket_gen != a->gen)
858 			continue;
859 
860 		int alloc_counter = data_type_to_alloc_counter(bp.v->data_type);
861 		if (alloc_counter < 0)
862 			continue;
863 
864 		sectors[alloc_counter] += bp.v->bucket_len;
865 	};
866 	bch2_trans_iter_exit(trans, &iter);
867 	if (ret)
868 		goto err;
869 
870 	if (need_commit) {
871 		ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
872 		if (ret)
873 			goto err;
874 	}
875 
876 	if (sectors[ALLOC_dirty]  != a->dirty_sectors ||
877 	    sectors[ALLOC_cached] != a->cached_sectors ||
878 	    sectors[ALLOC_stripe] != a->stripe_sectors) {
879 		if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
880 			ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
881 			if (ret)
882 				goto err;
883 		}
884 
885 		if (sectors[ALLOC_dirty]  > a->dirty_sectors ||
886 		    sectors[ALLOC_cached] > a->cached_sectors ||
887 		    sectors[ALLOC_stripe] > a->stripe_sectors) {
888 			ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
889 				-BCH_ERR_transaction_restart_nested;
890 			goto err;
891 		}
892 
893 		if (!sectors[ALLOC_dirty] &&
894 		    !sectors[ALLOC_stripe] &&
895 		    !sectors[ALLOC_cached])
896 			__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty);
897 		else
898 			__set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches);
899 	}
900 err:
901 	bch2_dev_put(ca);
902 	return ret;
903 }
904 
backpointer_node_has_missing(struct bch_fs * c,struct bkey_s_c k)905 static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
906 {
907 	switch (k.k->type) {
908 	case KEY_TYPE_btree_ptr_v2: {
909 		bool ret = false;
910 
911 		rcu_read_lock();
912 		struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key;
913 		while (pos.inode <= k.k->p.inode) {
914 			if (pos.inode >= c->sb.nr_devices)
915 				break;
916 
917 			struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode);
918 			if (!ca)
919 				goto next;
920 
921 			struct bpos bucket = bp_pos_to_bucket(ca, pos);
922 			bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches,
923 						      ca->mi.nbuckets, bucket.offset);
924 			if (bucket.offset == ca->mi.nbuckets)
925 				goto next;
926 
927 			ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p);
928 			if (ret)
929 				break;
930 next:
931 			pos = SPOS(pos.inode + 1, 0, 0);
932 		}
933 		rcu_read_unlock();
934 
935 		return ret;
936 	}
937 	case KEY_TYPE_btree_ptr:
938 		return true;
939 	default:
940 		return false;
941 	}
942 }
943 
btree_node_get_and_pin(struct btree_trans * trans,struct bkey_i * k,enum btree_id btree,unsigned level)944 static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k,
945 				  enum btree_id btree, unsigned level)
946 {
947 	struct btree_iter iter;
948 	bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0);
949 	struct btree *b = bch2_btree_iter_peek_node(trans, &iter);
950 	int ret = PTR_ERR_OR_ZERO(b);
951 	if (ret)
952 		goto err;
953 
954 	if (b)
955 		bch2_node_pin(trans->c, b);
956 err:
957 	bch2_trans_iter_exit(trans, &iter);
958 	return ret;
959 }
960 
bch2_pin_backpointer_nodes_with_missing(struct btree_trans * trans,struct bpos start,struct bpos * end)961 static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans,
962 						   struct bpos start, struct bpos *end)
963 {
964 	struct bch_fs *c = trans->c;
965 	int ret = 0;
966 
967 	struct bkey_buf tmp;
968 	bch2_bkey_buf_init(&tmp);
969 
970 	bch2_btree_cache_unpin(c);
971 
972 	*end = SPOS_MAX;
973 
974 	s64 mem_may_pin = mem_may_pin_bytes(c);
975 	struct btree_iter iter;
976 	bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
977 				  0, 1, BTREE_ITER_prefetch);
978 	ret = for_each_btree_key_continue(trans, iter, 0, k, ({
979 		if (!backpointer_node_has_missing(c, k))
980 			continue;
981 
982 		mem_may_pin -= c->opts.btree_node_size;
983 		if (mem_may_pin <= 0)
984 			break;
985 
986 		bch2_bkey_buf_reassemble(&tmp, c, k);
987 		struct btree_path *path = btree_iter_path(trans, &iter);
988 
989 		BUG_ON(path->level != 1);
990 
991 		bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1);
992 	}));
993 	if (ret)
994 		return ret;
995 
996 	struct bpos pinned = SPOS_MAX;
997 	mem_may_pin = mem_may_pin_bytes(c);
998 	bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start,
999 				  0, 1, BTREE_ITER_prefetch);
1000 	ret = for_each_btree_key_continue(trans, iter, 0, k, ({
1001 		if (!backpointer_node_has_missing(c, k))
1002 			continue;
1003 
1004 		mem_may_pin -= c->opts.btree_node_size;
1005 		if (mem_may_pin <= 0) {
1006 			*end = pinned;
1007 			break;
1008 		}
1009 
1010 		bch2_bkey_buf_reassemble(&tmp, c, k);
1011 		struct btree_path *path = btree_iter_path(trans, &iter);
1012 
1013 		BUG_ON(path->level != 1);
1014 
1015 		int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1);
1016 
1017 		if (!ret2)
1018 			pinned = tmp.k->k.p;
1019 
1020 		ret;
1021 	}));
1022 	if (ret)
1023 		return ret;
1024 
1025 	return ret;
1026 }
1027 
bch2_check_extents_to_backpointers(struct bch_fs * c)1028 int bch2_check_extents_to_backpointers(struct bch_fs *c)
1029 {
1030 	int ret = 0;
1031 
1032 	/*
1033 	 * Can't allow devices to come/go/resize while we have bucket bitmaps
1034 	 * allocated
1035 	 */
1036 	down_read(&c->state_lock);
1037 
1038 	for_each_member_device(c, ca) {
1039 		BUG_ON(ca->bucket_backpointer_mismatches);
1040 		ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
1041 							     sizeof(unsigned long),
1042 							     GFP_KERNEL);
1043 		ca->bucket_backpointer_empty = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets),
1044 							sizeof(unsigned long),
1045 							GFP_KERNEL);
1046 		if (!ca->bucket_backpointer_mismatches ||
1047 		    !ca->bucket_backpointer_empty) {
1048 			bch2_dev_put(ca);
1049 			ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap;
1050 			goto err_free_bitmaps;
1051 		}
1052 	}
1053 
1054 	struct btree_trans *trans = bch2_trans_get(c);
1055 	struct extents_to_bp_state s = { .bp_start = POS_MIN };
1056 
1057 	bch2_bkey_buf_init(&s.last_flushed);
1058 	bkey_init(&s.last_flushed.k->k);
1059 
1060 	ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
1061 				 POS_MIN, BTREE_ITER_prefetch, k, ({
1062 		check_bucket_backpointer_mismatch(trans, k, &s.last_flushed);
1063 	}));
1064 	if (ret)
1065 		goto err;
1066 
1067 	u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0;
1068 	for_each_member_device(c, ca) {
1069 		nr_buckets	+= ca->mi.nbuckets;
1070 		nr_mismatches	+= bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets);
1071 		nr_empty	+= bitmap_weight(ca->bucket_backpointer_empty, ca->mi.nbuckets);
1072 	}
1073 
1074 	if (!nr_mismatches && !nr_empty)
1075 		goto err;
1076 
1077 	bch_info(c, "scanning for missing backpointers in %llu/%llu buckets",
1078 		 nr_mismatches + nr_empty, nr_buckets);
1079 
1080 	while (1) {
1081 		ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end);
1082 		if (ret)
1083 			break;
1084 
1085 		if ( bpos_eq(s.bp_start, POS_MIN) &&
1086 		    !bpos_eq(s.bp_end, SPOS_MAX))
1087 			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
1088 				    __func__, btree_nodes_fit_in_ram(c));
1089 
1090 		if (!bpos_eq(s.bp_start, POS_MIN) ||
1091 		    !bpos_eq(s.bp_end, SPOS_MAX)) {
1092 			struct printbuf buf = PRINTBUF;
1093 
1094 			prt_str(&buf, "check_extents_to_backpointers(): ");
1095 			bch2_bpos_to_text(&buf, s.bp_start);
1096 			prt_str(&buf, "-");
1097 			bch2_bpos_to_text(&buf, s.bp_end);
1098 
1099 			bch_verbose(c, "%s", buf.buf);
1100 			printbuf_exit(&buf);
1101 		}
1102 
1103 		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
1104 		if (ret || bpos_eq(s.bp_end, SPOS_MAX))
1105 			break;
1106 
1107 		s.bp_start = bpos_successor(s.bp_end);
1108 	}
1109 err:
1110 	bch2_trans_put(trans);
1111 	bch2_bkey_buf_exit(&s.last_flushed, c);
1112 	bch2_btree_cache_unpin(c);
1113 err_free_bitmaps:
1114 	for_each_member_device(c, ca) {
1115 		kvfree(ca->bucket_backpointer_empty);
1116 		ca->bucket_backpointer_empty = NULL;
1117 		kvfree(ca->bucket_backpointer_mismatches);
1118 		ca->bucket_backpointer_mismatches = NULL;
1119 	}
1120 
1121 	up_read(&c->state_lock);
1122 	bch_err_fn(c, ret);
1123 	return ret;
1124 }
1125 
check_one_backpointer(struct btree_trans * trans,struct bbpos start,struct bbpos end,struct bkey_s_c bp_k,struct bkey_buf * last_flushed)1126 static int check_one_backpointer(struct btree_trans *trans,
1127 				 struct bbpos start,
1128 				 struct bbpos end,
1129 				 struct bkey_s_c bp_k,
1130 				 struct bkey_buf *last_flushed)
1131 {
1132 	if (bp_k.k->type != KEY_TYPE_backpointer)
1133 		return 0;
1134 
1135 	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
1136 	struct bbpos pos = bp_to_bbpos(*bp.v);
1137 
1138 	if (bbpos_cmp(pos, start) < 0 ||
1139 	    bbpos_cmp(pos, end) > 0)
1140 		return 0;
1141 
1142 	struct btree_iter iter;
1143 	struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed);
1144 	int ret = bkey_err(k);
1145 	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
1146 		return 0;
1147 	if (ret)
1148 		return ret;
1149 
1150 	bch2_trans_iter_exit(trans, &iter);
1151 	return ret;
1152 }
1153 
check_bucket_backpointers_to_extents(struct btree_trans * trans,struct bch_dev * ca,struct bpos bucket)1154 static int check_bucket_backpointers_to_extents(struct btree_trans *trans,
1155 						struct bch_dev *ca, struct bpos bucket)
1156 {
1157 	u32 restart_count = trans->restart_count;
1158 	struct bkey_buf last_flushed;
1159 	bch2_bkey_buf_init(&last_flushed);
1160 	bkey_init(&last_flushed.k->k);
1161 
1162 	int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers,
1163 				      bucket_pos_to_bp_start(ca, bucket),
1164 				      bucket_pos_to_bp_end(ca, bucket),
1165 				      0, k,
1166 		check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed)
1167 	);
1168 
1169 	bch2_bkey_buf_exit(&last_flushed, trans->c);
1170 	return ret ?: trans_was_restarted(trans, restart_count);
1171 }
1172 
bch2_check_backpointers_to_extents_pass(struct btree_trans * trans,struct bbpos start,struct bbpos end)1173 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
1174 						   struct bbpos start,
1175 						   struct bbpos end)
1176 {
1177 	struct bch_fs *c = trans->c;
1178 	struct bkey_buf last_flushed;
1179 	struct progress_indicator_state progress;
1180 
1181 	bch2_bkey_buf_init(&last_flushed);
1182 	bkey_init(&last_flushed.k->k);
1183 	bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));
1184 
1185 	int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers,
1186 				     POS_MIN, BTREE_ITER_prefetch, k, ({
1187 			bch2_progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
1188 			check_one_backpointer(trans, start, end, k, &last_flushed);
1189 	}));
1190 
1191 	bch2_bkey_buf_exit(&last_flushed, c);
1192 	return ret;
1193 }
1194 
bch2_check_backpointers_to_extents(struct bch_fs * c)1195 int bch2_check_backpointers_to_extents(struct bch_fs *c)
1196 {
1197 	struct btree_trans *trans = bch2_trans_get(c);
1198 	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
1199 	int ret;
1200 
1201 	while (1) {
1202 		ret = bch2_get_btree_in_memory_pos(trans,
1203 						   BIT_ULL(BTREE_ID_extents)|
1204 						   BIT_ULL(BTREE_ID_reflink),
1205 						   ~0,
1206 						   start, &end);
1207 		if (ret)
1208 			break;
1209 
1210 		if (!bbpos_cmp(start, BBPOS_MIN) &&
1211 		    bbpos_cmp(end, BBPOS_MAX))
1212 			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
1213 				    __func__, btree_nodes_fit_in_ram(c));
1214 
1215 		if (bbpos_cmp(start, BBPOS_MIN) ||
1216 		    bbpos_cmp(end, BBPOS_MAX)) {
1217 			struct printbuf buf = PRINTBUF;
1218 
1219 			prt_str(&buf, "check_backpointers_to_extents(): ");
1220 			bch2_bbpos_to_text(&buf, start);
1221 			prt_str(&buf, "-");
1222 			bch2_bbpos_to_text(&buf, end);
1223 
1224 			bch_verbose(c, "%s", buf.buf);
1225 			printbuf_exit(&buf);
1226 		}
1227 
1228 		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
1229 		if (ret || !bbpos_cmp(end, BBPOS_MAX))
1230 			break;
1231 
1232 		start = bbpos_successor(end);
1233 	}
1234 	bch2_trans_put(trans);
1235 
1236 	bch2_btree_cache_unpin(c);
1237 
1238 	bch_err_fn(c, ret);
1239 	return ret;
1240 }
1241