xref: /linux/fs/xfs/scrub/refcount_repair.c (revision a095686a2383526d7315197e2419d84ee8470217)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_error.h"
#include "xfs_ag.h"
#include "xfs_health.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Rebuilding the Reference Count Btree
 * ====================================
 *
 * This algorithm is "borrowed" from xfs_repair.  Imagine the rmap
 * entries as rectangles representing extents of physical blocks, and
 * that the rectangles can be laid down to allow them to overlap each
 * other; then we know that we must emit a refcnt btree entry wherever
 * the amount of overlap changes, i.e. the emission stimulus is
 * level-triggered:
 *
 *                 -    ---
 *       --      ----- ----   ---        ------
 * --   ----     ----------- ----     ---------
 * -------------------------------- -----------
 * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
 * 2 1  23 21    3 43 234  2123  1 01 2  3     0
 *
 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the bnobt tells us which blocks are free; single-use
 * blocks aren't recorded in the bnobt or the refcntbt.  If the rmapbt
 * supports storing multiple entries covering a given block we could
 * theoretically dispense with the refcntbt and simply count rmaps, but
 * that's inefficient in the (hot) write path, so we'll take the cost of
 * the extra tree to save time.  Also there's no guarantee that rmap
 * will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
 *  - Add to the bag all rmaps in the array where startblock == sp.
 *  - Set np to the physical block where the bag size will change.  This
 *    is the minimum of (the pblk of the next unprocessed rmap) and
 *    (startblock + len of each rmap in the bag).
 *  - Record the bag size as old_bag_size.
 *
 *  - While the bag isn't empty,
 *     - Remove from the bag all rmaps where startblock + len == np.
 *     - Add to the bag all rmaps in the array where startblock == np.
 *     - If the bag size isn't old_bag_size, store the refcount entry
 *       (sp, np - sp, bag_size) in the refcnt btree.
 *     - If the bag is empty, break out of the inner loop.
 *     - Set old_bag_size to the bag size.
 *     - Set sp = np.
 *     - Set np to the physical block where the bag size will change.
 *       This is the minimum of (the pblk of the next unprocessed rmap)
 *       and (startblock + len of each rmap in the bag).
 *
 * Like all the other repairers, we make a list of all the refcount
 * records we need, then reinitialize the refcount btree root and
 * insert all the records.
 */
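
/*
 * Worked example (block numbers invented purely for illustration; they do
 * not come from any real filesystem): suppose the sorted rmap array for
 * this AG contains three data fork mappings,
 *
 *   A: (startblock 10, len 5)   covering blocks 10-14
 *   B: (startblock 12, len 2)   covering blocks 12-13
 *   C: (startblock 18, len 3)   covering blocks 18-20
 *
 * Blocks 12-13 are the only blocks mapped more than once, so the rebuilt
 * shared-domain refcount btree ends up with the single record
 * (startblock 12, blockcount 2, refcount 2).  The singly-mapped spans are
 * not stored, per the note above about refcount < 2.
 */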

/* The only parts of the rmap that we care about for computing refcounts. */
struct xrep_refc_rmap {
	xfs_agblock_t		startblock;
	xfs_extlen_t		blockcount;
} __packed;

struct xrep_refc {
	/* refcount extents */
	struct xfarray		*refcount_records;

	/* new refcountbt information */
	struct xrep_newbt	new_btree;

	/* old refcountbt blocks */
	struct xagb_bitmap	old_refcountbt_blocks;

	struct xfs_scrub	*sc;

	/* get_records()'s position in the refcount record array. */
	xfarray_idx_t		array_cur;

	/* # of refcountbt blocks */
	xfs_extlen_t		btblocks;
};

/* Check for any obvious conflicts with this shared/CoW staging extent. */
STATIC int
xrep_refc_check_ext(
	struct xfs_scrub		*sc,
	const struct xfs_refcount_irec	*rec)
{
	enum xbtree_recpacking		outcome;
	int				error;

	if (xfs_refcount_check_irec(sc->sa.pag, rec) != NULL)
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rc_startblock,
			rec->rc_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->rc_startblock, rec->rc_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}

/* Record a reference count extent. */
STATIC int
xrep_refc_stash(
	struct xrep_refc		*rr,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len,
	uint64_t			refcount)
{
	struct xfs_refcount_irec	irec = {
		.rc_startblock		= agbno,
		.rc_blockcount		= len,
		.rc_domain		= domain,
	};
	struct xfs_scrub		*sc = rr->sc;
	int				error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	irec.rc_refcount = min_t(uint64_t, MAXREFCOUNT, refcount);

	error = xrep_refc_check_ext(rr->sc, &irec);
	if (error)
		return error;

	trace_xrep_refc_found(sc->sa.pag, &irec);

	return xfarray_append(rr->refcount_records, &irec);
}

/* Record a CoW staging extent. */
STATIC int
xrep_refc_stash_cow(
	struct xrep_refc		*rr,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len)
{
	return xrep_refc_stash(rr, XFS_REFC_DOMAIN_COW, agbno, len, 1);
}

/* Decide if an rmap could describe a shared extent. */
static inline bool
xrep_refc_rmap_shareable(
	struct xfs_mount		*mp,
	const struct xfs_rmap_irec	*rmap)
{
	/* AG metadata are never shareable */
	if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
		return false;

	/* Metadata in files are never shareable */
	if (xfs_internal_inum(mp, rmap->rm_owner))
		return false;

	/* Metadata and unwritten file blocks are not shareable. */
	if (rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
			      XFS_RMAP_UNWRITTEN))
		return false;

	return true;
}

/*
 * Walk along the reverse mapping records until we find one that could describe
 * a shared extent.
 */
STATIC int
xrep_refc_walk_rmaps(
	struct xrep_refc	*rr,
	struct xrep_refc_rmap	*rrm,
	bool			*have_rec)
{
	struct xfs_rmap_irec	rmap;
	struct xfs_btree_cur	*cur = rr->sc->sa.rmap_cur;
	struct xfs_mount	*mp = cur->bc_mp;
	int			have_gt;
	int			error = 0;

	*have_rec = false;

	/*
	 * Loop through the remaining rmaps.  Remember CoW staging
	 * extents and the refcountbt blocks from the old tree for later
	 * disposal.  We can only share written data fork extents, so
	 * keep looping until we find an rmap for one.
	 */
	do {
		if (xchk_should_terminate(rr->sc, &error))
			return error;

		error = xfs_btree_increment(cur, 0, &have_gt);
		if (error)
			return error;
		if (!have_gt)
			return 0;

		error = xfs_rmap_get_rec(cur, &rmap, &have_gt);
		if (error)
			return error;
		if (XFS_IS_CORRUPT(mp, !have_gt)) {
			xfs_btree_mark_sick(cur);
			return -EFSCORRUPTED;
		}

		if (rmap.rm_owner == XFS_RMAP_OWN_COW) {
			error = xrep_refc_stash_cow(rr, rmap.rm_startblock,
					rmap.rm_blockcount);
			if (error)
				return error;
		} else if (rmap.rm_owner == XFS_RMAP_OWN_REFC) {
			/* refcountbt block, dump it when we're done. */
			rr->btblocks += rmap.rm_blockcount;
			error = xagb_bitmap_set(&rr->old_refcountbt_blocks,
					rmap.rm_startblock, rmap.rm_blockcount);
			if (error)
				return error;
		}
	} while (!xrep_refc_rmap_shareable(mp, &rmap));

	rrm->startblock = rmap.rm_startblock;
	rrm->blockcount = rmap.rm_blockcount;
	*have_rec = true;
	return 0;
}

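/*
 * On disk, CoW staging extents carry XFS_REFC_COWFLAG (the high bit) in
 * their startblock, which places them after every shared-domain record.
 * Encode the startblock the same way here so that sorting the staged
 * records reproduces the ondisk key order.
 */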
static inline uint32_t
xrep_refc_encode_startblock(
	const struct xfs_refcount_irec	*irec)
{
	uint32_t			start;

	start = irec->rc_startblock & ~XFS_REFC_COWFLAG;
	if (irec->rc_domain == XFS_REFC_DOMAIN_COW)
		start |= XFS_REFC_COWFLAG;

	return start;
}

/* Sort in the same order as the ondisk records. */
static int
xrep_refc_extent_cmp(
	const void			*a,
	const void			*b)
{
	const struct xfs_refcount_irec	*ap = a;
	const struct xfs_refcount_irec	*bp = b;
	uint32_t			sa, sb;

	sa = xrep_refc_encode_startblock(ap);
	sb = xrep_refc_encode_startblock(bp);

	if (sa > sb)
		return 1;
	if (sa < sb)
		return -1;
	return 0;
}

/*
 * Sort the refcount extents by startblock or else the btree records will be in
 * the wrong order.  Make sure the records do not overlap in physical space.
 */
STATIC int
xrep_refc_sort_records(
	struct xrep_refc		*rr)
{
	struct xfs_refcount_irec	irec;
	xfarray_idx_t			cur;
	enum xfs_refc_domain		dom = XFS_REFC_DOMAIN_SHARED;
	xfs_agblock_t			next_agbno = 0;
	int				error;

	error = xfarray_sort(rr->refcount_records, xrep_refc_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	foreach_xfarray_idx(rr->refcount_records, cur) {
		if (xchk_should_terminate(rr->sc, &error))
			return error;

		error = xfarray_load(rr->refcount_records, cur, &irec);
		if (error)
			return error;

		if (dom == XFS_REFC_DOMAIN_SHARED &&
		    irec.rc_domain == XFS_REFC_DOMAIN_COW) {
			dom = irec.rc_domain;
			next_agbno = 0;
		}

		if (dom != irec.rc_domain)
			return -EFSCORRUPTED;
		if (irec.rc_startblock < next_agbno)
			return -EFSCORRUPTED;

		next_agbno = irec.rc_startblock + irec.rc_blockcount;
	}

	return error;
}

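/* The agblock just past the end of this rmap (startblock + blockcount). */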
#define RRM_NEXT(r)	((r).startblock + (r).blockcount)
/*
 * Find the next block where the refcount changes, given the next rmap we
 * looked at and the ones we're already tracking.
 */
static inline int
xrep_refc_next_edge(
	struct xfarray		*rmap_bag,
	struct xrep_refc_rmap	*next_rrm,
	bool			next_valid,
	xfs_agblock_t		*nbnop)
{
	struct xrep_refc_rmap	rrm;
	xfarray_idx_t		array_cur = XFARRAY_CURSOR_INIT;
	xfs_agblock_t		nbno = NULLAGBLOCK;
	int			error;

	if (next_valid)
		nbno = next_rrm->startblock;

	while ((error = xfarray_iter(rmap_bag, &array_cur, &rrm)) == 1)
		nbno = min_t(xfs_agblock_t, nbno, RRM_NEXT(rrm));

	if (error)
		return error;

	/*
	 * We should have found /something/ because either next_rrm is the next
	 * interesting rmap to look at after emitting this refcount extent, or
	 * there are other rmaps in rmap_bag contributing to the current
	 * sharing count.  But if something is seriously wrong, bail out.
	 */
	if (nbno == NULLAGBLOCK)
		return -EFSCORRUPTED;

	*nbnop = nbno;
	return 0;
}

/*
 * Walk forward through the rmap btree to collect all rmaps starting at
 * @bno in @rmap_bag.  These represent the file(s) that share ownership of
 * the current block.  Upon return, the rmap cursor points to the last record
 * satisfying the startblock constraint.
 */
static int
xrep_refc_push_rmaps_at(
	struct xrep_refc	*rr,
	struct xfarray		*rmap_bag,
	xfs_agblock_t		bno,
	struct xrep_refc_rmap	*rrm,
	bool			*have,
	uint64_t		*stack_sz)
{
	struct xfs_scrub	*sc = rr->sc;
	int			have_gt;
	int			error;

	while (*have && rrm->startblock == bno) {
		error = xfarray_store_anywhere(rmap_bag, rrm);
		if (error)
			return error;
		(*stack_sz)++;
		error = xrep_refc_walk_rmaps(rr, rrm, have);
		if (error)
			return error;
	}

	error = xfs_btree_decrement(sc->sa.rmap_cur, 0, &have_gt);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(sc->mp, !have_gt)) {
		xfs_btree_mark_sick(sc->sa.rmap_cur);
		return -EFSCORRUPTED;
	}

	return 0;
}

/* Iterate all the rmap records to generate reference count data. */
STATIC int
xrep_refc_find_refcounts(
	struct xrep_refc	*rr)
{
	struct xrep_refc_rmap	rrm;
	struct xfs_scrub	*sc = rr->sc;
	struct xfarray		*rmap_bag;
	char			*descr;
	uint64_t		old_stack_sz;
	uint64_t		stack_sz = 0;
	xfs_agblock_t		sbno;
	xfs_agblock_t		cbno;
	xfs_agblock_t		nbno;
	bool			have;
	int			error;

	xrep_ag_btcur_init(sc, &sc->sa);

	/*
	 * Set up a sparse array to store all the rmap records that we're
	 * tracking to generate a reference count record.  If this exceeds
	 * MAXREFCOUNT, we clamp rc_refcount.
	 */
	descr = xchk_xfile_ag_descr(sc, "rmap record bag");
	error = xfarray_create(descr, 0, sizeof(struct xrep_refc_rmap),
			&rmap_bag);
	kfree(descr);
	if (error)
		goto out_cur;

	/* Start the rmapbt cursor to the left of all records. */
	error = xfs_btree_goto_left_edge(sc->sa.rmap_cur);
	if (error)
		goto out_bag;

	/* Process reverse mappings into refcount data. */
	while (xfs_btree_has_more_records(sc->sa.rmap_cur)) {
		/* Push all rmaps with pblk == sbno onto the stack */
		error = xrep_refc_walk_rmaps(rr, &rrm, &have);
		if (error)
			goto out_bag;
		if (!have)
			break;
		sbno = cbno = rrm.startblock;
		error = xrep_refc_push_rmaps_at(rr, rmap_bag, sbno,
					&rrm, &have, &stack_sz);
		if (error)
			goto out_bag;

		/* Set nbno to the bno of the next refcount change */
		error = xrep_refc_next_edge(rmap_bag, &rrm, have, &nbno);
		if (error)
			goto out_bag;

		ASSERT(nbno > sbno);
		old_stack_sz = stack_sz;

		/* While stack isn't empty... */
		while (stack_sz) {
			xfarray_idx_t	array_cur = XFARRAY_CURSOR_INIT;

			/* Pop all rmaps that end at nbno */
			while ((error = xfarray_iter(rmap_bag, &array_cur,
								&rrm)) == 1) {
				if (RRM_NEXT(rrm) != nbno)
					continue;
				error = xfarray_unset(rmap_bag, array_cur - 1);
				if (error)
					goto out_bag;
				stack_sz--;
			}
			if (error)
				goto out_bag;

			/* Push array items that start at nbno */
			error = xrep_refc_walk_rmaps(rr, &rrm, &have);
			if (error)
				goto out_bag;
			if (have) {
				error = xrep_refc_push_rmaps_at(rr, rmap_bag,
						nbno, &rrm, &have, &stack_sz);
				if (error)
					goto out_bag;
			}

			/* Emit refcount if necessary */
			ASSERT(nbno > cbno);
			if (stack_sz != old_stack_sz) {
				if (old_stack_sz > 1) {
					error = xrep_refc_stash(rr,
							XFS_REFC_DOMAIN_SHARED,
							cbno, nbno - cbno,
							old_stack_sz);
					if (error)
						goto out_bag;
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (stack_sz == 0)
				break;
			old_stack_sz = stack_sz;
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			error = xrep_refc_next_edge(rmap_bag, &rrm, have,
					&nbno);
			if (error)
				goto out_bag;

			ASSERT(nbno > sbno);
		}
	}

	ASSERT(stack_sz == 0);
out_bag:
	xfarray_destroy(rmap_bag);
out_cur:
	xchk_ag_btcur_free(&sc->sa);
	return error;
}
#undef RRM_NEXT

/* Retrieve refcountbt data for bulk load. */
STATIC int
xrep_refc_get_records(
	struct xfs_btree_cur		*cur,
	unsigned int			idx,
	struct xfs_btree_block		*block,
	unsigned int			nr_wanted,
	void				*priv)
{
	struct xfs_refcount_irec	*irec = &cur->bc_rec.rc;
	struct xrep_refc		*rr = priv;
	union xfs_btree_rec		*block_rec;
	unsigned int			loaded;
	int				error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load(rr->refcount_records, rr->array_cur++,
				irec);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_refc_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_refc	*rr = priv;

	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
}

/* Update the AGF counters. */
STATIC int
xrep_refc_reset_counters(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;

	/*
	 * After we commit the new btree to disk, it is possible that the
	 * process to reap the old btree blocks will race with the AIL trying
	 * to checkpoint the old btree blocks into the filesystem.  If the new
	 * tree is shorter than the old one, the refcountbt write verifier will
	 * fail and the AIL will shut down the filesystem.
	 *
	 * To avoid this, save the old incore btree height values as the alt
	 * height values before re-initializing the perag info from the updated
	 * AGF to capture all the new values.
	 */
	pag->pagf_repair_refcount_level = pag->pagf_refcount_level;

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagf(sc);
}

/*
 * Use the collected refcount information to stage a new refcount btree.  If
 * this is successful we'll return with the new btree root information logged
 * to the repair transaction but not yet committed.
 */
STATIC int
xrep_refc_build_new_tree(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_btree_cur	*refc_cur;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_fsblock_t		fsbno;
	int			error;

	error = xrep_refc_sort_records(rr);
	if (error)
		return error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the AG header.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, xfs_refc_block(sc->mp));
	xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_REFC, fsbno,
			XFS_AG_RESV_METADATA);
	rr->new_btree.bload.get_records = xrep_refc_get_records;
	rr->new_btree.bload.claim_block = xrep_refc_claim_block;

	/* Compute how many blocks we'll need. */
	refc_cur = xfs_refcountbt_init_cursor(sc->mp, NULL, NULL, pag);
	xfs_btree_stage_afakeroot(refc_cur, &rr->new_btree.afake);
	error = xfs_btree_bload_compute_geometry(refc_cur,
			&rr->new_btree.bload,
			xfarray_length(rr->refcount_records));
	if (error)
		goto err_cur;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			rr->new_btree.bload.nr_blocks);
	if (error)
		goto err_cur;

	/*
	 * Due to btree slack factors, it's possible for a new btree to be one
	 * level taller than the old btree.  Update the incore btree height so
	 * that we don't trip the verifiers when writing the new btree blocks
	 * to disk.
	 */
	pag->pagf_repair_refcount_level = rr->new_btree.bload.btree_height;

	/* Add all observed refcount records. */
	rr->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_level;

	/*
	 * Install the new btree in the AG header.  After this point the old
	 * btree is no longer accessible and the new tree is live.
	 */
	xfs_refcountbt_commit_staged_btree(refc_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(refc_cur, 0);

	/* Reset the AGF counters now that we've changed the btree shape. */
	error = xrep_refc_reset_counters(rr);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;

	return xrep_roll_ag_trans(sc);

err_level:
	pag->pagf_repair_refcount_level = 0;
err_cur:
	xfs_btree_del_cursor(refc_cur, error);
err_newbt:
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}

/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_refc_remove_old_tree(
	struct xrep_refc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	int			error;

	/* Free the old refcountbt blocks if they're not in use. */
	error = xrep_reap_agblocks(sc, &rr->old_refcountbt_blocks,
			&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
	if (error)
		return error;

	/*
	 * Now that we've zapped all the old refcountbt blocks we can turn off
	 * the alternate height mechanism and reset the per-AG space
	 * reservations.
	 */
	pag->pagf_repair_refcount_level = 0;
	sc->flags |= XREP_RESET_PERAG_RESV;
	return 0;
}

/* Rebuild the refcount btree. */
int
xrep_refcountbt(
	struct xfs_scrub	*sc)
{
	struct xrep_refc	*rr;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	int			error;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	rr = kzalloc(sizeof(struct xrep_refc), XCHK_GFP_FLAGS);
	if (!rr)
		return -ENOMEM;
	rr->sc = sc;

	/* Set up enough storage to handle one refcount record per block. */
	descr = xchk_xfile_ag_descr(sc, "reference count records");
	error = xfarray_create(descr, mp->m_sb.sb_agblocks,
			sizeof(struct xfs_refcount_irec),
			&rr->refcount_records);
	kfree(descr);
	if (error)
		goto out_rr;

	/* Collect all reference counts. */
	xagb_bitmap_init(&rr->old_refcountbt_blocks);
	error = xrep_refc_find_refcounts(rr);
	if (error)
		goto out_bitmap;

	/* Rebuild the refcount information. */
	error = xrep_refc_build_new_tree(rr);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_refc_remove_old_tree(rr);
	if (error)
		goto out_bitmap;

out_bitmap:
	xagb_bitmap_destroy(&rr->old_refcountbt_blocks);
	xfarray_destroy(rr->refcount_records);
out_rr:
	kfree(rr);
	return error;
}