xref: /linux/fs/xfs/scrub/rtrefcount_repair.c (revision 6f7e6393d1ce636bb7ec77a7fe7b77458fddf701)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_btree_staging.h"
15 #include "xfs_bit.h"
16 #include "xfs_log_format.h"
17 #include "xfs_trans.h"
18 #include "xfs_sb.h"
19 #include "xfs_alloc.h"
20 #include "xfs_ialloc.h"
21 #include "xfs_rmap.h"
22 #include "xfs_rmap_btree.h"
23 #include "xfs_rtrmap_btree.h"
24 #include "xfs_refcount.h"
25 #include "xfs_rtrefcount_btree.h"
26 #include "xfs_error.h"
27 #include "xfs_health.h"
28 #include "xfs_inode.h"
29 #include "xfs_quota.h"
30 #include "xfs_rtalloc.h"
31 #include "xfs_ag.h"
32 #include "xfs_rtgroup.h"
33 #include "xfs_rtbitmap.h"
34 #include "scrub/xfs_scrub.h"
35 #include "scrub/scrub.h"
36 #include "scrub/common.h"
37 #include "scrub/btree.h"
38 #include "scrub/trace.h"
39 #include "scrub/repair.h"
40 #include "scrub/bitmap.h"
41 #include "scrub/fsb_bitmap.h"
42 #include "scrub/xfile.h"
43 #include "scrub/xfarray.h"
44 #include "scrub/newbt.h"
45 #include "scrub/reap.h"
46 #include "scrub/rcbag.h"
47 
48 /*
49  * Rebuilding the Reference Count Btree
50  * ====================================
51  *
52  * This algorithm is "borrowed" from xfs_repair.  Imagine the rmap
53  * entries as rectangles representing extents of physical blocks, and
54  * that the rectangles can be laid down to allow them to overlap each
55  * other; then we know that we must emit a refcnt btree entry wherever
56  * the amount of overlap changes, i.e. the emission stimulus is
57  * level-triggered:
58  *
59  *                 -    ---
60  *       --      ----- ----   ---        ------
61  * --   ----     ----------- ----     ---------
62  * -------------------------------- -----------
63  * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
64  * 2 1  23 21    3 43 234  2123  1 01 2  3     0
65  *
66  * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
67  *
68  * Note that in the actual refcnt btree we don't store the refcount < 2
69  * cases because the bnobt tells us which blocks are free; single-use
70  * blocks aren't recorded in the bnobt or the refcntbt.  If the rmapbt
71  * supports storing multiple entries covering a given block we could
72  * theoretically dispense with the refcntbt and simply count rmaps, but
73  * that's inefficient in the (hot) write path, so we'll take the cost of
74  * the extra tree to save time.  Also there's no guarantee that rmap
75  * will be enabled.
76  *
77  * Given an array of rmaps sorted by physical block number, a starting
78  * physical block (sp), a bag to hold rmaps that cover sp, and the next
79  * physical block where the level changes (np), we can reconstruct the
80  * rt refcount btree as follows:
81  *
82  * While there are still unprocessed rmaps in the array,
83  *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
84  *  - Add to the bag all rmaps in the array where startblock == sp.
85  *  - Set np to the physical block where the bag size will change.  This
86  *    is the minimum of (the pblk of the next unprocessed rmap) and
87  *    (startblock + len of each rmap in the bag).
88  *  - Record the bag size as old_bag_size.
89  *
90  *  - While the bag isn't empty,
91  *     - Remove from the bag all rmaps where startblock + len == np.
92  *     - Add to the bag all rmaps in the array where startblock == np.
93  *     - If the bag size isn't old_bag_size, store the refcount entry
94  *       (sp, np - sp, bag_size) in the refcnt btree.
95  *     - If the bag is empty, break out of the inner loop.
96  *     - Set old_bag_size to the bag size
97  *     - Set sp = np.
98  *     - Set np to the physical block where the bag size will change.
99  *       This is the minimum of (the pblk of the next unprocessed rmap)
100  *       and (startblock + len of each rmap in the bag).
101  *
102  * Like all the other repairers, we make a list of all the refcount
103  * records we need, then reinitialize the rt refcount btree root and
104  * insert all the records.
105  */
106 
/* State tracked while rebuilding one realtime refcount btree. */
struct xrep_rtrefc {
	/*
	 * refcount extents: an xfarray of struct xfs_refcount_irec, appended
	 * to as shared and CoW staging extents are discovered.
	 */
	struct xfarray		*refcount_records;

	/* new refcountbt information (staging state for the bulk loader) */
	struct xrep_newbt	new_btree;

	/*
	 * old refcountbt blocks: fs blocks that the AG rmap btrees say are
	 * owned by the inode being repaired; reaped after the new tree is
	 * installed.
	 */
	struct xfsb_bitmap	old_rtrefcountbt_blocks;

	/* scrub context that this repair belongs to */
	struct xfs_scrub	*sc;

	/* get_records()'s position in the rt refcount record array. */
	xfarray_idx_t		array_cur;

	/*
	 * # of refcountbt blocks
	 * NOTE(review): no reader or writer of this field is visible in this
	 * part of the file -- confirm whether it is still needed.
	 */
	xfs_filblks_t		btblocks;
};
125 
/*
 * Prepare the scrub context for a realtime refcount btree repair.  All of the
 * work is delegated to xrep_setup_xfbtree(), which is handed the description
 * string used for the rmap record bag.
 */
int
xrep_setup_rtrefcountbt(
	struct xfs_scrub	*sc)
{
	const char		*descr = "realtime rmap record bag";

	return xrep_setup_xfbtree(sc, descr);
}
133 
134 /* Check for any obvious conflicts with this shared/CoW staging extent. */
135 STATIC int
136 xrep_rtrefc_check_ext(
137 	struct xfs_scrub		*sc,
138 	const struct xfs_refcount_irec	*rec)
139 {
140 	xfs_rgblock_t			last;
141 
142 	if (xfs_rtrefcount_check_irec(sc->sr.rtg, rec) != NULL)
143 		return -EFSCORRUPTED;
144 
145 	if (xfs_rgbno_to_rtxoff(sc->mp, rec->rc_startblock) != 0)
146 		return -EFSCORRUPTED;
147 
148 	last = rec->rc_startblock + rec->rc_blockcount - 1;
149 	if (xfs_rgbno_to_rtxoff(sc->mp, last) != sc->mp->m_sb.sb_rextsize - 1)
150 		return -EFSCORRUPTED;
151 
152 	/* Make sure this isn't free space or misaligned. */
153 	return xrep_require_rtext_inuse(sc, rec->rc_startblock,
154 			rec->rc_blockcount);
155 }
156 
157 /* Record a reference count extent. */
158 STATIC int
159 xrep_rtrefc_stash(
160 	struct xrep_rtrefc		*rr,
161 	enum xfs_refc_domain		domain,
162 	xfs_rgblock_t			bno,
163 	xfs_extlen_t			len,
164 	uint64_t			refcount)
165 {
166 	struct xfs_refcount_irec	irec = {
167 		.rc_startblock		= bno,
168 		.rc_blockcount		= len,
169 		.rc_refcount		= refcount,
170 		.rc_domain		= domain,
171 	};
172 	int				error = 0;
173 
174 	if (xchk_should_terminate(rr->sc, &error))
175 		return error;
176 
177 	irec.rc_refcount = min_t(uint64_t, XFS_REFC_REFCOUNT_MAX, refcount);
178 
179 	error = xrep_rtrefc_check_ext(rr->sc, &irec);
180 	if (error)
181 		return error;
182 
183 	trace_xrep_refc_found(rtg_group(rr->sc->sr.rtg), &irec);
184 
185 	return xfarray_append(rr->refcount_records, &irec);
186 }
187 
188 /* Record a CoW staging extent. */
189 STATIC int
190 xrep_rtrefc_stash_cow(
191 	struct xrep_rtrefc		*rr,
192 	xfs_rgblock_t			bno,
193 	xfs_extlen_t			len)
194 {
195 	return xrep_rtrefc_stash(rr, XFS_REFC_DOMAIN_COW, bno, len, 1);
196 }
197 
198 /* Decide if an rmap could describe a shared extent. */
199 static inline bool
200 xrep_rtrefc_rmap_shareable(
201 	const struct xfs_rmap_irec	*rmap)
202 {
203 	/* rt metadata are never sharable */
204 	if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
205 		return false;
206 
207 	/* Unwritten file blocks are not shareable. */
208 	if (rmap->rm_flags & XFS_RMAP_UNWRITTEN)
209 		return false;
210 
211 	return true;
212 }
213 
214 /* Grab the next (abbreviated) rmap record from the rmapbt. */
215 STATIC int
216 xrep_rtrefc_walk_rmaps(
217 	struct xrep_rtrefc	*rr,
218 	struct xfs_rmap_irec	*rmap,
219 	bool			*have_rec)
220 {
221 	struct xfs_btree_cur	*cur = rr->sc->sr.rmap_cur;
222 	struct xfs_mount	*mp = cur->bc_mp;
223 	int			have_gt;
224 	int			error = 0;
225 
226 	*have_rec = false;
227 
228 	/*
229 	 * Loop through the remaining rmaps.  Remember CoW staging
230 	 * extents and the refcountbt blocks from the old tree for later
231 	 * disposal.  We can only share written data fork extents, so
232 	 * keep looping until we find an rmap for one.
233 	 */
234 	do {
235 		if (xchk_should_terminate(rr->sc, &error))
236 			return error;
237 
238 		error = xfs_btree_increment(cur, 0, &have_gt);
239 		if (error)
240 			return error;
241 		if (!have_gt)
242 			return 0;
243 
244 		error = xfs_rmap_get_rec(cur, rmap, &have_gt);
245 		if (error)
246 			return error;
247 		if (XFS_IS_CORRUPT(mp, !have_gt)) {
248 			xfs_btree_mark_sick(cur);
249 			return -EFSCORRUPTED;
250 		}
251 
252 		if (rmap->rm_owner == XFS_RMAP_OWN_COW) {
253 			error = xrep_rtrefc_stash_cow(rr, rmap->rm_startblock,
254 					rmap->rm_blockcount);
255 			if (error)
256 				return error;
257 		} else if (xfs_is_sb_inum(mp, rmap->rm_owner) ||
258 			   (rmap->rm_flags & (XFS_RMAP_ATTR_FORK |
259 					      XFS_RMAP_BMBT_BLOCK))) {
260 			xfs_btree_mark_sick(cur);
261 			return -EFSCORRUPTED;
262 		}
263 	} while (!xrep_rtrefc_rmap_shareable(rmap));
264 
265 	*have_rec = true;
266 	return 0;
267 }
268 
269 static inline uint32_t
270 xrep_rtrefc_encode_startblock(
271 	const struct xfs_refcount_irec	*irec)
272 {
273 	uint32_t			start;
274 
275 	start = irec->rc_startblock & ~XFS_REFC_COWFLAG;
276 	if (irec->rc_domain == XFS_REFC_DOMAIN_COW)
277 		start |= XFS_REFC_COWFLAG;
278 
279 	return start;
280 }
281 
282 /*
283  * Compare two refcount records.  We want to sort in order of increasing block
284  * number.
285  */
286 static int
287 xrep_rtrefc_extent_cmp(
288 	const void			*a,
289 	const void			*b)
290 {
291 	const struct xfs_refcount_irec	*ap = a;
292 	const struct xfs_refcount_irec	*bp = b;
293 	uint32_t			sa, sb;
294 
295 	sa = xrep_rtrefc_encode_startblock(ap);
296 	sb = xrep_rtrefc_encode_startblock(bp);
297 
298 	if (sa > sb)
299 		return 1;
300 	if (sa < sb)
301 		return -1;
302 	return 0;
303 }
304 
305 /*
306  * Sort the refcount extents by startblock or else the btree records will be in
307  * the wrong order.  Make sure the records do not overlap in physical space.
308  */
309 STATIC int
310 xrep_rtrefc_sort_records(
311 	struct xrep_rtrefc		*rr)
312 {
313 	struct xfs_refcount_irec	irec;
314 	xfarray_idx_t			cur;
315 	enum xfs_refc_domain		dom = XFS_REFC_DOMAIN_SHARED;
316 	xfs_rgblock_t			next_rgbno = 0;
317 	int				error;
318 
319 	error = xfarray_sort(rr->refcount_records, xrep_rtrefc_extent_cmp,
320 			XFARRAY_SORT_KILLABLE);
321 	if (error)
322 		return error;
323 
324 	foreach_xfarray_idx(rr->refcount_records, cur) {
325 		if (xchk_should_terminate(rr->sc, &error))
326 			return error;
327 
328 		error = xfarray_load(rr->refcount_records, cur, &irec);
329 		if (error)
330 			return error;
331 
332 		if (dom == XFS_REFC_DOMAIN_SHARED &&
333 		    irec.rc_domain == XFS_REFC_DOMAIN_COW) {
334 			dom = irec.rc_domain;
335 			next_rgbno = 0;
336 		}
337 
338 		if (dom != irec.rc_domain)
339 			return -EFSCORRUPTED;
340 		if (irec.rc_startblock < next_rgbno)
341 			return -EFSCORRUPTED;
342 
343 		next_rgbno = irec.rc_startblock + irec.rc_blockcount;
344 	}
345 
346 	return error;
347 }
348 
349 /* Record extents that belong to the realtime refcount inode. */
350 STATIC int
351 xrep_rtrefc_walk_rmap(
352 	struct xfs_btree_cur		*cur,
353 	const struct xfs_rmap_irec	*rec,
354 	void				*priv)
355 {
356 	struct xrep_rtrefc		*rr = priv;
357 	int				error = 0;
358 
359 	if (xchk_should_terminate(rr->sc, &error))
360 		return error;
361 
362 	/* Skip extents which are not owned by this inode and fork. */
363 	if (rec->rm_owner != rr->sc->ip->i_ino)
364 		return 0;
365 
366 	error = xrep_check_ino_btree_mapping(rr->sc, rec);
367 	if (error)
368 		return error;
369 
370 	return xfsb_bitmap_set(&rr->old_rtrefcountbt_blocks,
371 			xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
372 			rec->rm_blockcount);
373 }
374 
375 /*
376  * Walk forward through the rmap btree to collect all rmaps starting at
377  * @bno in @rmap_bag.  These represent the file(s) that share ownership of
378  * the current block.  Upon return, the rmap cursor points to the last record
379  * satisfying the startblock constraint.
380  */
381 static int
382 xrep_rtrefc_push_rmaps_at(
383 	struct xrep_rtrefc	*rr,
384 	struct rcbag		*rcstack,
385 	xfs_rgblock_t		bno,
386 	struct xfs_rmap_irec	*rmap,
387 	bool			*have)
388 {
389 	struct xfs_scrub	*sc = rr->sc;
390 	int			have_gt;
391 	int			error;
392 
393 	while (*have && rmap->rm_startblock == bno) {
394 		error = rcbag_add(rcstack, rr->sc->tp, rmap);
395 		if (error)
396 			return error;
397 
398 		error = xrep_rtrefc_walk_rmaps(rr, rmap, have);
399 		if (error)
400 			return error;
401 	}
402 
403 	error = xfs_btree_decrement(sc->sr.rmap_cur, 0, &have_gt);
404 	if (error)
405 		return error;
406 	if (XFS_IS_CORRUPT(sc->mp, !have_gt)) {
407 		xfs_btree_mark_sick(sc->sr.rmap_cur);
408 		return -EFSCORRUPTED;
409 	}
410 
411 	return 0;
412 }
413 
414 /* Scan one AG for reverse mappings for the realtime refcount btree. */
415 STATIC int
416 xrep_rtrefc_scan_ag(
417 	struct xrep_rtrefc	*rr,
418 	struct xfs_perag	*pag)
419 {
420 	struct xfs_scrub	*sc = rr->sc;
421 	int			error;
422 
423 	error = xrep_ag_init(sc, pag, &sc->sa);
424 	if (error)
425 		return error;
426 
427 	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrefc_walk_rmap, rr);
428 	xchk_ag_free(sc, &sc->sa);
429 	return error;
430 }
431 
/*
 * Iterate all the rmap records to generate reference count data.
 *
 * This runs in two phases.  First, every AG's rmap btree is scanned so that
 * blocks owned by the inode under repair are remembered in
 * rr->old_rtrefcountbt_blocks.  Second, the realtime rmap btree is swept from
 * left to right with a bag of the rmaps covering the current block; each time
 * the bag size changes, a shared-domain refcount record is stashed for the
 * range that just ended, provided more than one rmap covered it.  CoW staging
 * records are stashed as a side effect of xrep_rtrefc_walk_rmaps().
 *
 * Returns 0 on success or a negative errno.  The bag and the rtgroup btree
 * cursors are released on every exit path that set them up.
 */
STATIC int
xrep_rtrefc_find_refcounts(
	struct xrep_rtrefc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct rcbag		*rcstack;
	struct xfs_perag	*pag = NULL;
	uint64_t		old_stack_height;
	xfs_rgblock_t		sbno;	/* block where the current edge began */
	xfs_rgblock_t		cbno;	/* start of the range not yet emitted */
	xfs_rgblock_t		nbno;	/* next block where the bag may change */
	bool			have;
	int			error;

	/* Scan for old rtrefc btree blocks. */
	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xrep_rtrefc_scan_ag(rr, pag);
		if (error) {
			/*
			 * Leaving the iteration early; presumably the iterator
			 * still holds a reference to @pag that must be dropped
			 * here -- confirm against xfs_perag_next().
			 */
			xfs_perag_rele(pag);
			return error;
		}
	}

	xrep_rtgroup_btcur_init(sc, &sc->sr);

	/*
	 * Set up a bag to store all the rmap records that we're tracking to
	 * generate a reference count record.  If this exceeds
	 * XFS_REFC_REFCOUNT_MAX, we clamp rc_refcount.
	 */
	error = rcbag_init(sc->mp, sc->xmbtp, &rcstack);
	if (error)
		goto out_cur;

	/* Start the rtrmapbt cursor to the left of all records. */
	error = xfs_btree_goto_left_edge(sc->sr.rmap_cur);
	if (error)
		goto out_bag;

	/* Process reverse mappings into refcount data. */
	while (xfs_btree_has_more_records(sc->sr.rmap_cur)) {
		struct xfs_rmap_irec	rmap;

		/* Push all rmaps with pblk == sbno onto the stack */
		error = xrep_rtrefc_walk_rmaps(rr, &rmap, &have);
		if (error)
			goto out_bag;
		if (!have)
			break;
		sbno = cbno = rmap.rm_startblock;
		error = xrep_rtrefc_push_rmaps_at(rr, rcstack, sbno, &rmap,
				&have);
		if (error)
			goto out_bag;

		/* Set nbno to the bno of the next refcount change */
		error = rcbag_next_edge(rcstack, sc->tp, &rmap, have, &nbno);
		if (error)
			goto out_bag;

		ASSERT(nbno > sbno);
		old_stack_height = rcbag_count(rcstack);

		/* While stack isn't empty... */
		while (rcbag_count(rcstack) > 0) {
			/* Pop all rmaps that end at nbno */
			error = rcbag_remove_ending_at(rcstack, sc->tp, nbno);
			if (error)
				goto out_bag;

			/* Push array items that start at nbno */
			error = xrep_rtrefc_walk_rmaps(rr, &rmap, &have);
			if (error)
				goto out_bag;
			if (have) {
				error = xrep_rtrefc_push_rmaps_at(rr, rcstack,
						nbno, &rmap, &have);
				if (error)
					goto out_bag;
			}

			/*
			 * Emit refcount if necessary: the bag size changed at
			 * nbno, so [cbno, nbno) had a constant number of
			 * owners.  Only ranges with more than one owner are
			 * recorded.  cbno advances only when the size changed,
			 * so adjacent ranges with equal counts merge into one
			 * record.
			 */
			ASSERT(nbno > cbno);
			if (rcbag_count(rcstack) != old_stack_height) {
				if (old_stack_height > 1) {
					error = xrep_rtrefc_stash(rr,
							XFS_REFC_DOMAIN_SHARED,
							cbno, nbno - cbno,
							old_stack_height);
					if (error)
						goto out_bag;
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (rcbag_count(rcstack) == 0)
				break;
			old_stack_height = rcbag_count(rcstack);
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			error = rcbag_next_edge(rcstack, sc->tp, &rmap, have,
					&nbno);
			if (error)
				goto out_bag;

			ASSERT(nbno > sbno);
		}
	}

	/* Every rmap pushed into the bag must have been popped again. */
	ASSERT(rcbag_count(rcstack) == 0);
out_bag:
	rcbag_free(&rcstack);
out_cur:
	xchk_rtgroup_btcur_free(&sc->sr);
	return error;
}
551 
552 /* Retrieve refcountbt data for bulk load. */
553 STATIC int
554 xrep_rtrefc_get_records(
555 	struct xfs_btree_cur		*cur,
556 	unsigned int			idx,
557 	struct xfs_btree_block		*block,
558 	unsigned int			nr_wanted,
559 	void				*priv)
560 {
561 	struct xrep_rtrefc		*rr = priv;
562 	union xfs_btree_rec		*block_rec;
563 	unsigned int			loaded;
564 	int				error;
565 
566 	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
567 		error = xfarray_load(rr->refcount_records, rr->array_cur++,
568 				&cur->bc_rec.rc);
569 		if (error)
570 			return error;
571 
572 		block_rec = xfs_btree_rec_addr(cur, idx, block);
573 		cur->bc_ops->init_rec_from_cur(cur, block_rec);
574 	}
575 
576 	return loaded;
577 }
578 
579 /* Feed one of the new btree blocks to the bulk loader. */
580 STATIC int
581 xrep_rtrefc_claim_block(
582 	struct xfs_btree_cur	*cur,
583 	union xfs_btree_ptr	*ptr,
584 	void			*priv)
585 {
586 	struct xrep_rtrefc	*rr = priv;
587 
588 	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
589 }
590 
591 /* Figure out how much space we need to create the incore btree root block. */
592 STATIC size_t
593 xrep_rtrefc_iroot_size(
594 	struct xfs_btree_cur	*cur,
595 	unsigned int		level,
596 	unsigned int		nr_this_level,
597 	void			*priv)
598 {
599 	return xfs_rtrefcount_broot_space_calc(cur->bc_mp, level,
600 			nr_this_level);
601 }
602 
603 /*
604  * Use the collected refcount information to stage a new rt refcount btree.  If
605  * this is successful we'll return with the new btree root information logged
606  * to the repair transaction but not yet committed.
607  */
608 STATIC int
609 xrep_rtrefc_build_new_tree(
610 	struct xrep_rtrefc	*rr)
611 {
612 	struct xfs_scrub	*sc = rr->sc;
613 	struct xfs_rtgroup	*rtg = sc->sr.rtg;
614 	struct xfs_btree_cur	*refc_cur;
615 	int			error;
616 
617 	error = xrep_rtrefc_sort_records(rr);
618 	if (error)
619 		return error;
620 
621 	/*
622 	 * Prepare to construct the new btree by reserving disk space for the
623 	 * new btree and setting up all the accounting information we'll need
624 	 * to root the new btree while it's under construction and before we
625 	 * attach it to the realtime refcount inode.
626 	 */
627 	error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
628 	if (error)
629 		return error;
630 
631 	rr->new_btree.bload.get_records = xrep_rtrefc_get_records;
632 	rr->new_btree.bload.claim_block = xrep_rtrefc_claim_block;
633 	rr->new_btree.bload.iroot_size = xrep_rtrefc_iroot_size;
634 
635 	refc_cur = xfs_rtrefcountbt_init_cursor(NULL, rtg);
636 	xfs_btree_stage_ifakeroot(refc_cur, &rr->new_btree.ifake);
637 
638 	/* Compute how many blocks we'll need. */
639 	error = xfs_btree_bload_compute_geometry(refc_cur, &rr->new_btree.bload,
640 			xfarray_length(rr->refcount_records));
641 	if (error)
642 		goto err_cur;
643 
644 	/* Last chance to abort before we start committing fixes. */
645 	if (xchk_should_terminate(sc, &error))
646 		goto err_cur;
647 
648 	/*
649 	 * Guess how many blocks we're going to need to rebuild an entire
650 	 * rtrefcountbt from the number of extents we found, and pump up our
651 	 * transaction to have sufficient block reservation.  We're allowed
652 	 * to exceed quota to repair inconsistent metadata, though this is
653 	 * unlikely.
654 	 */
655 	error = xfs_trans_reserve_more_inode(sc->tp, rtg_refcount(rtg),
656 			rr->new_btree.bload.nr_blocks, 0, true);
657 	if (error)
658 		goto err_cur;
659 
660 	/* Reserve the space we'll need for the new btree. */
661 	error = xrep_newbt_alloc_blocks(&rr->new_btree,
662 			rr->new_btree.bload.nr_blocks);
663 	if (error)
664 		goto err_cur;
665 
666 	/* Add all observed refcount records. */
667 	rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
668 	rr->array_cur = XFARRAY_CURSOR_INIT;
669 	error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr);
670 	if (error)
671 		goto err_cur;
672 
673 	/*
674 	 * Install the new rtrefc btree in the inode.  After this point the old
675 	 * btree is no longer accessible, the new tree is live, and we can
676 	 * delete the cursor.
677 	 */
678 	xfs_rtrefcountbt_commit_staged_btree(refc_cur, sc->tp);
679 	xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
680 	xfs_btree_del_cursor(refc_cur, 0);
681 
682 	/* Dispose of any unused blocks and the accounting information. */
683 	error = xrep_newbt_commit(&rr->new_btree);
684 	if (error)
685 		return error;
686 
687 	return xrep_roll_trans(sc);
688 err_cur:
689 	xfs_btree_del_cursor(refc_cur, error);
690 	xrep_newbt_cancel(&rr->new_btree);
691 	return error;
692 }
693 
694 /* Rebuild the rt refcount btree. */
695 int
696 xrep_rtrefcountbt(
697 	struct xfs_scrub	*sc)
698 {
699 	struct xrep_rtrefc	*rr;
700 	struct xfs_mount	*mp = sc->mp;
701 	int			error;
702 
703 	/* We require the rmapbt to rebuild anything. */
704 	if (!xfs_has_rtrmapbt(mp))
705 		return -EOPNOTSUPP;
706 
707 	/* Make sure any problems with the fork are fixed. */
708 	error = xrep_metadata_inode_forks(sc);
709 	if (error)
710 		return error;
711 
712 	rr = kzalloc(sizeof(struct xrep_rtrefc), XCHK_GFP_FLAGS);
713 	if (!rr)
714 		return -ENOMEM;
715 	rr->sc = sc;
716 
717 	/* Set up enough storage to handle one refcount record per rt extent. */
718 	error = xfarray_create("realtime reference count records",
719 			mp->m_sb.sb_rextents, sizeof(struct xfs_refcount_irec),
720 			&rr->refcount_records);
721 	if (error)
722 		goto out_rr;
723 
724 	/* Collect all reference counts. */
725 	xfsb_bitmap_init(&rr->old_rtrefcountbt_blocks);
726 	error = xrep_rtrefc_find_refcounts(rr);
727 	if (error)
728 		goto out_bitmap;
729 
730 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
731 
732 	/* Rebuild the refcount information. */
733 	error = xrep_rtrefc_build_new_tree(rr);
734 	if (error)
735 		goto out_bitmap;
736 
737 	/*
738 	 * Free all the extents that were allocated to the former rtrefcountbt
739 	 * and aren't cross-linked with something else.
740 	 */
741 	error = xrep_reap_metadir_fsblocks(rr->sc,
742 			&rr->old_rtrefcountbt_blocks);
743 	if (error)
744 		goto out_bitmap;
745 
746 out_bitmap:
747 	xfsb_bitmap_destroy(&rr->old_rtrefcountbt_blocks);
748 	xfarray_destroy(rr->refcount_records);
749 out_rr:
750 	kfree(rr);
751 	return error;
752 }
753