// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_rtrefcount_btree.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_rtalloc.h"
#include "xfs_ag.h"
#include "xfs_rtgroup.h"
#include "xfs_rtbitmap.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/fsb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"
#include "scrub/rcbag.h"

/*
 * Rebuilding the Reference Count Btree
 * ====================================
 *
 * This algorithm is "borrowed" from xfs_repair.  Imagine the rmap
 * entries as rectangles representing extents of physical blocks, and
 * that the rectangles can be laid down to allow them to overlap each
 * other; then we know that we must emit a refcnt btree entry wherever
 * the amount of overlap changes, i.e. the emission stimulus is
 * level-triggered:
 *
 *                 -    ---
 *       --      ----- ----   ---        ------
 * --   ----     ----------- ----     ---------
 * -------------------------------- -----------
 * ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
 * 2 1  23 21    3 43 234  2123  1 01 2  3     0
 *
 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
 *
 * Note that in the actual refcnt btree we don't store the refcount < 2
 * cases because the realtime bitmap tells us which rt extents are
 * free; single-use extents aren't recorded in the refcntbt.  If the
 * rmapbt supported storing multiple entries covering a given block we
 * could theoretically dispense with the refcntbt and simply count
 * rmaps, but that's inefficient in the (hot) write path, so we'll take
 * the cost of the extra tree to save time.  Also there's no guarantee
 * that rmap will be enabled.
 *
 * Given an array of rmaps sorted by physical block number, a starting
 * physical block (sp), a bag to hold rmaps that cover sp, and the next
 * physical block where the level changes (np), we can reconstruct the
 * rt refcount btree as follows:
 *
 * While there are still unprocessed rmaps in the array,
 *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
 *  - Add to the bag all rmaps in the array where startblock == sp.
 *  - Set np to the physical block where the bag size will change.  This
 *    is the minimum of (the pblk of the next unprocessed rmap) and
 *    (startblock + len of each rmap in the bag).
 *  - Record the bag size as old_bag_size.
 *
 *  - While the bag isn't empty,
 *     - Remove from the bag all rmaps where startblock + len == np.
 *     - Add to the bag all rmaps in the array where startblock == np.
 *     - If the bag size isn't old_bag_size, store the refcount entry
 *       (sp, np - sp, bag_size) in the refcnt btree.
 *     - If the bag is empty, break out of the inner loop.
 *     - Set old_bag_size to the bag size.
 *     - Set sp = np.
 *     - Set np to the physical block where the bag size will change.
 *       This is the minimum of (the pblk of the next unprocessed rmap)
 *       and (startblock + len of each rmap in the bag).
 *
 * Like all the other repairers, we make a list of all the refcount
 * records we need, then reinitialize the rt refcount btree root and
 * insert all the records.  A worked example follows this comment.
 */
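
/*
 * A worked example with invented numbers: suppose the sorted rmap
 * array contains three records:
 *
 *	A: (startblock 10, len 4)	blocks 10-13
 *	B: (startblock 10, len 2)	blocks 10-11
 *	C: (startblock 12, len 4)	blocks 12-15
 *
 * Blocks 10-13 are covered by exactly two rmaps at every point (A and
 * B, then A and C), and blocks 14-15 only by C.  Sweeping left to
 * right, the overlap level changes at block 10 (0 -> 2), block 14
 * (2 -> 1), and block 16 (1 -> 0), so the only record emitted is
 * (startblock 10, blockcount 4, refcount 2); the refcount == 1 span at
 * blocks 14-15 is never recorded.
 */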

struct xrep_rtrefc {
	/* refcount extents */
	struct xfarray		*refcount_records;

	/* new refcountbt information */
	struct xrep_newbt	new_btree;

	/* old refcountbt blocks */
	struct xfsb_bitmap	old_rtrefcountbt_blocks;

	struct xfs_scrub	*sc;

	/* get_records()'s position in the rt refcount record array. */
	xfarray_idx_t		array_cur;

	/* # of refcountbt blocks */
	xfs_filblks_t		btblocks;
};

/*
 * Set us up to repair rt refcount btrees.  The in-memory btree that we
 * ask for here backs the rcbag that we'll use to count rmap overlaps.
 */
int
xrep_setup_rtrefcountbt(
	struct xfs_scrub	*sc)
{
	char			*descr;
	int			error;

	descr = xchk_xfile_ag_descr(sc, "rmap record bag");
	error = xrep_setup_xfbtree(sc, descr);
	kfree(descr);
	return error;
}

/* Check for any obvious conflicts with this shared/CoW staging extent. */
STATIC int
xrep_rtrefc_check_ext(
	struct xfs_scrub		*sc,
	const struct xfs_refcount_irec	*rec)
{
	xfs_rgblock_t			last;

	if (xfs_rtrefcount_check_irec(sc->sr.rtg, rec) != NULL)
		return -EFSCORRUPTED;

	if (xfs_rgbno_to_rtxoff(sc->mp, rec->rc_startblock) != 0)
		return -EFSCORRUPTED;

	last = rec->rc_startblock + rec->rc_blockcount - 1;
	if (xfs_rgbno_to_rtxoff(sc->mp, last) != sc->mp->m_sb.sb_rextsize - 1)
		return -EFSCORRUPTED;

	/* Make sure this isn't free space or misaligned. */
	return xrep_require_rtext_inuse(sc, rec->rc_startblock,
			rec->rc_blockcount);
}
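
/*
 * For illustration (values invented): with sb_rextsize == 4, a record
 * with rc_startblock == 8 and rc_blockcount == 8 covers rt extents 2
 * and 3 exactly, so both rtxoff checks above pass.  A record starting
 * at block 9, or one of length 6, would begin or end partway through
 * an rt extent and is rejected as corruption, because refcounted space
 * on the realtime device must cover whole rt allocation units.
 */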

/* Record a reference count extent. */
STATIC int
xrep_rtrefc_stash(
	struct xrep_rtrefc		*rr,
	enum xfs_refc_domain		domain,
	xfs_rgblock_t			bno,
	xfs_extlen_t			len,
	uint64_t			refcount)
{
	struct xfs_refcount_irec	irec = {
		.rc_startblock		= bno,
		.rc_blockcount		= len,
		.rc_refcount		= refcount,
		.rc_domain		= domain,
	};
	int				error = 0;

	if (xchk_should_terminate(rr->sc, &error))
		return error;

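	/*
	 * The rcbag counts overlaps in 64 bits, but the refcount record
	 * only stores 32; pin oversized counts at the maximum instead
	 * of letting them wrap.
	 */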
	irec.rc_refcount = min_t(uint64_t, XFS_REFC_REFCOUNT_MAX, refcount);

	error = xrep_rtrefc_check_ext(rr->sc, &irec);
	if (error)
		return error;

	trace_xrep_refc_found(rtg_group(rr->sc->sr.rtg), &irec);

	return xfarray_append(rr->refcount_records, &irec);
}

/* Record a CoW staging extent. */
STATIC int
xrep_rtrefc_stash_cow(
	struct xrep_rtrefc		*rr,
	xfs_rgblock_t			bno,
	xfs_extlen_t			len)
{
	return xrep_rtrefc_stash(rr, XFS_REFC_DOMAIN_COW, bno, len, 1);
}

/* Decide if an rmap could describe a shared extent. */
static inline bool
xrep_rtrefc_rmap_shareable(
	const struct xfs_rmap_irec	*rmap)
{
	/* rt metadata blocks are never shareable */
	if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
		return false;

	/* Unwritten file blocks are not shareable. */
	if (rmap->rm_flags & XFS_RMAP_UNWRITTEN)
		return false;

	return true;
}

/* Grab the next (abbreviated) rmap record from the rtrmapbt. */
STATIC int
xrep_rtrefc_walk_rmaps(
	struct xrep_rtrefc	*rr,
	struct xfs_rmap_irec	*rmap,
	bool			*have_rec)
{
	struct xfs_btree_cur	*cur = rr->sc->sr.rmap_cur;
	struct xfs_mount	*mp = cur->bc_mp;
	int			have_gt;
	int			error = 0;

	*have_rec = false;

	/*
	 * Loop through the remaining rmaps.  Remember CoW staging
	 * extents and the refcountbt blocks from the old tree for later
	 * disposal.  We can only share written data fork extents, so
	 * keep looping until we find an rmap for one.
	 */
	do {
		if (xchk_should_terminate(rr->sc, &error))
			return error;

		error = xfs_btree_increment(cur, 0, &have_gt);
		if (error)
			return error;
		if (!have_gt)
			return 0;

		error = xfs_rmap_get_rec(cur, rmap, &have_gt);
		if (error)
			return error;
		if (XFS_IS_CORRUPT(mp, !have_gt)) {
			xfs_btree_mark_sick(cur);
			return -EFSCORRUPTED;
		}

		if (rmap->rm_owner == XFS_RMAP_OWN_COW) {
			error = xrep_rtrefc_stash_cow(rr, rmap->rm_startblock,
					rmap->rm_blockcount);
			if (error)
				return error;
		} else if (xfs_is_sb_inum(mp, rmap->rm_owner) ||
			   (rmap->rm_flags & (XFS_RMAP_ATTR_FORK |
					      XFS_RMAP_BMBT_BLOCK))) {
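			/*
			 * Attr fork extents and bmbt blocks never live
			 * on the realtime device, and neither does data
			 * belonging to the inodes recorded in the
			 * superblock, so an rt rmap claiming any of
			 * them means the tree is corrupt.
			 */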
			xfs_btree_mark_sick(cur);
			return -EFSCORRUPTED;
		}
	} while (!xrep_rtrefc_rmap_shareable(rmap));

	*have_rec = true;
	return 0;
}

static inline uint32_t
xrep_rtrefc_encode_startblock(
	const struct xfs_refcount_irec	*irec)
{
	uint32_t			start;

	start = irec->rc_startblock & ~XFS_REFC_COWFLAG;
	if (irec->rc_domain == XFS_REFC_DOMAIN_COW)
		start |= XFS_REFC_COWFLAG;

	return start;
}
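
/*
 * To illustrate the encoding above: XFS_REFC_COWFLAG is the high bit
 * of the encoded startblock, so a shared-domain record at rgbno 100
 * encodes to 100 (0x64) while a CoW-domain record at the same rgbno
 * encodes to 0x80000064.  Sorting on the encoded value therefore puts
 * every shared record before every CoW record, which matches the
 * ordering of the ondisk btree keyspace.
 */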

/*
 * Compare two refcount records.  We want to sort in order of increasing block
 * number.
 */
static int
xrep_rtrefc_extent_cmp(
	const void			*a,
	const void			*b)
{
	const struct xfs_refcount_irec	*ap = a;
	const struct xfs_refcount_irec	*bp = b;
	uint32_t			sa, sb;

	sa = xrep_rtrefc_encode_startblock(ap);
	sb = xrep_rtrefc_encode_startblock(bp);

	if (sa > sb)
		return 1;
	if (sa < sb)
		return -1;
	return 0;
}

/*
 * Sort the refcount extents by startblock or else the btree records will be in
 * the wrong order.  Make sure the records do not overlap in physical space.
 */
STATIC int
xrep_rtrefc_sort_records(
	struct xrep_rtrefc		*rr)
{
	struct xfs_refcount_irec	irec;
	xfarray_idx_t			cur;
	enum xfs_refc_domain		dom = XFS_REFC_DOMAIN_SHARED;
	xfs_rgblock_t			next_rgbno = 0;
	int				error;

	error = xfarray_sort(rr->refcount_records, xrep_rtrefc_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	foreach_xfarray_idx(rr->refcount_records, cur) {
		if (xchk_should_terminate(rr->sc, &error))
			return error;

		error = xfarray_load(rr->refcount_records, cur, &irec);
		if (error)
			return error;

		if (dom == XFS_REFC_DOMAIN_SHARED &&
		    irec.rc_domain == XFS_REFC_DOMAIN_COW) {
			dom = irec.rc_domain;
			next_rgbno = 0;
		}

		if (dom != irec.rc_domain)
			return -EFSCORRUPTED;
		if (irec.rc_startblock < next_rgbno)
			return -EFSCORRUPTED;

		next_rgbno = irec.rc_startblock + irec.rc_blockcount;
	}

	return error;
}
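
/*
 * After a successful sort, the array looks like (example values):
 *
 *	SHARED (startblock 10, blockcount 4)
 *	SHARED (startblock 30, blockcount 2)
 *	COW    (startblock 40, blockcount 4)
 *
 * i.e. the shared records in ascending rgbno order, then the CoW
 * records in ascending rgbno order, with next_rgbno reset to zero once
 * at the domain switch.  A shared record appearing after a CoW record,
 * or two records overlapping within a domain, fails the checks above.
 */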

/* Record extents that belong to the realtime refcount inode. */
STATIC int
xrep_rtrefc_walk_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xrep_rtrefc		*rr = priv;
	int				error = 0;

	if (xchk_should_terminate(rr->sc, &error))
		return error;

	/* Skip extents which are not owned by this inode and fork. */
	if (rec->rm_owner != rr->sc->ip->i_ino)
		return 0;

	error = xrep_check_ino_btree_mapping(rr->sc, rec);
	if (error)
		return error;

	return xfsb_bitmap_set(&rr->old_rtrefcountbt_blocks,
			xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
			rec->rm_blockcount);
}

/*
 * Walk forward through the rmap btree to collect all rmaps starting at
 * @bno in @rcstack.  These represent the file(s) that share ownership of
 * the current block.  Upon return, the rmap cursor points to the last record
 * satisfying the startblock constraint.
 */
static int
xrep_rtrefc_push_rmaps_at(
	struct xrep_rtrefc	*rr,
	struct rcbag		*rcstack,
	xfs_rgblock_t		bno,
	struct xfs_rmap_irec	*rmap,
	bool			*have)
{
	struct xfs_scrub	*sc = rr->sc;
	int			have_gt;
	int			error;

	while (*have && rmap->rm_startblock == bno) {
		error = rcbag_add(rcstack, rr->sc->tp, rmap);
		if (error)
			return error;

		error = xrep_rtrefc_walk_rmaps(rr, rmap, have);
		if (error)
			return error;
	}

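	/*
	 * The walk above left the cursor on the first rmap past @bno
	 * (or just past the end of the tree); back it up one record so
	 * that the next walk resumes from the right place.
	 */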
	error = xfs_btree_decrement(sc->sr.rmap_cur, 0, &have_gt);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(sc->mp, !have_gt)) {
		xfs_btree_mark_sick(sc->sr.rmap_cur);
		return -EFSCORRUPTED;
	}

	return 0;
}

/* Scan one AG for reverse mappings for the realtime refcount btree. */
STATIC int
xrep_rtrefc_scan_ag(
	struct xrep_rtrefc	*rr,
	struct xfs_perag	*pag)
{
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	error = xrep_ag_init(sc, pag, &sc->sa);
	if (error)
		return error;

	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrefc_walk_rmap, rr);
	xchk_ag_free(sc, &sc->sa);
	return error;
}

/* Iterate all the rmap records to generate reference count data. */
STATIC int
xrep_rtrefc_find_refcounts(
	struct xrep_rtrefc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct rcbag		*rcstack;
	struct xfs_perag	*pag = NULL;
	uint64_t		old_stack_height;
	xfs_rgblock_t		sbno;
	xfs_rgblock_t		cbno;
	xfs_rgblock_t		nbno;
	bool			have;
	int			error;

	/* Scan for old rtrefc btree blocks. */
	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xrep_rtrefc_scan_ag(rr, pag);
		if (error) {
			xfs_perag_rele(pag);
			return error;
		}
	}

	xrep_rtgroup_btcur_init(sc, &sc->sr);

	/*
	 * Set up a bag to store all the rmap records that we're tracking to
	 * generate a reference count record.  If this exceeds
	 * XFS_REFC_REFCOUNT_MAX, we clamp rc_refcount.
	 */
	error = rcbag_init(sc->mp, sc->xmbtp, &rcstack);
	if (error)
		goto out_cur;

	/* Start the rtrmapbt cursor to the left of all records. */
	error = xfs_btree_goto_left_edge(sc->sr.rmap_cur);
	if (error)
		goto out_bag;

	/* Process reverse mappings into refcount data. */
	while (xfs_btree_has_more_records(sc->sr.rmap_cur)) {
		struct xfs_rmap_irec	rmap;

		/* Push all rmaps with pblk == sbno onto the stack */
		error = xrep_rtrefc_walk_rmaps(rr, &rmap, &have);
		if (error)
			goto out_bag;
		if (!have)
			break;
		sbno = cbno = rmap.rm_startblock;
		error = xrep_rtrefc_push_rmaps_at(rr, rcstack, sbno, &rmap,
				&have);
		if (error)
			goto out_bag;

		/* Set nbno to the bno of the next refcount change */
		error = rcbag_next_edge(rcstack, sc->tp, &rmap, have, &nbno);
		if (error)
			goto out_bag;

		ASSERT(nbno > sbno);
		old_stack_height = rcbag_count(rcstack);

		/* While stack isn't empty... */
		while (rcbag_count(rcstack) > 0) {
			/* Pop all rmaps that end at nbno */
			error = rcbag_remove_ending_at(rcstack, sc->tp, nbno);
			if (error)
				goto out_bag;

			/* Push array items that start at nbno */
			error = xrep_rtrefc_walk_rmaps(rr, &rmap, &have);
			if (error)
				goto out_bag;
			if (have) {
				error = xrep_rtrefc_push_rmaps_at(rr, rcstack,
						nbno, &rmap, &have);
				if (error)
					goto out_bag;
			}

			/* Emit refcount if necessary */
			ASSERT(nbno > cbno);
			if (rcbag_count(rcstack) != old_stack_height) {
				if (old_stack_height > 1) {
					error = xrep_rtrefc_stash(rr,
							XFS_REFC_DOMAIN_SHARED,
							cbno, nbno - cbno,
							old_stack_height);
					if (error)
						goto out_bag;
				}
				cbno = nbno;
			}

			/* Stack empty, go find the next rmap */
			if (rcbag_count(rcstack) == 0)
				break;
			old_stack_height = rcbag_count(rcstack);
			sbno = nbno;

			/* Set nbno to the bno of the next refcount change */
			error = rcbag_next_edge(rcstack, sc->tp, &rmap, have,
					&nbno);
			if (error)
				goto out_bag;

			ASSERT(nbno > sbno);
		}
	}

	ASSERT(rcbag_count(rcstack) == 0);
out_bag:
	rcbag_free(&rcstack);
out_cur:
	xchk_rtgroup_btcur_free(&sc->sr);
	return error;
}

/* Retrieve refcountbt data for bulk load. */
STATIC int
xrep_rtrefc_get_records(
	struct xfs_btree_cur		*cur,
	unsigned int			idx,
	struct xfs_btree_block		*block,
	unsigned int			nr_wanted,
	void				*priv)
{
	struct xrep_rtrefc		*rr = priv;
	union xfs_btree_rec		*block_rec;
	unsigned int			loaded;
	int				error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
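		/*
		 * Load the next record from the sorted xfarray into the
		 * cursor's scratchpad, then let the btree ops encode it
		 * into the block that is being bulk loaded.
		 */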
		error = xfarray_load(rr->refcount_records, rr->array_cur++,
				&cur->bc_rec.rc);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_rtrefc_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_rtrefc	*rr = priv;

	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
}

/* Figure out how much space we need to create the incore btree root block. */
STATIC size_t
xrep_rtrefc_iroot_size(
	struct xfs_btree_cur	*cur,
	unsigned int		level,
	unsigned int		nr_this_level,
	void			*priv)
{
	return xfs_rtrefcount_broot_space_calc(cur->bc_mp, level,
			nr_this_level);
}

/*
 * Use the collected refcount information to stage a new rt refcount btree.  If
 * this is successful we'll return with the new btree root information logged
 * to the repair transaction but not yet committed.
 */
STATIC int
xrep_rtrefc_build_new_tree(
	struct xrep_rtrefc	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_rtgroup	*rtg = sc->sr.rtg;
	struct xfs_btree_cur	*refc_cur;
	int			error;

	error = xrep_rtrefc_sort_records(rr);
	if (error)
		return error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the realtime refcount inode.
	 */
	error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
	if (error)
		return error;

	rr->new_btree.bload.get_records = xrep_rtrefc_get_records;
	rr->new_btree.bload.claim_block = xrep_rtrefc_claim_block;
	rr->new_btree.bload.iroot_size = xrep_rtrefc_iroot_size;

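	/*
	 * Open a staging cursor with a fake in-memory root so that the
	 * bulk loader can construct the new tree without touching the
	 * live inode fork until we are ready to commit.
	 */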
	refc_cur = xfs_rtrefcountbt_init_cursor(NULL, rtg);
	xfs_btree_stage_ifakeroot(refc_cur, &rr->new_btree.ifake);

	/* Compute how many blocks we'll need. */
	error = xfs_btree_bload_compute_geometry(refc_cur, &rr->new_btree.bload,
			xfarray_length(rr->refcount_records));
	if (error)
		goto err_cur;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire
	 * rtrefcountbt from the number of extents we found, and pump up our
	 * transaction to have sufficient block reservation.  We're allowed
	 * to exceed quota to repair inconsistent metadata, though this is
	 * unlikely.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, rtg_refcount(rtg),
			rr->new_btree.bload.nr_blocks, 0, true);
	if (error)
		goto err_cur;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			rr->new_btree.bload.nr_blocks);
	if (error)
		goto err_cur;

	/* Add all observed refcount records. */
	rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
	rr->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_cur;

	/*
	 * Install the new rtrefc btree in the inode.  After this point the old
	 * btree is no longer accessible, the new tree is live, and we can
	 * delete the cursor.
	 */
	xfs_rtrefcountbt_commit_staged_btree(refc_cur, sc->tp);
	xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
	xfs_btree_del_cursor(refc_cur, 0);

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;

	return xrep_roll_trans(sc);
err_cur:
	xfs_btree_del_cursor(refc_cur, error);
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}

/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_rtrefc_remove_old_tree(
	struct xrep_rtrefc	*rr)
{
	int			error;

	/*
	 * Free all the extents that were allocated to the former rtrefcountbt
	 * and aren't cross-linked with something else.
	 */
	error = xrep_reap_metadir_fsblocks(rr->sc,
			&rr->old_rtrefcountbt_blocks);
	if (error)
		return error;

	/*
	 * Ensure the proper reservation for the rtrefcount inode so that we
	 * don't fail to expand the btree.
	 */
	return xrep_reset_metafile_resv(rr->sc);
}

/* Rebuild the rt refcount btree. */
int
xrep_rtrefcountbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rtrefc	*rr;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	int			error;

	/* We require the rtrmapbt to rebuild anything. */
	if (!xfs_has_rtrmapbt(mp))
		return -EOPNOTSUPP;

	/* Make sure any problems with the fork are fixed. */
	error = xrep_metadata_inode_forks(sc);
	if (error)
		return error;

	rr = kzalloc(sizeof(struct xrep_rtrefc), XCHK_GFP_FLAGS);
	if (!rr)
		return -ENOMEM;
	rr->sc = sc;

	/* Set up enough storage to handle one refcount record per rt extent. */
	descr = xchk_xfile_ag_descr(sc, "reference count records");
	error = xfarray_create(descr, mp->m_sb.sb_rextents,
			sizeof(struct xfs_refcount_irec),
			&rr->refcount_records);
	kfree(descr);
	if (error)
		goto out_rr;

	/* Collect all reference counts. */
	xfsb_bitmap_init(&rr->old_rtrefcountbt_blocks);
	error = xrep_rtrefc_find_refcounts(rr);
	if (error)
		goto out_bitmap;

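	/*
	 * Join the rt refcount inode to the repair transaction so that
	 * the rebuild steps below can log changes to it.
	 */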
	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the refcount information. */
	error = xrep_rtrefc_build_new_tree(rr);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_rtrefc_remove_old_tree(rr);
	if (error)
		goto out_bitmap;

out_bitmap:
	xfsb_bitmap_destroy(&rr->old_rtrefcountbt_blocks);
	xfarray_destroy(rr->refcount_records);
out_rr:
	kfree(rr);
	return error;
}