xref: /linux/fs/xfs/scrub/rtrmap_repair.c (revision 6f7e6393d1ce636bb7ec77a7fe7b77458fddf701)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_btree_staging.h"
15 #include "xfs_buf_mem.h"
16 #include "xfs_btree_mem.h"
17 #include "xfs_bit.h"
18 #include "xfs_log_format.h"
19 #include "xfs_trans.h"
20 #include "xfs_sb.h"
21 #include "xfs_alloc.h"
22 #include "xfs_rmap.h"
23 #include "xfs_rmap_btree.h"
24 #include "xfs_rtrmap_btree.h"
25 #include "xfs_inode.h"
26 #include "xfs_icache.h"
27 #include "xfs_bmap.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_quota.h"
30 #include "xfs_rtalloc.h"
31 #include "xfs_ag.h"
32 #include "xfs_rtgroup.h"
33 #include "xfs_refcount.h"
34 #include "scrub/xfs_scrub.h"
35 #include "scrub/scrub.h"
36 #include "scrub/common.h"
37 #include "scrub/btree.h"
38 #include "scrub/trace.h"
39 #include "scrub/repair.h"
40 #include "scrub/bitmap.h"
41 #include "scrub/fsb_bitmap.h"
42 #include "scrub/rgb_bitmap.h"
43 #include "scrub/xfile.h"
44 #include "scrub/xfarray.h"
45 #include "scrub/iscan.h"
46 #include "scrub/newbt.h"
47 #include "scrub/reap.h"
48 
49 /*
50  * Realtime Reverse Mapping Btree Repair
51  * =====================================
52  *
53  * This isn't quite as difficult as repairing the rmap btree on the data
54  * device, since we only store the data fork extents of realtime files on the
55  * realtime device.  We still have to freeze the filesystem and stop the
56  * background threads like we do for the rmap repair, but we only have to scan
57  * realtime inodes.
58  *
59  * Collecting entries for the new realtime rmap btree is easy -- all we have
60  * to do is generate rtrmap entries from the data fork mappings of all realtime
61  * files in the filesystem.  We then scan the rmap btrees of the data device
62  * looking for extents belonging to the old btree and note them in a bitmap.
63  *
64  * To rebuild the realtime rmap btree, we bulk-load the collected mappings into
65  * a new btree cursor and atomically swap that into the realtime inode.  Then
66  * we can free the blocks from the old btree.
67  *
68  * We use the 'xrep_rtrmap' prefix for all the rmap functions.
69  */
70 
/* Context for collecting rmaps */
struct xrep_rtrmap {
	/* new rtrmapbt information */
	struct xrep_newbt	new_btree;

	/* lock for the xfbtree and xfile */
	struct mutex		lock;

	/* rmap records generated from primary metadata */
	struct xfbtree		rtrmap_btree;

	/* scrub context that owns this repair state */
	struct xfs_scrub	*sc;

	/* bitmap of old rtrmapbt blocks */
	struct xfsb_bitmap	old_rtrmapbt_blocks;

	/* Hooks into rtrmap update code. */
	struct xfs_rmap_hook	rhook;

	/* inode scan cursor */
	struct xchk_iscan	iscan;

	/* in-memory btree cursor for the ->get_blocks walk */
	struct xfs_btree_cur	*mcur;

	/* Number of records we're staging in the new btree. */
	uint64_t		nr_records;
};
99 
100 /* Set us up to repair rt reverse mapping btrees. */
101 int
102 xrep_setup_rtrmapbt(
103 	struct xfs_scrub	*sc)
104 {
105 	struct xrep_rtrmap	*rr;
106 	int			error;
107 
108 	xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);
109 
110 	error = xrep_setup_xfbtree(sc, "realtime reverse mapping records");
111 	if (error)
112 		return error;
113 
114 	rr = kzalloc(sizeof(struct xrep_rtrmap), XCHK_GFP_FLAGS);
115 	if (!rr)
116 		return -ENOMEM;
117 
118 	rr->sc = sc;
119 	sc->buf = rr;
120 	return 0;
121 }
122 
123 /* Make sure there's nothing funny about this mapping. */
124 STATIC int
125 xrep_rtrmap_check_mapping(
126 	struct xfs_scrub	*sc,
127 	const struct xfs_rmap_irec *rec)
128 {
129 	if (xfs_rtrmap_check_irec(sc->sr.rtg, rec) != NULL)
130 		return -EFSCORRUPTED;
131 
132 	/* Make sure this isn't free space. */
133 	return xrep_require_rtext_inuse(sc, rec->rm_startblock,
134 			rec->rm_blockcount);
135 }
136 
/*
 * Store a reverse-mapping record.
 *
 * Records land in the in-memory (xfile-backed) rtrmap btree so that we can
 * collect them without pinning the rtrmap inode ILOCK for the whole scan.
 * On failure, the pending shadow-btree updates are cancelled and the inode
 * scan is aborted, because the shadow btree can no longer be trusted.
 */
static inline int
xrep_rtrmap_stash(
	struct xrep_rtrmap	*rr,
	xfs_rgblock_t		startblock,
	xfs_extlen_t		blockcount,
	uint64_t		owner,
	uint64_t		offset,
	unsigned int		flags)
{
	struct xfs_rmap_irec	rmap = {
		.rm_startblock	= startblock,
		.rm_blockcount	= blockcount,
		.rm_owner	= owner,
		.rm_offset	= offset,
		.rm_flags	= flags,
	};
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_btree_cur	*mcur;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	/* A live-update hook may already have aborted the scan. */
	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	trace_xrep_rtrmap_found(sc->mp, &rmap);

	/* Add entry to in-memory btree. */
	mutex_lock(&rr->lock);
	mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, &rr->rtrmap_btree);
	error = xfs_rmap_map_raw(mcur, &rmap);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_cancel;

	/* Persist the staged shadow btree updates to the xfile. */
	error = xfbtree_trans_commit(&rr->rtrmap_btree, sc->tp);
	if (error)
		goto out_abort;

	mutex_unlock(&rr->lock);
	return 0;

out_cancel:
	xfbtree_trans_cancel(&rr->rtrmap_btree, sc->tp);
out_abort:
	xchk_iscan_abort(&rr->iscan);
	mutex_unlock(&rr->lock);
	return error;
}
188 
189 /* Finding all file and bmbt extents. */
190 
/* Context for accumulating rmaps for an inode fork. */
struct xrep_rtrmap_ifork {
	/*
	 * Accumulate rmap data here to turn multiple adjacent bmaps into a
	 * single rmap.
	 */
	struct xfs_rmap_irec	accum;

	/* repair context that receives the accumulated rmaps */
	struct xrep_rtrmap	*rr;
};
201 
202 /* Stash an rmap that we accumulated while walking an inode fork. */
203 STATIC int
204 xrep_rtrmap_stash_accumulated(
205 	struct xrep_rtrmap_ifork	*rf)
206 {
207 	if (rf->accum.rm_blockcount == 0)
208 		return 0;
209 
210 	return xrep_rtrmap_stash(rf->rr, rf->accum.rm_startblock,
211 			rf->accum.rm_blockcount, rf->accum.rm_owner,
212 			rf->accum.rm_offset, rf->accum.rm_flags);
213 }
214 
/*
 * Accumulate a bmbt record.
 *
 * Contiguous mappings (same flags, adjacent file offsets and rtgroup blocks)
 * are merged into a single rmap before being stashed; anything else flushes
 * the accumulator and starts a new rmap.
 */
STATIC int
xrep_rtrmap_visit_bmbt(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*rec,
	void			*priv)
{
	struct xrep_rtrmap_ifork *rf = priv;
	struct xfs_rmap_irec	*accum = &rf->accum;
	struct xfs_mount	*mp = rf->rr->sc->mp;
	xfs_rgblock_t		rgbno;
	unsigned int		rmap_flags = 0;
	int			error;

	/* Ignore mappings that fall outside the rtgroup being repaired. */
	if (xfs_rtb_to_rgno(mp, rec->br_startblock) !=
	    rtg_rgno(rf->rr->sc->sr.rtg))
		return 0;

	if (rec->br_state == XFS_EXT_UNWRITTEN)
		rmap_flags |= XFS_RMAP_UNWRITTEN;

	/* If this bmap is adjacent to the previous one, just add it. */
	rgbno = xfs_rtb_to_rgbno(mp, rec->br_startblock);
	if (accum->rm_blockcount > 0 &&
	    rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
	    rgbno == accum->rm_startblock + accum->rm_blockcount &&
	    rmap_flags == accum->rm_flags) {
		accum->rm_blockcount += rec->br_blockcount;
		return 0;
	}

	/* Otherwise stash the old rmap and start accumulating a new one. */
	error = xrep_rtrmap_stash_accumulated(rf);
	if (error)
		return error;

	accum->rm_startblock = rgbno;
	accum->rm_blockcount = rec->br_blockcount;
	accum->rm_offset = rec->br_startoff;
	accum->rm_flags = rmap_flags;
	return 0;
}
257 
/*
 * Iterate the block mapping btree to collect rmap records for anything in this
 * fork that maps to the rt volume.  Sets @mappings_done to true if we've
 * scanned the block mappings in this fork.
 */
STATIC int
xrep_rtrmap_scan_bmbt(
	struct xrep_rtrmap_ifork *rf,
	struct xfs_inode	*ip,
	bool			*mappings_done)
{
	struct xrep_rtrmap	*rr = rf->rr;
	struct xfs_btree_cur	*cur;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	int			error = 0;

	*mappings_done = false;

	/*
	 * If the incore extent cache is already loaded, we'll just use the
	 * incore extent scanner to record mappings.  Don't bother walking the
	 * ondisk extent tree.
	 */
	if (!xfs_need_iread_extents(ifp))
		return 0;

	/* Accumulate all the mappings in the bmap btree. */
	cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, XFS_DATA_FORK);
	error = xfs_bmap_query_all(cur, xrep_rtrmap_visit_bmbt, rf);
	xfs_btree_del_cursor(cur, error);
	if (error)
		return error;

	/* Stash any remaining accumulated rmaps and exit. */
	*mappings_done = true;
	return xrep_rtrmap_stash_accumulated(rf);
}
295 
296 /*
297  * Iterate the in-core extent cache to collect rmap records for anything in
298  * this fork that matches the AG.
299  */
300 STATIC int
301 xrep_rtrmap_scan_iext(
302 	struct xrep_rtrmap_ifork *rf,
303 	struct xfs_ifork	*ifp)
304 {
305 	struct xfs_bmbt_irec	rec;
306 	struct xfs_iext_cursor	icur;
307 	int			error;
308 
309 	for_each_xfs_iext(ifp, &icur, &rec) {
310 		if (isnullstartblock(rec.br_startblock))
311 			continue;
312 		error = xrep_rtrmap_visit_bmbt(NULL, &rec, rf);
313 		if (error)
314 			return error;
315 	}
316 
317 	return xrep_rtrmap_stash_accumulated(rf);
318 }
319 
/* Find all the extents on the realtime device mapped by an inode fork. */
STATIC int
xrep_rtrmap_scan_dfork(
	struct xrep_rtrmap	*rr,
	struct xfs_inode	*ip)
{
	struct xrep_rtrmap_ifork rf = {
		.accum		= { .rm_owner = ip->i_ino, },
		.rr		= rr,
	};
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	int			error = 0;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
		bool		mappings_done;

		/*
		 * Scan the bmbt for mappings.  If the incore extent tree is
		 * loaded, we want to scan the cached mappings since that's
		 * faster when the extent counts are very high.
		 */
		error = xrep_rtrmap_scan_bmbt(&rf, ip, &mappings_done);
		if (error || mappings_done)
			return error;
	} else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
		/* realtime data forks should only be extents or btree */
		return -EFSCORRUPTED;
	}

	/* Scan incore extent cache. */
	return xrep_rtrmap_scan_iext(&rf, ifp);
}
352 
/* Record reverse mappings for a file. */
STATIC int
xrep_rtrmap_scan_inode(
	struct xrep_rtrmap	*rr,
	struct xfs_inode	*ip)
{
	unsigned int		lock_mode;
	int			error = 0;

	/* Skip the rt rmap btree inode. */
	if (rr->sc->ip == ip)
		return 0;

	/* Hold the data fork mappings steady while we scan them. */
	lock_mode = xfs_ilock_data_map_shared(ip);

	/* Check the data fork if it's on the realtime device. */
	if (XFS_IS_REALTIME_INODE(ip)) {
		error = xrep_rtrmap_scan_dfork(rr, ip);
		if (error)
			goto out_unlock;
	}

	/* Only mark visited on success so live updates stay consistent. */
	xchk_iscan_mark_visited(&rr->iscan, ip);
out_unlock:
	xfs_iunlock(ip, lock_mode);
	return error;
}
380 
/* Record extents that belong to the realtime rmap inode. */
STATIC int
xrep_rtrmap_walk_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xrep_rtrmap		*rr = priv;
	int				error = 0;

	if (xchk_should_terminate(rr->sc, &error))
		return error;

	/* Skip extents which are not owned by this inode and fork. */
	if (rec->rm_owner != rr->sc->ip->i_ino)
		return 0;

	/* Sanity-check the record before trusting it for later reaping. */
	error = xrep_check_ino_btree_mapping(rr->sc, rec);
	if (error)
		return error;

	/* Remember these blocks so we can free the old btree afterwards. */
	return xfsb_bitmap_set(&rr->old_rtrmapbt_blocks,
			xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
			rec->rm_blockcount);
}
406 
407 /* Scan one AG for reverse mappings for the realtime rmap btree. */
408 STATIC int
409 xrep_rtrmap_scan_ag(
410 	struct xrep_rtrmap	*rr,
411 	struct xfs_perag	*pag)
412 {
413 	struct xfs_scrub	*sc = rr->sc;
414 	int			error;
415 
416 	error = xrep_ag_init(sc, pag, &sc->sa);
417 	if (error)
418 		return error;
419 
420 	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrmap_walk_rmap, rr);
421 	xchk_ag_free(sc, &sc->sa);
422 	return error;
423 }
424 
/* Carry the rmap owner through a bitmap walk to the stash function. */
struct xrep_rtrmap_stash_run {
	/* repair context that receives the stashed records */
	struct xrep_rtrmap	*rr;

	/* rmap owner for every extent set in the bitmap */
	uint64_t		owner;
};
429 
430 static int
431 xrep_rtrmap_stash_run(
432 	uint32_t			start,
433 	uint32_t			len,
434 	void				*priv)
435 {
436 	struct xrep_rtrmap_stash_run	*rsr = priv;
437 	struct xrep_rtrmap		*rr = rsr->rr;
438 	xfs_rgblock_t			rgbno = start;
439 
440 	return xrep_rtrmap_stash(rr, rgbno, len, rsr->owner, 0, 0);
441 }
442 
443 /*
444  * Emit rmaps for every extent of bits set in the bitmap.  Caller must ensure
445  * that the ranges are in units of FS blocks.
446  */
447 STATIC int
448 xrep_rtrmap_stash_bitmap(
449 	struct xrep_rtrmap		*rr,
450 	struct xrgb_bitmap		*bitmap,
451 	const struct xfs_owner_info	*oinfo)
452 {
453 	struct xrep_rtrmap_stash_run	rsr = {
454 		.rr			= rr,
455 		.owner			= oinfo->oi_owner,
456 	};
457 
458 	return xrgb_bitmap_walk(bitmap, xrep_rtrmap_stash_run, &rsr);
459 }
460 
461 /* Record a CoW staging extent. */
462 STATIC int
463 xrep_rtrmap_walk_cowblocks(
464 	struct xfs_btree_cur		*cur,
465 	const struct xfs_refcount_irec	*irec,
466 	void				*priv)
467 {
468 	struct xrgb_bitmap		*bitmap = priv;
469 
470 	if (!xfs_refcount_check_domain(irec) ||
471 	    irec->rc_domain != XFS_REFC_DOMAIN_COW)
472 		return -EFSCORRUPTED;
473 
474 	return xrgb_bitmap_set(bitmap, irec->rc_startblock,
475 			irec->rc_blockcount);
476 }
477 
/*
 * Collect rmaps for the blocks containing the refcount btree, and all CoW
 * staging extents.
 */
STATIC int
xrep_rtrmap_find_refcount_rmaps(
	struct xrep_rtrmap	*rr)
{
	struct xrgb_bitmap	cow_blocks;		/* COWBIT */
	/* Query the entire CoW domain of the rt refcount btree. */
	struct xfs_refcount_irec low = {
		.rc_startblock	= 0,
		.rc_domain	= XFS_REFC_DOMAIN_COW,
	};
	struct xfs_refcount_irec high = {
		.rc_startblock	= -1U,
		.rc_domain	= XFS_REFC_DOMAIN_COW,
	};
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	/* No rt reflink means no CoW staging extents to record. */
	if (!xfs_has_rtreflink(sc->mp))
		return 0;

	xrgb_bitmap_init(&cow_blocks);

	/* Collect rmaps for CoW staging extents. */
	error = xfs_refcount_query_range(sc->sr.refc_cur, &low, &high,
			xrep_rtrmap_walk_cowblocks, &cow_blocks);
	if (error)
		goto out_bitmap;

	/* Generate rmaps for everything. */
	error = xrep_rtrmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW);
	if (error)
		goto out_bitmap;

out_bitmap:
	xrgb_bitmap_destroy(&cow_blocks);
	return error;
}
518 
519 /* Count and check all collected records. */
520 STATIC int
521 xrep_rtrmap_check_record(
522 	struct xfs_btree_cur		*cur,
523 	const struct xfs_rmap_irec	*rec,
524 	void				*priv)
525 {
526 	struct xrep_rtrmap		*rr = priv;
527 	int				error;
528 
529 	error = xrep_rtrmap_check_mapping(rr->sc, rec);
530 	if (error)
531 		return error;
532 
533 	rr->nr_records++;
534 	return 0;
535 }
536 
/* Generate all the reverse-mappings for the realtime device. */
STATIC int
xrep_rtrmap_find_rmaps(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = NULL;
	struct xfs_inode	*ip;
	struct xfs_btree_cur	*mcur;
	int			error;

	/* Generate rmaps for the realtime superblock */
	if (xfs_has_rtsb(sc->mp) && rtg_rgno(rr->sc->sr.rtg) == 0) {
		error = xrep_rtrmap_stash(rr, 0, sc->mp->m_sb.sb_rextsize,
				XFS_RMAP_OWN_FS, 0, 0);
		if (error)
			return error;
	}

	/* Find CoW staging extents. */
	xrep_rtgroup_btcur_init(sc, &sc->sr);
	error = xrep_rtrmap_find_refcount_rmaps(rr);
	xchk_rtgroup_btcur_free(&sc->sr);
	if (error)
		return error;

	/*
	 * Set up for a potentially lengthy filesystem scan by reducing our
	 * transaction resource usage for the duration.  Specifically:
	 *
	 * Unlock the realtime metadata inodes and cancel the transaction to
	 * release the log grant space while we scan the filesystem.
	 *
	 * Create a new empty transaction to eliminate the possibility of the
	 * inode scan deadlocking on cyclical metadata.
	 *
	 * We pass the empty transaction to the file scanning function to avoid
	 * repeatedly cycling empty transactions.  This can be done even though
	 * we take the IOLOCK to quiesce the file because empty transactions
	 * do not take sb_internal.
	 */
	xchk_trans_cancel(sc);
	xchk_rtgroup_unlock(&sc->sr);
	xchk_trans_alloc_empty(sc);

	/* Walk every file in the filesystem, stashing its rt mappings. */
	while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
		error = xrep_rtrmap_scan_inode(rr, ip);
		xchk_irele(sc, ip);
		if (error)
			break;

		if (xchk_should_terminate(sc, &error))
			break;
	}
	xchk_iscan_iter_finish(&rr->iscan);
	if (error)
		return error;

	/*
	 * Switch out for a real transaction and lock the RT metadata in
	 * preparation for building a new tree.
	 */
	xchk_trans_cancel(sc);
	error = xchk_setup_rt(sc);
	if (error)
		return error;
	error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
	if (error)
		return error;

	/*
	 * If a hook failed to update the in-memory btree, we lack the data to
	 * continue the repair.
	 */
	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	/* Scan for old rtrmap blocks. */
	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xrep_rtrmap_scan_ag(rr, pag);
		if (error) {
			xfs_perag_rele(pag);
			return error;
		}
	}

	/*
	 * Now that we have everything locked again, we need to count the
	 * number of rmap records stashed in the btree.  This should reflect
	 * all actively-owned rt files in the filesystem.  At the same time,
	 * check all our records before we start building a new btree, which
	 * requires the rtbitmap lock.
	 */
	mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, &rr->rtrmap_btree);
	rr->nr_records = 0;
	error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr);
	xfs_btree_del_cursor(mcur, error);

	return error;
}
637 
638 /* Building the new rtrmap btree. */
639 
/*
 * Retrieve rtrmapbt data for bulk load.
 *
 * Bulk-load callback: copy up to @nr_wanted records from the in-memory
 * btree cursor (rr->mcur) into the staged btree @block, returning the
 * number of records actually loaded.
 */
STATIC int
xrep_rtrmap_get_records(
	struct xfs_btree_cur		*cur,
	unsigned int			idx,
	struct xfs_btree_block		*block,
	unsigned int			nr_wanted,
	void				*priv)
{
	struct xrep_rtrmap		*rr = priv;
	union xfs_btree_rec		*block_rec;
	unsigned int			loaded;
	int				error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		int			stat = 0;

		/* Step to the next record of the shadow btree. */
		error = xfs_btree_increment(rr->mcur, 0, &stat);
		if (error)
			return error;
		if (!stat)
			return -EFSCORRUPTED;

		error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
		if (error)
			return error;
		if (!stat)
			return -EFSCORRUPTED;

		/* Encode the record into the new btree block. */
		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
675 
676 /* Feed one of the new btree blocks to the bulk loader. */
677 STATIC int
678 xrep_rtrmap_claim_block(
679 	struct xfs_btree_cur	*cur,
680 	union xfs_btree_ptr	*ptr,
681 	void			*priv)
682 {
683 	struct xrep_rtrmap	*rr = priv;
684 
685 	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
686 }
687 
688 /* Figure out how much space we need to create the incore btree root block. */
689 STATIC size_t
690 xrep_rtrmap_iroot_size(
691 	struct xfs_btree_cur	*cur,
692 	unsigned int		level,
693 	unsigned int		nr_this_level,
694 	void			*priv)
695 {
696 	return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level);
697 }
698 
/*
 * Use the collected rmap information to stage a new rmap btree.  If this is
 * successful we'll return with the new btree root information logged to the
 * repair transaction but not yet committed.  This implements section (III)
 * above.
 */
STATIC int
xrep_rtrmap_build_new_tree(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_rtgroup	*rtg = sc->sr.rtg;
	struct xfs_btree_cur	*rmap_cur;
	int			error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the realtime rmapbt inode.
	 */
	error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
	if (error)
		return error;

	rr->new_btree.bload.get_records = xrep_rtrmap_get_records;
	rr->new_btree.bload.claim_block = xrep_rtrmap_claim_block;
	rr->new_btree.bload.iroot_size = xrep_rtrmap_iroot_size;

	/* Stage a fake root so the bulk loader has something to build onto. */
	rmap_cur = xfs_rtrmapbt_init_cursor(NULL, rtg);
	xfs_btree_stage_ifakeroot(rmap_cur, &rr->new_btree.ifake);

	/* Compute how many blocks we'll need for the rmaps collected. */
	error = xfs_btree_bload_compute_geometry(rmap_cur,
			&rr->new_btree.bload, rr->nr_records);
	if (error)
		goto err_cur;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire
	 * rtrmapbt from the number of extents we found, and pump up our
	 * transaction to have sufficient block reservation.  We're allowed
	 * to exceed quota to repair inconsistent metadata, though this is
	 * unlikely.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, rtg_rmap(rtg),
			rr->new_btree.bload.nr_blocks, 0, true);
	if (error)
		goto err_cur;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			rr->new_btree.bload.nr_blocks);
	if (error)
		goto err_cur;

	/*
	 * Create a cursor to the in-memory btree so that we can bulk load the
	 * new btree.
	 */
	rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, &rr->rtrmap_btree);
	error = xfs_btree_goto_left_edge(rr->mcur);
	if (error)
		goto err_mcur;

	/* Add all observed rmap records. */
	rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
	error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_mcur;

	/*
	 * Install the new rtrmap btree in the inode.  After this point the old
	 * btree is no longer accessible, the new tree is live, and we can
	 * delete the cursor.
	 */
	xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp);
	xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
	xfs_btree_del_cursor(rmap_cur, 0);
	xfs_btree_del_cursor(rr->mcur, 0);
	rr->mcur = NULL;

	/*
	 * Now that we've written the new btree to disk, we don't need to keep
	 * updating the in-memory btree.  Abort the scan to stop live updates.
	 */
	xchk_iscan_abort(&rr->iscan);

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;

	return xrep_roll_trans(sc);

err_mcur:
	xfs_btree_del_cursor(rr->mcur, error);
err_cur:
	xfs_btree_del_cursor(rmap_cur, error);
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}
805 
806 /* Reaping the old btree. */
807 
808 static inline bool
809 xrep_rtrmapbt_want_live_update(
810 	struct xchk_iscan		*iscan,
811 	const struct xfs_owner_info	*oi)
812 {
813 	if (xchk_iscan_aborted(iscan))
814 		return false;
815 
816 	/*
817 	 * We scanned the CoW staging extents before we started the iscan, so
818 	 * we need all the updates.
819 	 */
820 	if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
821 		return true;
822 
823 	/* Ignore updates to files that the scanner hasn't visited yet. */
824 	return xchk_iscan_want_live_update(iscan, oi->oi_owner);
825 }
826 
/*
 * Apply a rtrmapbt update from the regular filesystem into our shadow btree.
 * We're running from the thread that owns the rtrmap ILOCK and is generating
 * the update, so we must be careful about which parts of the struct
 * xrep_rtrmap that we change.
 */
static int
xrep_rtrmapbt_live_update(
	struct notifier_block		*nb,
	unsigned long			action,
	void				*data)
{
	struct xfs_rmap_update_params	*p = data;
	struct xrep_rtrmap		*rr;
	struct xfs_mount		*mp;
	struct xfs_btree_cur		*mcur;
	struct xfs_trans		*tp;
	int				error;

	rr = container_of(nb, struct xrep_rtrmap, rhook.rmap_hook.nb);
	mp = rr->sc->mp;

	/* Skip updates the scan does not need (or the scan was aborted). */
	if (!xrep_rtrmapbt_want_live_update(&rr->iscan, &p->oinfo))
		goto out_unlock;

	trace_xrep_rmap_live_update(rtg_group(rr->sc->sr.rtg), action, p);

	/* Stage the shadow btree change in an empty transaction. */
	tp = xfs_trans_alloc_empty(mp);

	mutex_lock(&rr->lock);
	mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, &rr->rtrmap_btree);
	error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
			p->blockcount, &p->oinfo, p->unwritten);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_cancel;

	error = xfbtree_trans_commit(&rr->rtrmap_btree, tp);
	if (error)
		goto out_cancel;

	xfs_trans_cancel(tp);
	mutex_unlock(&rr->lock);
	return NOTIFY_DONE;

out_cancel:
	xfbtree_trans_cancel(&rr->rtrmap_btree, tp);
	xfs_trans_cancel(tp);
	/* The shadow btree is now untrustworthy; abort the scan. */
	xchk_iscan_abort(&rr->iscan);
	mutex_unlock(&rr->lock);
out_unlock:
	/* Notifier chains don't propagate errors; the abort above does. */
	return NOTIFY_DONE;
}
880 
/* Set up the filesystem scan components. */
STATIC int
xrep_rtrmap_setup_scan(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	mutex_init(&rr->lock);
	xfsb_bitmap_init(&rr->old_rtrmapbt_blocks);

	/* Set up some storage */
	error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp,
			rtg_rgno(sc->sr.rtg));
	if (error)
		goto out_bitmap;

	/* Retry iget every tenth of a second for up to 30 seconds. */
	xchk_iscan_start(sc, 30000, 100, &rr->iscan);

	/*
	 * Hook into live rtrmap operations so that we can update our in-memory
	 * btree to reflect live changes on the filesystem.  Since we drop the
	 * rtrmap ILOCK to scan all the inodes, we need this piece to avoid
	 * installing a stale btree.
	 */
	ASSERT(sc->flags & XCHK_FSGATES_RMAP);
	xfs_rmap_hook_setup(&rr->rhook, xrep_rtrmapbt_live_update);
	error = xfs_rmap_hook_add(rtg_group(sc->sr.rtg), &rr->rhook);
	if (error)
		goto out_iscan;
	return 0;

out_iscan:
	/* Unwind in reverse order of construction. */
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(&rr->rtrmap_btree);
out_bitmap:
	xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
	mutex_destroy(&rr->lock);
	return error;
}
922 
/* Tear down scan components. */
STATIC void
xrep_rtrmap_teardown(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;

	/* Stop live updates from stashing new records before we tear down. */
	xchk_iscan_abort(&rr->iscan);
	xfs_rmap_hook_del(rtg_group(sc->sr.rtg), &rr->rhook);
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(&rr->rtrmap_btree);
	xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
	mutex_destroy(&rr->lock);
}
937 
/* Repair the realtime rmap btree. */
int
xrep_rtrmapbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rtrmap	*rr = sc->buf;
	int			error;

	/* Make sure any problems with the fork are fixed. */
	error = xrep_metadata_inode_forks(sc);
	if (error)
		return error;

	error = xrep_rtrmap_setup_scan(rr);
	if (error)
		return error;

	/* Collect rmaps for realtime files. */
	error = xrep_rtrmap_find_rmaps(rr);
	if (error)
		goto out_records;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the rtrmap information. */
	error = xrep_rtrmap_build_new_tree(rr);
	if (error)
		goto out_records;

	/*
	 * Free all the extents that were allocated to the former rtrmapbt and
	 * aren't cross-linked with something else.
	 */
	error = xrep_reap_metadir_fsblocks(rr->sc, &rr->old_rtrmapbt_blocks);
	if (error)
		goto out_records;

	/* Success and failure paths both fall through to teardown. */
out_records:
	xrep_rtrmap_teardown(rr);
	return error;
}
979