xref: /linux/fs/xfs/scrub/rtrmap_repair.c (revision b477ff98d903618a1ab8247861f2ea6e70c0f0f8)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_btree_staging.h"
15 #include "xfs_buf_mem.h"
16 #include "xfs_btree_mem.h"
17 #include "xfs_bit.h"
18 #include "xfs_log_format.h"
19 #include "xfs_trans.h"
20 #include "xfs_sb.h"
21 #include "xfs_alloc.h"
22 #include "xfs_rmap.h"
23 #include "xfs_rmap_btree.h"
24 #include "xfs_rtrmap_btree.h"
25 #include "xfs_inode.h"
26 #include "xfs_icache.h"
27 #include "xfs_bmap.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_quota.h"
30 #include "xfs_rtalloc.h"
31 #include "xfs_ag.h"
32 #include "xfs_rtgroup.h"
33 #include "xfs_refcount.h"
34 #include "scrub/xfs_scrub.h"
35 #include "scrub/scrub.h"
36 #include "scrub/common.h"
37 #include "scrub/btree.h"
38 #include "scrub/trace.h"
39 #include "scrub/repair.h"
40 #include "scrub/bitmap.h"
41 #include "scrub/fsb_bitmap.h"
42 #include "scrub/rgb_bitmap.h"
43 #include "scrub/xfile.h"
44 #include "scrub/xfarray.h"
45 #include "scrub/iscan.h"
46 #include "scrub/newbt.h"
47 #include "scrub/reap.h"
48 
49 /*
50  * Realtime Reverse Mapping Btree Repair
51  * =====================================
52  *
53  * This isn't quite as difficult as repairing the rmap btree on the data
54  * device, since we only store the data fork extents of realtime files on the
55  * realtime device.  We still have to freeze the filesystem and stop the
56  * background threads like we do for the rmap repair, but we only have to scan
57  * realtime inodes.
58  *
59  * Collecting entries for the new realtime rmap btree is easy -- all we have
60  * to do is generate rtrmap entries from the data fork mappings of all realtime
61  * files in the filesystem.  We then scan the rmap btrees of the data device
62  * looking for extents belonging to the old btree and note them in a bitmap.
63  *
64  * To rebuild the realtime rmap btree, we bulk-load the collected mappings into
65  * a new btree cursor and atomically swap that into the realtime inode.  Then
66  * we can free the blocks from the old btree.
67  *
68  * We use the 'xrep_rtrmap' prefix for all the rmap functions.
69  */
70 
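/*
 * Informal sketch of the flow implemented below:
 *
 *  1. While hooked into live rmap updates, scan the data fork of every
 *     realtime file and stash its mappings in an in-memory (xfbtree)
 *     rmap btree (xrep_rtrmap_find_rmaps).
 *
 *  2. Walk each AG rmap btree on the data device to find the blocks of
 *     the old rtrmap btree (xrep_rtrmap_scan_ag).
 *
 *  3. Bulk-load the stashed records into a staged btree and commit it
 *     into the rtrmap inode (xrep_rtrmap_build_new_tree).
 *
 *  4. Reap the old btree blocks (xrep_rtrmap_remove_old_tree).
 */
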
71 /* Context for collecting rmaps */
72 struct xrep_rtrmap {
73 	/* new rtrmapbt information */
74 	struct xrep_newbt	new_btree;
75 
76 	/* lock for the xfbtree and xfile */
77 	struct mutex		lock;
78 
79 	/* rmap records generated from primary metadata */
80 	struct xfbtree		rtrmap_btree;
81 
82 	struct xfs_scrub	*sc;
83 
84 	/* bitmap of old rtrmapbt blocks */
85 	struct xfsb_bitmap	old_rtrmapbt_blocks;
86 
87 	/* Hooks into rtrmap update code. */
88 	struct xfs_rmap_hook	rhook;
89 
90 	/* inode scan cursor */
91 	struct xchk_iscan	iscan;
92 
93 	/* in-memory btree cursor for the ->get_records walk */
94 	struct xfs_btree_cur	*mcur;
95 
96 	/* Number of records we're staging in the new btree. */
97 	uint64_t		nr_records;
98 };
99 
100 /* Set us up to repair rt reverse mapping btrees. */
101 int
102 xrep_setup_rtrmapbt(
103 	struct xfs_scrub	*sc)
104 {
105 	struct xrep_rtrmap	*rr;
106 	char			*descr;
107 	int			error;
108 
109 	xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);
110 
111 	descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records");
112 	error = xrep_setup_xfbtree(sc, descr);
113 	kfree(descr);
114 	if (error)
115 		return error;
116 
117 	rr = kzalloc(sizeof(struct xrep_rtrmap), XCHK_GFP_FLAGS);
118 	if (!rr)
119 		return -ENOMEM;
120 
121 	rr->sc = sc;
122 	sc->buf = rr;
123 	return 0;
124 }
125 
126 /* Make sure there's nothing funny about this mapping. */
127 STATIC int
128 xrep_rtrmap_check_mapping(
129 	struct xfs_scrub	*sc,
130 	const struct xfs_rmap_irec *rec)
131 {
132 	if (xfs_rtrmap_check_irec(sc->sr.rtg, rec) != NULL)
133 		return -EFSCORRUPTED;
134 
135 	/* Make sure this isn't free space. */
136 	return xrep_require_rtext_inuse(sc, rec->rm_startblock,
137 			rec->rm_blockcount);
138 }
139 
140 /* Store a reverse-mapping record. */
141 static inline int
142 xrep_rtrmap_stash(
143 	struct xrep_rtrmap	*rr,
144 	xfs_rgblock_t		startblock,
145 	xfs_extlen_t		blockcount,
146 	uint64_t		owner,
147 	uint64_t		offset,
148 	unsigned int		flags)
149 {
150 	struct xfs_rmap_irec	rmap = {
151 		.rm_startblock	= startblock,
152 		.rm_blockcount	= blockcount,
153 		.rm_owner	= owner,
154 		.rm_offset	= offset,
155 		.rm_flags	= flags,
156 	};
157 	struct xfs_scrub	*sc = rr->sc;
158 	struct xfs_btree_cur	*mcur;
159 	int			error = 0;
160 
161 	if (xchk_should_terminate(sc, &error))
162 		return error;
163 
164 	if (xchk_iscan_aborted(&rr->iscan))
165 		return -EFSCORRUPTED;
166 
167 	trace_xrep_rtrmap_found(sc->mp, &rmap);
168 
169 	/* Add entry to in-memory btree. */
170 	mutex_lock(&rr->lock);
171 	mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, &rr->rtrmap_btree);
172 	error = xfs_rmap_map_raw(mcur, &rmap);
173 	xfs_btree_del_cursor(mcur, error);
174 	if (error)
175 		goto out_cancel;
176 
177 	error = xfbtree_trans_commit(&rr->rtrmap_btree, sc->tp);
178 	if (error)
179 		goto out_abort;
180 
181 	mutex_unlock(&rr->lock);
182 	return 0;
183 
184 out_cancel:
185 	xfbtree_trans_cancel(&rr->rtrmap_btree, sc->tp);
186 out_abort:
187 	xchk_iscan_abort(&rr->iscan);
188 	mutex_unlock(&rr->lock);
189 	return error;
190 }
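
/*
 * A note on the commit/cancel pattern above: the shadow btree is backed
 * by an xfile buffer cache, so (roughly speaking) each insertion must
 * either be committed with xfbtree_trans_commit() to write the dirty
 * in-memory buffers back to the xfile, or cancelled with
 * xfbtree_trans_cancel() to toss them if the insert failed.
 */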
191 
192 /* Finding all file and bmbt extents. */
193 
194 /* Context for accumulating rmaps for an inode fork. */
195 struct xrep_rtrmap_ifork {
196 	/*
197 	 * Accumulate rmap data here to turn multiple adjacent bmaps into a
198 	 * single rmap.
199 	 */
200 	struct xfs_rmap_irec	accum;
201 
202 	struct xrep_rtrmap	*rr;
203 };
204 
205 /* Stash an rmap that we accumulated while walking an inode fork. */
206 STATIC int
207 xrep_rtrmap_stash_accumulated(
208 	struct xrep_rtrmap_ifork	*rf)
209 {
210 	if (rf->accum.rm_blockcount == 0)
211 		return 0;
212 
213 	return xrep_rtrmap_stash(rf->rr, rf->accum.rm_startblock,
214 			rf->accum.rm_blockcount, rf->accum.rm_owner,
215 			rf->accum.rm_offset, rf->accum.rm_flags);
216 }
217 
218 /* Accumulate a bmbt record. */
219 STATIC int
220 xrep_rtrmap_visit_bmbt(
221 	struct xfs_btree_cur	*cur,
222 	struct xfs_bmbt_irec	*rec,
223 	void			*priv)
224 {
225 	struct xrep_rtrmap_ifork *rf = priv;
226 	struct xfs_rmap_irec	*accum = &rf->accum;
227 	struct xfs_mount	*mp = rf->rr->sc->mp;
228 	xfs_rgblock_t		rgbno;
229 	unsigned int		rmap_flags = 0;
230 	int			error;
231 
232 	if (xfs_rtb_to_rgno(mp, rec->br_startblock) !=
233 	    rtg_rgno(rf->rr->sc->sr.rtg))
234 		return 0;
235 
236 	if (rec->br_state == XFS_EXT_UNWRITTEN)
237 		rmap_flags |= XFS_RMAP_UNWRITTEN;
238 
239 	/* If this bmap is adjacent to the previous one, just add it. */
240 	rgbno = xfs_rtb_to_rgbno(mp, rec->br_startblock);
241 	if (accum->rm_blockcount > 0 &&
242 	    rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
243 	    rgbno == accum->rm_startblock + accum->rm_blockcount &&
244 	    rmap_flags == accum->rm_flags) {
245 		accum->rm_blockcount += rec->br_blockcount;
246 		return 0;
247 	}
248 
249 	/* Otherwise stash the old rmap and start accumulating a new one. */
250 	error = xrep_rtrmap_stash_accumulated(rf);
251 	if (error)
252 		return error;
253 
254 	accum->rm_startblock = rgbno;
255 	accum->rm_blockcount = rec->br_blockcount;
256 	accum->rm_offset = rec->br_startoff;
257 	accum->rm_flags = rmap_flags;
258 	return 0;
259 }
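
/*
 * A worked example of the accumulation above, with illustrative values:
 * mappings (startoff 0, rgbno 100, len 8) and (startoff 8, rgbno 108,
 * len 4) with identical flags merge into the single rmap record
 * (startoff 0, rgbno 100, len 12).  Any gap, flag change, or rgbno
 * discontinuity flushes the accumulator via
 * xrep_rtrmap_stash_accumulated() and starts a new record.
 */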
260 
261 /*
262  * Iterate the block mapping btree to collect rmap records for anything in this
263  * fork that maps to the rt volume.  Sets @mappings_done to true if we've
264  * scanned the block mappings in this fork.
265  */
266 STATIC int
267 xrep_rtrmap_scan_bmbt(
268 	struct xrep_rtrmap_ifork *rf,
269 	struct xfs_inode	*ip,
270 	bool			*mappings_done)
271 {
272 	struct xrep_rtrmap	*rr = rf->rr;
273 	struct xfs_btree_cur	*cur;
274 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
275 	int			error = 0;
276 
277 	*mappings_done = false;
278 
279 	/*
280 	 * If the incore extent cache is already loaded, we'll just use the
281 	 * incore extent scanner to record mappings.  Don't bother walking the
282 	 * ondisk extent tree.
283 	 */
284 	if (!xfs_need_iread_extents(ifp))
285 		return 0;
286 
287 	/* Accumulate all the mappings in the bmap btree. */
288 	cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, XFS_DATA_FORK);
289 	error = xfs_bmap_query_all(cur, xrep_rtrmap_visit_bmbt, rf);
290 	xfs_btree_del_cursor(cur, error);
291 	if (error)
292 		return error;
293 
294 	/* Stash any remaining accumulated rmaps and exit. */
295 	*mappings_done = true;
296 	return xrep_rtrmap_stash_accumulated(rf);
297 }
298 
299 /*
300  * Iterate the in-core extent cache to collect rmap records for anything in
301  * this fork that matches this realtime group.
302  */
303 STATIC int
304 xrep_rtrmap_scan_iext(
305 	struct xrep_rtrmap_ifork *rf,
306 	struct xfs_ifork	*ifp)
307 {
308 	struct xfs_bmbt_irec	rec;
309 	struct xfs_iext_cursor	icur;
310 	int			error;
311 
312 	for_each_xfs_iext(ifp, &icur, &rec) {
313 		if (isnullstartblock(rec.br_startblock))
314 			continue;
315 		error = xrep_rtrmap_visit_bmbt(NULL, &rec, rf);
316 		if (error)
317 			return error;
318 	}
319 
320 	return xrep_rtrmap_stash_accumulated(rf);
321 }
322 
323 /* Find all the extents on the realtime device mapped by an inode fork. */
324 STATIC int
325 xrep_rtrmap_scan_dfork(
326 	struct xrep_rtrmap	*rr,
327 	struct xfs_inode	*ip)
328 {
329 	struct xrep_rtrmap_ifork rf = {
330 		.accum		= { .rm_owner = ip->i_ino, },
331 		.rr		= rr,
332 	};
333 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
334 	int			error = 0;
335 
336 	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
337 		bool		mappings_done;
338 
339 		/*
340 		 * Scan the bmbt for mappings.  If the incore extent tree is
341 		 * loaded, we want to scan the cached mappings since that's
342 		 * faster when the extent counts are very high.
343 		 */
344 		error = xrep_rtrmap_scan_bmbt(&rf, ip, &mappings_done);
345 		if (error || mappings_done)
346 			return error;
347 	} else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
348 		/* realtime data forks should only be extents or btree */
349 		return -EFSCORRUPTED;
350 	}
351 
352 	/* Scan incore extent cache. */
353 	return xrep_rtrmap_scan_iext(&rf, ifp);
354 }
355 
356 /* Record reverse mappings for a file. */
357 STATIC int
358 xrep_rtrmap_scan_inode(
359 	struct xrep_rtrmap	*rr,
360 	struct xfs_inode	*ip)
361 {
362 	unsigned int		lock_mode;
363 	int			error = 0;
364 
365 	/* Skip the rt rmap btree inode. */
366 	if (rr->sc->ip == ip)
367 		return 0;
368 
369 	lock_mode = xfs_ilock_data_map_shared(ip);
370 
371 	/* Check the data fork if it's on the realtime device. */
372 	if (XFS_IS_REALTIME_INODE(ip)) {
373 		error = xrep_rtrmap_scan_dfork(rr, ip);
374 		if (error)
375 			goto out_unlock;
376 	}
377 
378 	xchk_iscan_mark_visited(&rr->iscan, ip);
379 out_unlock:
380 	xfs_iunlock(ip, lock_mode);
381 	return error;
382 }
383 
384 /* Record extents that belong to the realtime rmap inode. */
385 STATIC int
386 xrep_rtrmap_walk_rmap(
387 	struct xfs_btree_cur		*cur,
388 	const struct xfs_rmap_irec	*rec,
389 	void				*priv)
390 {
391 	struct xrep_rtrmap		*rr = priv;
392 	int				error = 0;
393 
394 	if (xchk_should_terminate(rr->sc, &error))
395 		return error;
396 
397 	/* Skip extents which are not owned by this inode and fork. */
398 	if (rec->rm_owner != rr->sc->ip->i_ino)
399 		return 0;
400 
401 	error = xrep_check_ino_btree_mapping(rr->sc, rec);
402 	if (error)
403 		return error;
404 
405 	return xfsb_bitmap_set(&rr->old_rtrmapbt_blocks,
406 			xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
407 			rec->rm_blockcount);
408 }
409 
410 /* Scan one AG for reverse mappings for the realtime rmap btree. */
411 STATIC int
412 xrep_rtrmap_scan_ag(
413 	struct xrep_rtrmap	*rr,
414 	struct xfs_perag	*pag)
415 {
416 	struct xfs_scrub	*sc = rr->sc;
417 	int			error;
418 
419 	error = xrep_ag_init(sc, pag, &sc->sa);
420 	if (error)
421 		return error;
422 
423 	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrmap_walk_rmap, rr);
424 	xchk_ag_free(sc, &sc->sa);
425 	return error;
426 }
427 
428 struct xrep_rtrmap_stash_run {
429 	struct xrep_rtrmap	*rr;
430 	uint64_t		owner;
431 };
432 
433 static int
434 xrep_rtrmap_stash_run(
435 	uint32_t			start,
436 	uint32_t			len,
437 	void				*priv)
438 {
439 	struct xrep_rtrmap_stash_run	*rsr = priv;
440 	struct xrep_rtrmap		*rr = rsr->rr;
441 	xfs_rgblock_t			rgbno = start;
442 
443 	return xrep_rtrmap_stash(rr, rgbno, len, rsr->owner, 0, 0);
444 }
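
/*
 * The zero offset and flags passed above reflect that these bitmap runs
 * come from special (non-inode) owners such as XFS_RMAP_OWN_COW, which
 * by convention carry neither a fork offset nor inode flags.
 */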
445 
446 /*
447  * Emit rmaps for every extent of bits set in the bitmap.  Caller must ensure
448  * that the ranges are in units of rt group blocks.
449  */
450 STATIC int
451 xrep_rtrmap_stash_bitmap(
452 	struct xrep_rtrmap		*rr,
453 	struct xrgb_bitmap		*bitmap,
454 	const struct xfs_owner_info	*oinfo)
455 {
456 	struct xrep_rtrmap_stash_run	rsr = {
457 		.rr			= rr,
458 		.owner			= oinfo->oi_owner,
459 	};
460 
461 	return xrgb_bitmap_walk(bitmap, xrep_rtrmap_stash_run, &rsr);
462 }
463 
464 /* Record a CoW staging extent. */
465 STATIC int
466 xrep_rtrmap_walk_cowblocks(
467 	struct xfs_btree_cur		*cur,
468 	const struct xfs_refcount_irec	*irec,
469 	void				*priv)
470 {
471 	struct xrgb_bitmap		*bitmap = priv;
472 
473 	if (!xfs_refcount_check_domain(irec) ||
474 	    irec->rc_domain != XFS_REFC_DOMAIN_COW)
475 		return -EFSCORRUPTED;
476 
477 	return xrgb_bitmap_set(bitmap, irec->rc_startblock,
478 			irec->rc_blockcount);
479 }
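
/*
 * Presumably a record outside the CoW domain coming back from the
 * ranged query is a sign of on-disk corruption, which is why we return
 * -EFSCORRUPTED instead of skipping the record.
 */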
480 
481 /*
482  * Collect rmaps for the blocks containing the refcount btree, and all CoW
483  * staging extents.
484  */
485 STATIC int
486 xrep_rtrmap_find_refcount_rmaps(
487 	struct xrep_rtrmap	*rr)
488 {
489 	struct xrgb_bitmap	cow_blocks;		/* COWBIT */
490 	struct xfs_refcount_irec low = {
491 		.rc_startblock	= 0,
492 		.rc_domain	= XFS_REFC_DOMAIN_COW,
493 	};
494 	struct xfs_refcount_irec high = {
495 		.rc_startblock	= -1U,
496 		.rc_domain	= XFS_REFC_DOMAIN_COW,
497 	};
498 	struct xfs_scrub	*sc = rr->sc;
499 	int			error;
500 
501 	if (!xfs_has_rtreflink(sc->mp))
502 		return 0;
503 
504 	xrgb_bitmap_init(&cow_blocks);
505 
506 	/* Collect rmaps for CoW staging extents. */
507 	error = xfs_refcount_query_range(sc->sr.refc_cur, &low, &high,
508 			xrep_rtrmap_walk_cowblocks, &cow_blocks);
509 	if (error)
510 		goto out_bitmap;
511 
512 	/* Generate rmaps for everything. */
513 	error = xrep_rtrmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW);
514 	if (error)
515 		goto out_bitmap;
516 
517 out_bitmap:
518 	xrgb_bitmap_destroy(&cow_blocks);
519 	return error;
520 }
521 
522 /* Count and check all collected records. */
523 STATIC int
524 xrep_rtrmap_check_record(
525 	struct xfs_btree_cur		*cur,
526 	const struct xfs_rmap_irec	*rec,
527 	void				*priv)
528 {
529 	struct xrep_rtrmap		*rr = priv;
530 	int				error;
531 
532 	error = xrep_rtrmap_check_mapping(rr->sc, rec);
533 	if (error)
534 		return error;
535 
536 	rr->nr_records++;
537 	return 0;
538 }
539 
540 /* Generate all the reverse-mappings for the realtime device. */
541 STATIC int
542 xrep_rtrmap_find_rmaps(
543 	struct xrep_rtrmap	*rr)
544 {
545 	struct xfs_scrub	*sc = rr->sc;
546 	struct xfs_perag	*pag = NULL;
547 	struct xfs_inode	*ip;
548 	struct xfs_btree_cur	*mcur;
549 	int			error;
550 
551 	/* Generate rmaps for the realtime superblock */
552 	if (xfs_has_rtsb(sc->mp) && rtg_rgno(rr->sc->sr.rtg) == 0) {
553 		error = xrep_rtrmap_stash(rr, 0, sc->mp->m_sb.sb_rextsize,
554 				XFS_RMAP_OWN_FS, 0, 0);
555 		if (error)
556 			return error;
557 	}
558 
559 	/* Find CoW staging extents. */
560 	xrep_rtgroup_btcur_init(sc, &sc->sr);
561 	error = xrep_rtrmap_find_refcount_rmaps(rr);
562 	xchk_rtgroup_btcur_free(&sc->sr);
563 	if (error)
564 		return error;
565 
566 	/*
567 	 * Set up for a potentially lengthy filesystem scan by reducing our
568 	 * transaction resource usage for the duration.  Specifically:
569 	 *
570 	 * Unlock the realtime metadata inodes and cancel the transaction to
571 	 * release the log grant space while we scan the filesystem.
572 	 *
573 	 * Create a new empty transaction to eliminate the possibility of the
574 	 * inode scan deadlocking on cyclical metadata.
575 	 *
576 	 * We pass the empty transaction to the file scanning function to avoid
577 	 * repeatedly cycling empty transactions.  This can be done even though
578 	 * we take the IOLOCK to quiesce the file because empty transactions
579 	 * do not take sb_internal.
580 	 */
581 	xchk_trans_cancel(sc);
582 	xchk_rtgroup_unlock(&sc->sr);
583 	error = xchk_trans_alloc_empty(sc);
584 	if (error)
585 		return error;
586 
587 	while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
588 		error = xrep_rtrmap_scan_inode(rr, ip);
589 		xchk_irele(sc, ip);
590 		if (error)
591 			break;
592 
593 		if (xchk_should_terminate(sc, &error))
594 			break;
595 	}
596 	xchk_iscan_iter_finish(&rr->iscan);
597 	if (error)
598 		return error;
599 
600 	/*
601 	 * Switch out for a real transaction and lock the RT metadata in
602 	 * preparation for building a new tree.
603 	 */
604 	xchk_trans_cancel(sc);
605 	error = xchk_setup_rt(sc);
606 	if (error)
607 		return error;
608 	error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
609 	if (error)
610 		return error;
611 
612 	/*
613 	 * If a hook failed to update the in-memory btree, we lack the data to
614 	 * continue the repair.
615 	 */
616 	if (xchk_iscan_aborted(&rr->iscan))
617 		return -EFSCORRUPTED;
618 
619 	/* Scan for old rtrmap blocks. */
620 	while ((pag = xfs_perag_next(sc->mp, pag))) {
621 		error = xrep_rtrmap_scan_ag(rr, pag);
622 		if (error) {
623 			xfs_perag_rele(pag);
624 			return error;
625 		}
626 	}
627 
628 	/*
629 	 * Now that we have everything locked again, we need to count the
630 	 * number of rmap records stashed in the btree.  This should reflect
631 	 * all actively-owned rt files in the filesystem.  At the same time,
632 	 * check all our records before we start building a new btree, which
633 	 * requires the rtbitmap lock.
634 	 */
635 	mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, &rr->rtrmap_btree);
636 	rr->nr_records = 0;
637 	error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr);
638 	xfs_btree_del_cursor(mcur, error);
639 
640 	return error;
641 }
642 
643 /* Building the new rtrmap btree. */
644 
645 /* Retrieve rtrmapbt data for bulk load. */
646 STATIC int
647 xrep_rtrmap_get_records(
648 	struct xfs_btree_cur		*cur,
649 	unsigned int			idx,
650 	struct xfs_btree_block		*block,
651 	unsigned int			nr_wanted,
652 	void				*priv)
653 {
654 	struct xrep_rtrmap		*rr = priv;
655 	union xfs_btree_rec		*block_rec;
656 	unsigned int			loaded;
657 	int				error;
658 
659 	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
660 		int			stat = 0;
661 
662 		error = xfs_btree_increment(rr->mcur, 0, &stat);
663 		if (error)
664 			return error;
665 		if (!stat)
666 			return -EFSCORRUPTED;
667 
668 		error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
669 		if (error)
670 			return error;
671 		if (!stat)
672 			return -EFSCORRUPTED;
673 
674 		block_rec = xfs_btree_rec_addr(cur, idx, block);
675 		cur->bc_ops->init_rec_from_cur(cur, block_rec);
676 	}
677 
678 	return loaded;
679 }
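
/*
 * Sketch of the bulk-load contract implemented above: for each new leaf
 * block the bulk loader asks for up to @nr_wanted records; we walk the
 * in-memory btree cursor (rr->mcur) left to right, copy one record per
 * iteration into the staged block, and return the number of records
 * actually loaded.
 */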
680 
681 /* Feed one of the new btree blocks to the bulk loader. */
682 STATIC int
683 xrep_rtrmap_claim_block(
684 	struct xfs_btree_cur	*cur,
685 	union xfs_btree_ptr	*ptr,
686 	void			*priv)
687 {
688 	struct xrep_rtrmap	*rr = priv;
689 
690 	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
691 }
692 
693 /* Figure out how much space we need to create the incore btree root block. */
694 STATIC size_t
695 xrep_rtrmap_iroot_size(
696 	struct xfs_btree_cur	*cur,
697 	unsigned int		level,
698 	unsigned int		nr_this_level,
699 	void			*priv)
700 {
701 	return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level);
702 }
703 
704 /*
705  * Use the collected rmap information to stage a new rmap btree.  If this is
706  * successful we'll return with the new btree root information logged to the
707  * repair transaction but not yet committed.
709  */
710 STATIC int
711 xrep_rtrmap_build_new_tree(
712 	struct xrep_rtrmap	*rr)
713 {
714 	struct xfs_scrub	*sc = rr->sc;
715 	struct xfs_rtgroup	*rtg = sc->sr.rtg;
716 	struct xfs_btree_cur	*rmap_cur;
717 	int			error;
718 
719 	/*
720 	 * Prepare to construct the new btree by reserving disk space for the
721 	 * new btree and setting up all the accounting information we'll need
722 	 * to root the new btree while it's under construction and before we
723 	 * attach it to the realtime rmapbt inode.
724 	 */
725 	error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
726 	if (error)
727 		return error;
728 
729 	rr->new_btree.bload.get_records = xrep_rtrmap_get_records;
730 	rr->new_btree.bload.claim_block = xrep_rtrmap_claim_block;
731 	rr->new_btree.bload.iroot_size = xrep_rtrmap_iroot_size;
732 
733 	rmap_cur = xfs_rtrmapbt_init_cursor(NULL, rtg);
734 	xfs_btree_stage_ifakeroot(rmap_cur, &rr->new_btree.ifake);
735 
736 	/* Compute how many blocks we'll need for the rmaps collected. */
737 	error = xfs_btree_bload_compute_geometry(rmap_cur,
738 			&rr->new_btree.bload, rr->nr_records);
739 	if (error)
740 		goto err_cur;
741 
742 	/* Last chance to abort before we start committing fixes. */
743 	if (xchk_should_terminate(sc, &error))
744 		goto err_cur;
745 
746 	/*
747 	 * Guess how many blocks we're going to need to rebuild an entire
748 	 * rtrmapbt from the number of extents we found, and pump up our
749 	 * transaction to have sufficient block reservation.  We're allowed
750 	 * to exceed quota to repair inconsistent metadata, though this is
751 	 * unlikely.
752 	 */
753 	error = xfs_trans_reserve_more_inode(sc->tp, rtg_rmap(rtg),
754 			rr->new_btree.bload.nr_blocks, 0, true);
755 	if (error)
756 		goto err_cur;
757 
758 	/* Reserve the space we'll need for the new btree. */
759 	error = xrep_newbt_alloc_blocks(&rr->new_btree,
760 			rr->new_btree.bload.nr_blocks);
761 	if (error)
762 		goto err_cur;
763 
764 	/*
765 	 * Create a cursor to the in-memory btree so that we can bulk load the
766 	 * new btree.
767 	 */
768 	rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, &rr->rtrmap_btree);
769 	error = xfs_btree_goto_left_edge(rr->mcur);
770 	if (error)
771 		goto err_mcur;
772 
773 	/* Add all observed rmap records. */
774 	rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
775 	error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
776 	if (error)
777 		goto err_mcur;
778 
779 	/*
780 	 * Install the new rtrmap btree in the inode.  After this point the old
781 	 * btree is no longer accessible, the new tree is live, and we can
782 	 * delete the cursor.
783 	 */
784 	xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp);
785 	xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
786 	xfs_btree_del_cursor(rmap_cur, 0);
787 	xfs_btree_del_cursor(rr->mcur, 0);
788 	rr->mcur = NULL;
789 
790 	/*
791 	 * Now that we've written the new btree to disk, we don't need to keep
792 	 * updating the in-memory btree.  Abort the scan to stop live updates.
793 	 */
794 	xchk_iscan_abort(&rr->iscan);
795 
796 	/* Dispose of any unused blocks and the accounting information. */
797 	error = xrep_newbt_commit(&rr->new_btree);
798 	if (error)
799 		return error;
800 
801 	return xrep_roll_trans(sc);
802 
803 err_mcur:
804 	xfs_btree_del_cursor(rr->mcur, error);
805 err_cur:
806 	xfs_btree_del_cursor(rmap_cur, error);
807 	xrep_newbt_cancel(&rr->new_btree);
808 	return error;
809 }
810 
811 /* Reaping the old btree. */
812 
813 /* Reap the old rtrmapbt blocks. */
814 STATIC int
815 xrep_rtrmap_remove_old_tree(
816 	struct xrep_rtrmap	*rr)
817 {
818 	int			error;
819 
820 	/*
821 	 * Free all the extents that were allocated to the former rtrmapbt and
822 	 * aren't cross-linked with something else.
823 	 */
824 	error = xrep_reap_metadir_fsblocks(rr->sc, &rr->old_rtrmapbt_blocks);
825 	if (error)
826 		return error;
827 
828 	/*
829 	 * Ensure the proper reservation for the rtrmap inode so that we don't
830 	 * fail to expand the new btree.
831 	 */
832 	return xrep_reset_metafile_resv(rr->sc);
833 }
834 
835 static inline bool
836 xrep_rtrmapbt_want_live_update(
837 	struct xchk_iscan		*iscan,
838 	const struct xfs_owner_info	*oi)
839 {
840 	if (xchk_iscan_aborted(iscan))
841 		return false;
842 
843 	/*
844 	 * We scanned the CoW staging extents before we started the iscan, so
845 	 * we need all the updates.
846 	 */
847 	if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
848 		return true;
849 
850 	/* Ignore updates to files that the scanner hasn't visited yet. */
851 	return xchk_iscan_want_live_update(iscan, oi->oi_owner);
852 }
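
/*
 * Illustrative example: if the inode scan cursor has advanced past
 * inode 200, an rmap update for inode 150 is replayed into the shadow
 * btree, while an update for inode 250 is ignored because the scanner
 * will visit that file later anyway.
 */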
853 
854 /*
855  * Apply a rtrmapbt update from the regular filesystem into our shadow btree.
856  * We're running from the thread that owns the rtrmap ILOCK and is generating
857  * the update, so we must be careful about which parts of the struct
858  * xrep_rtrmap that we change.
859  */
860 static int
861 xrep_rtrmapbt_live_update(
862 	struct notifier_block		*nb,
863 	unsigned long			action,
864 	void				*data)
865 {
866 	struct xfs_rmap_update_params	*p = data;
867 	struct xrep_rtrmap		*rr;
868 	struct xfs_mount		*mp;
869 	struct xfs_btree_cur		*mcur;
870 	struct xfs_trans		*tp;
871 	void				*txcookie;
872 	int				error;
873 
874 	rr = container_of(nb, struct xrep_rtrmap, rhook.rmap_hook.nb);
875 	mp = rr->sc->mp;
876 
877 	if (!xrep_rtrmapbt_want_live_update(&rr->iscan, &p->oinfo))
878 		goto out_unlock;
879 
880 	trace_xrep_rmap_live_update(rtg_group(rr->sc->sr.rtg), action, p);
881 
882 	error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
883 	if (error)
884 		goto out_abort;
885 
886 	mutex_lock(&rr->lock);
887 	mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, &rr->rtrmap_btree);
888 	error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
889 			p->blockcount, &p->oinfo, p->unwritten);
890 	xfs_btree_del_cursor(mcur, error);
891 	if (error)
892 		goto out_cancel;
893 
894 	error = xfbtree_trans_commit(&rr->rtrmap_btree, tp);
895 	if (error)
896 		goto out_cancel;
897 
898 	xrep_trans_cancel_hook_dummy(&txcookie, tp);
899 	mutex_unlock(&rr->lock);
900 	return NOTIFY_DONE;
901 
902 out_cancel:
903 	xfbtree_trans_cancel(&rr->rtrmap_btree, tp);
904 	xrep_trans_cancel_hook_dummy(&txcookie, tp);
905 out_abort:
906 	xchk_iscan_abort(&rr->iscan);
907 	mutex_unlock(&rr->lock);
908 out_unlock:
909 	return NOTIFY_DONE;
910 }
911 
912 /* Set up the filesystem scan components. */
913 STATIC int
914 xrep_rtrmap_setup_scan(
915 	struct xrep_rtrmap	*rr)
916 {
917 	struct xfs_scrub	*sc = rr->sc;
918 	int			error;
919 
920 	mutex_init(&rr->lock);
921 	xfsb_bitmap_init(&rr->old_rtrmapbt_blocks);
922 
923 	/* Set up some storage */
924 	error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp,
925 			rtg_rgno(sc->sr.rtg));
926 	if (error)
927 		goto out_bitmap;
928 
929 	/* Retry iget every tenth of a second for up to 30 seconds. */
930 	xchk_iscan_start(sc, 30000, 100, &rr->iscan);
931 
932 	/*
933 	 * Hook into live rtrmap operations so that we can update our in-memory
934 	 * btree to reflect live changes on the filesystem.  Since we drop the
935 	 * rtrmap ILOCK to scan all the inodes, we need this piece to avoid
936 	 * installing a stale btree.
937 	 */
938 	ASSERT(sc->flags & XCHK_FSGATES_RMAP);
939 	xfs_rmap_hook_setup(&rr->rhook, xrep_rtrmapbt_live_update);
940 	error = xfs_rmap_hook_add(rtg_group(sc->sr.rtg), &rr->rhook);
941 	if (error)
942 		goto out_iscan;
943 	return 0;
944 
945 out_iscan:
946 	xchk_iscan_teardown(&rr->iscan);
947 	xfbtree_destroy(&rr->rtrmap_btree);
948 out_bitmap:
949 	xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
950 	mutex_destroy(&rr->lock);
951 	return error;
952 }
953 
954 /* Tear down scan components. */
955 STATIC void
956 xrep_rtrmap_teardown(
957 	struct xrep_rtrmap	*rr)
958 {
959 	struct xfs_scrub	*sc = rr->sc;
960 
961 	xchk_iscan_abort(&rr->iscan);
962 	xfs_rmap_hook_del(rtg_group(sc->sr.rtg), &rr->rhook);
963 	xchk_iscan_teardown(&rr->iscan);
964 	xfbtree_destroy(&rr->rtrmap_btree);
965 	xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
966 	mutex_destroy(&rr->lock);
967 }
968 
969 /* Repair the realtime rmap btree. */
970 int
971 xrep_rtrmapbt(
972 	struct xfs_scrub	*sc)
973 {
974 	struct xrep_rtrmap	*rr = sc->buf;
975 	int			error;
976 
977 	/* Make sure any problems with the fork are fixed. */
978 	error = xrep_metadata_inode_forks(sc);
979 	if (error)
980 		return error;
981 
982 	error = xrep_rtrmap_setup_scan(rr);
983 	if (error)
984 		return error;
985 
986 	/* Collect rmaps for realtime files. */
987 	error = xrep_rtrmap_find_rmaps(rr);
988 	if (error)
989 		goto out_records;
990 
991 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
992 
993 	/* Rebuild the rtrmap information. */
994 	error = xrep_rtrmap_build_new_tree(rr);
995 	if (error)
996 		goto out_records;
997 
998 	/* Kill the old tree. */
999 	error = xrep_rtrmap_remove_old_tree(rr);
1000 	if (error)
1001 		goto out_records;
1002 
1003 out_records:
1004 	xrep_rtrmap_teardown(rr);
1005 	return error;
1006 }
1007