xref: /linux/fs/xfs/scrub/bmap_repair.c (revision 68c402fe5c5e5aa9a04c8bba9d99feb08a68afa7)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_btree_staging.h"
15 #include "xfs_bit.h"
16 #include "xfs_log_format.h"
17 #include "xfs_trans.h"
18 #include "xfs_sb.h"
19 #include "xfs_inode.h"
20 #include "xfs_inode_fork.h"
21 #include "xfs_alloc.h"
22 #include "xfs_rtalloc.h"
23 #include "xfs_bmap.h"
24 #include "xfs_bmap_util.h"
25 #include "xfs_bmap_btree.h"
26 #include "xfs_rmap.h"
27 #include "xfs_rmap_btree.h"
28 #include "xfs_refcount.h"
29 #include "xfs_quota.h"
30 #include "xfs_ialloc.h"
31 #include "xfs_ag.h"
32 #include "xfs_reflink.h"
33 #include "scrub/xfs_scrub.h"
34 #include "scrub/scrub.h"
35 #include "scrub/common.h"
36 #include "scrub/btree.h"
37 #include "scrub/trace.h"
38 #include "scrub/repair.h"
39 #include "scrub/bitmap.h"
40 #include "scrub/fsb_bitmap.h"
41 #include "scrub/xfile.h"
42 #include "scrub/xfarray.h"
43 #include "scrub/newbt.h"
44 #include "scrub/reap.h"
45 
46 /*
47  * Inode Fork Block Mapping (BMBT) Repair
48  * ======================================
49  *
50  * Gather all the rmap records for the inode and fork we're fixing, reset the
51  * incore fork, then recreate the btree.
52  */
53 
/*
 * Tracks whether the repair has to set the REFLINK flag on the inode.  The
 * initial state is picked by xrep_bmap_init_reflink_scan(); while records are
 * being collected, xrep_bmap_discover_shared() can move the state to
 * RLS_SET_IFLAG, and xrep_bmap_reset_counters() acts on the final value.
 */
enum reflink_scan_state {
	RLS_IRRELEVANT = -1,	/* not applicable to this file */
	RLS_UNKNOWN,		/* shared extent scans required */
	RLS_SET_IFLAG,		/* iflag must be set */
};
59 
/* Working state for rebuilding one fork's block mappings. */
struct xrep_bmap {
	/* Old bmbt blocks */
	struct xfsb_bitmap	old_bmbt_blocks;

	/* New fork. */
	struct xrep_newbt	new_bmapbt;

	/* List of new bmap records. */
	struct xfarray		*bmap_records;

	/* Scrub context that owns this repair. */
	struct xfs_scrub	*sc;

	/* How many blocks did we find allocated to this file? */
	xfs_rfsblock_t		nblocks;

	/* How many bmbt blocks did we find for this fork? */
	xfs_rfsblock_t		old_bmbt_block_count;

	/* get_records()'s position in the bmap record array. */
	xfarray_idx_t		array_cur;

	/* How many real (non-hole, non-delalloc) mappings do we have? */
	uint64_t		real_mappings;

	/* Which fork are we fixing? */
	int			whichfork;

	/* Should the REFLINK flag be set when the repair is over? */
	enum reflink_scan_state	reflink_scan;

	/* Do we allow unwritten extents? */
	bool			allow_unwritten;
};
93 
94 /* Is this space extent shared?  Flag the inode if it is. */
95 STATIC int
96 xrep_bmap_discover_shared(
97 	struct xrep_bmap	*rb,
98 	xfs_fsblock_t		startblock,
99 	xfs_filblks_t		blockcount)
100 {
101 	struct xfs_scrub	*sc = rb->sc;
102 	xfs_agblock_t		agbno;
103 	xfs_agblock_t		fbno;
104 	xfs_extlen_t		flen;
105 	int			error;
106 
107 	agbno = XFS_FSB_TO_AGBNO(sc->mp, startblock);
108 	error = xfs_refcount_find_shared(sc->sa.refc_cur, agbno, blockcount,
109 			&fbno, &flen, false);
110 	if (error)
111 		return error;
112 
113 	if (fbno != NULLAGBLOCK)
114 		rb->reflink_scan = RLS_SET_IFLAG;
115 
116 	return 0;
117 }
118 
119 /* Remember this reverse-mapping as a series of bmap records. */
120 STATIC int
121 xrep_bmap_from_rmap(
122 	struct xrep_bmap	*rb,
123 	xfs_fileoff_t		startoff,
124 	xfs_fsblock_t		startblock,
125 	xfs_filblks_t		blockcount,
126 	bool			unwritten)
127 {
128 	struct xfs_bmbt_irec	irec = {
129 		.br_startoff	= startoff,
130 		.br_startblock	= startblock,
131 		.br_state	= unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM,
132 	};
133 	struct xfs_bmbt_rec	rbe;
134 	struct xfs_scrub	*sc = rb->sc;
135 	int			error = 0;
136 
137 	/*
138 	 * If we're repairing the data fork of a non-reflinked regular file on
139 	 * a reflink filesystem, we need to figure out if this space extent is
140 	 * shared.
141 	 */
142 	if (rb->reflink_scan == RLS_UNKNOWN && !unwritten) {
143 		error = xrep_bmap_discover_shared(rb, startblock, blockcount);
144 		if (error)
145 			return error;
146 	}
147 
148 	do {
149 		xfs_failaddr_t	fa;
150 
151 		irec.br_blockcount = min_t(xfs_filblks_t, blockcount,
152 				XFS_MAX_BMBT_EXTLEN);
153 
154 		fa = xfs_bmap_validate_extent(sc->ip, rb->whichfork, &irec);
155 		if (fa)
156 			return -EFSCORRUPTED;
157 
158 		xfs_bmbt_disk_set_all(&rbe, &irec);
159 
160 		trace_xrep_bmap_found(sc->ip, rb->whichfork, &irec);
161 
162 		if (xchk_should_terminate(sc, &error))
163 			return error;
164 
165 		error = xfarray_append(rb->bmap_records, &rbe);
166 		if (error)
167 			return error;
168 
169 		rb->real_mappings++;
170 
171 		irec.br_startblock += irec.br_blockcount;
172 		irec.br_startoff += irec.br_blockcount;
173 		blockcount -= irec.br_blockcount;
174 	} while (blockcount > 0);
175 
176 	return 0;
177 }
178 
179 /* Check for any obvious errors or conflicts in the file mapping. */
180 STATIC int
181 xrep_bmap_check_fork_rmap(
182 	struct xrep_bmap		*rb,
183 	struct xfs_btree_cur		*cur,
184 	const struct xfs_rmap_irec	*rec)
185 {
186 	struct xfs_scrub		*sc = rb->sc;
187 	enum xbtree_recpacking		outcome;
188 	int				error;
189 
190 	/*
191 	 * Data extents for rt files are never stored on the data device, but
192 	 * everything else (xattrs, bmbt blocks) can be.
193 	 */
194 	if (XFS_IS_REALTIME_INODE(sc->ip) &&
195 	    !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
196 		return -EFSCORRUPTED;
197 
198 	/* Check that this is within the AG. */
199 	if (!xfs_verify_agbext(cur->bc_ag.pag, rec->rm_startblock,
200 				rec->rm_blockcount))
201 		return -EFSCORRUPTED;
202 
203 	/* Check the file offset range. */
204 	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
205 	    !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
206 		return -EFSCORRUPTED;
207 
208 	/* No contradictory flags. */
209 	if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
210 	    (rec->rm_flags & XFS_RMAP_UNWRITTEN))
211 		return -EFSCORRUPTED;
212 
213 	/* Make sure this isn't free space. */
214 	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
215 			rec->rm_blockcount, &outcome);
216 	if (error)
217 		return error;
218 	if (outcome != XBTREE_RECPACKING_EMPTY)
219 		return -EFSCORRUPTED;
220 
221 	/* Must not be an inode chunk. */
222 	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
223 			rec->rm_startblock, rec->rm_blockcount, &outcome);
224 	if (error)
225 		return error;
226 	if (outcome != XBTREE_RECPACKING_EMPTY)
227 		return -EFSCORRUPTED;
228 
229 	return 0;
230 }
231 
232 /* Record extents that belong to this inode's fork. */
233 STATIC int
234 xrep_bmap_walk_rmap(
235 	struct xfs_btree_cur		*cur,
236 	const struct xfs_rmap_irec	*rec,
237 	void				*priv)
238 {
239 	struct xrep_bmap		*rb = priv;
240 	struct xfs_mount		*mp = cur->bc_mp;
241 	xfs_fsblock_t			fsbno;
242 	int				error = 0;
243 
244 	if (xchk_should_terminate(rb->sc, &error))
245 		return error;
246 
247 	if (rec->rm_owner != rb->sc->ip->i_ino)
248 		return 0;
249 
250 	error = xrep_bmap_check_fork_rmap(rb, cur, rec);
251 	if (error)
252 		return error;
253 
254 	/*
255 	 * Record all blocks allocated to this file even if the extent isn't
256 	 * for the fork we're rebuilding so that we can reset di_nblocks later.
257 	 */
258 	rb->nblocks += rec->rm_blockcount;
259 
260 	/* If this rmap isn't for the fork we want, we're done. */
261 	if (rb->whichfork == XFS_DATA_FORK &&
262 	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
263 		return 0;
264 	if (rb->whichfork == XFS_ATTR_FORK &&
265 	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
266 		return 0;
267 
268 	/* Reject unwritten extents if we don't allow those. */
269 	if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
270 		return -EFSCORRUPTED;
271 
272 	fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
273 			rec->rm_startblock);
274 
275 	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
276 		rb->old_bmbt_block_count += rec->rm_blockcount;
277 		return xfsb_bitmap_set(&rb->old_bmbt_blocks, fsbno,
278 				rec->rm_blockcount);
279 	}
280 
281 	return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno,
282 			rec->rm_blockcount,
283 			rec->rm_flags & XFS_RMAP_UNWRITTEN);
284 }
285 
286 /*
287  * Compare two block mapping records.  We want to sort in order of increasing
288  * file offset.
289  */
290 static int
291 xrep_bmap_extent_cmp(
292 	const void			*a,
293 	const void			*b)
294 {
295 	const struct xfs_bmbt_rec	*ba = a;
296 	const struct xfs_bmbt_rec	*bb = b;
297 	xfs_fileoff_t			ao = xfs_bmbt_disk_get_startoff(ba);
298 	xfs_fileoff_t			bo = xfs_bmbt_disk_get_startoff(bb);
299 
300 	if (ao > bo)
301 		return 1;
302 	else if (ao < bo)
303 		return -1;
304 	return 0;
305 }
306 
307 /*
308  * Sort the bmap extents by fork offset or else the records will be in the
309  * wrong order.  Ensure there are no overlaps in the file offset ranges.
310  */
311 STATIC int
312 xrep_bmap_sort_records(
313 	struct xrep_bmap	*rb)
314 {
315 	struct xfs_bmbt_irec	irec;
316 	xfs_fileoff_t		next_off = 0;
317 	xfarray_idx_t		array_cur;
318 	int			error;
319 
320 	error = xfarray_sort(rb->bmap_records, xrep_bmap_extent_cmp,
321 			XFARRAY_SORT_KILLABLE);
322 	if (error)
323 		return error;
324 
325 	foreach_xfarray_idx(rb->bmap_records, array_cur) {
326 		struct xfs_bmbt_rec	rec;
327 
328 		if (xchk_should_terminate(rb->sc, &error))
329 			return error;
330 
331 		error = xfarray_load(rb->bmap_records, array_cur, &rec);
332 		if (error)
333 			return error;
334 
335 		xfs_bmbt_disk_get_all(&rec, &irec);
336 
337 		if (irec.br_startoff < next_off)
338 			return -EFSCORRUPTED;
339 
340 		next_off = irec.br_startoff + irec.br_blockcount;
341 	}
342 
343 	return 0;
344 }
345 
346 /* Scan one AG for reverse mappings that we can turn into extent maps. */
347 STATIC int
348 xrep_bmap_scan_ag(
349 	struct xrep_bmap	*rb,
350 	struct xfs_perag	*pag)
351 {
352 	struct xfs_scrub	*sc = rb->sc;
353 	int			error;
354 
355 	error = xrep_ag_init(sc, pag, &sc->sa);
356 	if (error)
357 		return error;
358 
359 	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_bmap_walk_rmap, rb);
360 	xchk_ag_free(sc, &sc->sa);
361 	return error;
362 }
363 
364 /* Find the delalloc extents from the old incore extent tree. */
365 STATIC int
366 xrep_bmap_find_delalloc(
367 	struct xrep_bmap	*rb)
368 {
369 	struct xfs_bmbt_irec	irec;
370 	struct xfs_iext_cursor	icur;
371 	struct xfs_bmbt_rec	rbe;
372 	struct xfs_inode	*ip = rb->sc->ip;
373 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, rb->whichfork);
374 	int			error = 0;
375 
376 	/*
377 	 * Skip this scan if we don't expect to find delayed allocation
378 	 * reservations in this fork.
379 	 */
380 	if (rb->whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0)
381 		return 0;
382 
383 	for_each_xfs_iext(ifp, &icur, &irec) {
384 		if (!isnullstartblock(irec.br_startblock))
385 			continue;
386 
387 		xfs_bmbt_disk_set_all(&rbe, &irec);
388 
389 		trace_xrep_bmap_found(ip, rb->whichfork, &irec);
390 
391 		if (xchk_should_terminate(rb->sc, &error))
392 			return error;
393 
394 		error = xfarray_append(rb->bmap_records, &rbe);
395 		if (error)
396 			return error;
397 	}
398 
399 	return 0;
400 }
401 
402 /*
403  * Collect block mappings for this fork of this inode and decide if we have
404  * enough space to rebuild.  Caller is responsible for cleaning up the list if
405  * anything goes wrong.
406  */
407 STATIC int
408 xrep_bmap_find_mappings(
409 	struct xrep_bmap	*rb)
410 {
411 	struct xfs_scrub	*sc = rb->sc;
412 	struct xfs_perag	*pag;
413 	xfs_agnumber_t		agno;
414 	int			error = 0;
415 
416 	/* Iterate the rmaps for extents. */
417 	for_each_perag(sc->mp, agno, pag) {
418 		error = xrep_bmap_scan_ag(rb, pag);
419 		if (error) {
420 			xfs_perag_rele(pag);
421 			return error;
422 		}
423 	}
424 
425 	return xrep_bmap_find_delalloc(rb);
426 }
427 
428 /* Retrieve real extent mappings for bulk loading the bmap btree. */
429 STATIC int
430 xrep_bmap_get_records(
431 	struct xfs_btree_cur	*cur,
432 	unsigned int		idx,
433 	struct xfs_btree_block	*block,
434 	unsigned int		nr_wanted,
435 	void			*priv)
436 {
437 	struct xfs_bmbt_rec	rec;
438 	struct xfs_bmbt_irec	*irec = &cur->bc_rec.b;
439 	struct xrep_bmap	*rb = priv;
440 	union xfs_btree_rec	*block_rec;
441 	unsigned int		loaded;
442 	int			error;
443 
444 	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
445 		do {
446 			error = xfarray_load(rb->bmap_records, rb->array_cur++,
447 					&rec);
448 			if (error)
449 				return error;
450 
451 			xfs_bmbt_disk_get_all(&rec, irec);
452 		} while (isnullstartblock(irec->br_startblock));
453 
454 		block_rec = xfs_btree_rec_addr(cur, idx, block);
455 		cur->bc_ops->init_rec_from_cur(cur, block_rec);
456 	}
457 
458 	return loaded;
459 }
460 
461 /* Feed one of the new btree blocks to the bulk loader. */
462 STATIC int
463 xrep_bmap_claim_block(
464 	struct xfs_btree_cur	*cur,
465 	union xfs_btree_ptr	*ptr,
466 	void			*priv)
467 {
468 	struct xrep_bmap        *rb = priv;
469 
470 	return xrep_newbt_claim_block(cur, &rb->new_bmapbt, ptr);
471 }
472 
473 /* Figure out how much space we need to create the incore btree root block. */
474 STATIC size_t
475 xrep_bmap_iroot_size(
476 	struct xfs_btree_cur	*cur,
477 	unsigned int		level,
478 	unsigned int		nr_this_level,
479 	void			*priv)
480 {
481 	ASSERT(level > 0);
482 
483 	return XFS_BMAP_BROOT_SPACE_CALC(cur->bc_mp, nr_this_level);
484 }
485 
486 /* Update the inode counters. */
487 STATIC int
488 xrep_bmap_reset_counters(
489 	struct xrep_bmap	*rb)
490 {
491 	struct xfs_scrub	*sc = rb->sc;
492 	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
493 	int64_t			delta;
494 
495 	if (rb->reflink_scan == RLS_SET_IFLAG)
496 		sc->ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
497 
498 	/*
499 	 * Update the inode block counts to reflect the extents we found in the
500 	 * rmapbt.
501 	 */
502 	delta = ifake->if_blocks - rb->old_bmbt_block_count;
503 	sc->ip->i_nblocks = rb->nblocks + delta;
504 	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
505 
506 	/*
507 	 * Adjust the quota counts by the difference in size between the old
508 	 * and new bmbt.
509 	 */
510 	xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT, delta);
511 	return 0;
512 }
513 
514 /*
515  * Create a new iext tree and load it with block mappings.  If the inode is
516  * in extents format, that's all we need to do to commit the new mappings.
517  * If it is in btree format, this takes care of preloading the incore tree.
518  */
519 STATIC int
520 xrep_bmap_extents_load(
521 	struct xrep_bmap	*rb)
522 {
523 	struct xfs_iext_cursor	icur;
524 	struct xfs_bmbt_irec	irec;
525 	struct xfs_ifork	*ifp = rb->new_bmapbt.ifake.if_fork;
526 	xfarray_idx_t		array_cur;
527 	int			error;
528 
529 	ASSERT(ifp->if_bytes == 0);
530 
531 	/* Add all the mappings (incl. delalloc) to the incore extent tree. */
532 	xfs_iext_first(ifp, &icur);
533 	foreach_xfarray_idx(rb->bmap_records, array_cur) {
534 		struct xfs_bmbt_rec	rec;
535 
536 		error = xfarray_load(rb->bmap_records, array_cur, &rec);
537 		if (error)
538 			return error;
539 
540 		xfs_bmbt_disk_get_all(&rec, &irec);
541 
542 		xfs_iext_insert_raw(ifp, &icur, &irec);
543 		if (!isnullstartblock(irec.br_startblock))
544 			ifp->if_nextents++;
545 
546 		xfs_iext_next(ifp, &icur);
547 	}
548 
549 	return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
550 			ifp->if_nextents);
551 }
552 
553 /*
554  * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
555  * and load the incore extent tree.
556  */
557 STATIC int
558 xrep_bmap_btree_load(
559 	struct xrep_bmap	*rb,
560 	struct xfs_btree_cur	*bmap_cur)
561 {
562 	struct xfs_scrub	*sc = rb->sc;
563 	int			error;
564 
565 	/* Compute how many blocks we'll need. */
566 	error = xfs_btree_bload_compute_geometry(bmap_cur,
567 			&rb->new_bmapbt.bload, rb->real_mappings);
568 	if (error)
569 		return error;
570 
571 	/* Last chance to abort before we start committing fixes. */
572 	if (xchk_should_terminate(sc, &error))
573 		return error;
574 
575 	/*
576 	 * Guess how many blocks we're going to need to rebuild an entire bmap
577 	 * from the number of extents we found, and pump up our transaction to
578 	 * have sufficient block reservation.  We're allowed to exceed file
579 	 * quota to repair inconsistent metadata.
580 	 */
581 	error = xfs_trans_reserve_more_inode(sc->tp, sc->ip,
582 			rb->new_bmapbt.bload.nr_blocks, 0, true);
583 	if (error)
584 		return error;
585 
586 	/* Reserve the space we'll need for the new btree. */
587 	error = xrep_newbt_alloc_blocks(&rb->new_bmapbt,
588 			rb->new_bmapbt.bload.nr_blocks);
589 	if (error)
590 		return error;
591 
592 	/* Add all observed bmap records. */
593 	rb->array_cur = XFARRAY_CURSOR_INIT;
594 	error = xfs_btree_bload(bmap_cur, &rb->new_bmapbt.bload, rb);
595 	if (error)
596 		return error;
597 
598 	/*
599 	 * Load the new bmap records into the new incore extent tree to
600 	 * preserve delalloc reservations for regular files.  The directory
601 	 * code loads the extent tree during xfs_dir_open and assumes
602 	 * thereafter that it remains loaded, so we must not violate that
603 	 * assumption.
604 	 */
605 	return xrep_bmap_extents_load(rb);
606 }
607 
608 /*
609  * Use the collected bmap information to stage a new bmap fork.  If this is
610  * successful we'll return with the new fork information logged to the repair
611  * transaction but not yet committed.  The caller must ensure that the inode
612  * is joined to the transaction; the inode will be joined to a clean
613  * transaction when the function returns.
614  */
615 STATIC int
616 xrep_bmap_build_new_fork(
617 	struct xrep_bmap	*rb)
618 {
619 	struct xfs_owner_info	oinfo;
620 	struct xfs_scrub	*sc = rb->sc;
621 	struct xfs_btree_cur	*bmap_cur;
622 	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
623 	int			error;
624 
625 	error = xrep_bmap_sort_records(rb);
626 	if (error)
627 		return error;
628 
629 	/*
630 	 * Prepare to construct the new fork by initializing the new btree
631 	 * structure and creating a fake ifork in the ifakeroot structure.
632 	 */
633 	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
634 	error = xrep_newbt_init_inode(&rb->new_bmapbt, sc, rb->whichfork,
635 			&oinfo);
636 	if (error)
637 		return error;
638 
639 	rb->new_bmapbt.bload.get_records = xrep_bmap_get_records;
640 	rb->new_bmapbt.bload.claim_block = xrep_bmap_claim_block;
641 	rb->new_bmapbt.bload.iroot_size = xrep_bmap_iroot_size;
642 
643 	/*
644 	 * Allocate a new bmap btree cursor for reloading an inode block mapping
645 	 * data structure.
646 	 */
647 	bmap_cur = xfs_bmbt_init_cursor(sc->mp, NULL, sc->ip, XFS_STAGING_FORK);
648 	xfs_btree_stage_ifakeroot(bmap_cur, ifake);
649 
650 	/*
651 	 * Figure out the size and format of the new fork, then fill it with
652 	 * all the bmap records we've found.  Join the inode to the transaction
653 	 * so that we can roll the transaction while holding the inode locked.
654 	 */
655 	if (rb->real_mappings <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
656 		ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
657 		error = xrep_bmap_extents_load(rb);
658 	} else {
659 		ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
660 		error = xrep_bmap_btree_load(rb, bmap_cur);
661 	}
662 	if (error)
663 		goto err_cur;
664 
665 	/*
666 	 * Install the new fork in the inode.  After this point the old mapping
667 	 * data are no longer accessible and the new tree is live.  We delete
668 	 * the cursor immediately after committing the staged root because the
669 	 * staged fork might be in extents format.
670 	 */
671 	xfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
672 	xfs_btree_del_cursor(bmap_cur, 0);
673 
674 	/* Reset the inode counters now that we've changed the fork. */
675 	error = xrep_bmap_reset_counters(rb);
676 	if (error)
677 		goto err_newbt;
678 
679 	/* Dispose of any unused blocks and the accounting information. */
680 	error = xrep_newbt_commit(&rb->new_bmapbt);
681 	if (error)
682 		return error;
683 
684 	return xrep_roll_trans(sc);
685 
686 err_cur:
687 	if (bmap_cur)
688 		xfs_btree_del_cursor(bmap_cur, error);
689 err_newbt:
690 	xrep_newbt_cancel(&rb->new_bmapbt);
691 	return error;
692 }
693 
694 /*
695  * Now that we've logged the new inode btree, invalidate all of the old blocks
696  * and free them, if there were any.
697  */
698 STATIC int
699 xrep_bmap_remove_old_tree(
700 	struct xrep_bmap	*rb)
701 {
702 	struct xfs_scrub	*sc = rb->sc;
703 	struct xfs_owner_info	oinfo;
704 
705 	/* Free the old bmbt blocks if they're not in use. */
706 	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
707 	return xrep_reap_fsblocks(sc, &rb->old_bmbt_blocks, &oinfo);
708 }
709 
710 /* Check for garbage inputs.  Returns -ECANCELED if there's nothing to do. */
711 STATIC int
712 xrep_bmap_check_inputs(
713 	struct xfs_scrub	*sc,
714 	int			whichfork)
715 {
716 	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);
717 
718 	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
719 
720 	if (!xfs_has_rmapbt(sc->mp))
721 		return -EOPNOTSUPP;
722 
723 	/* No fork means nothing to rebuild. */
724 	if (!ifp)
725 		return -ECANCELED;
726 
727 	/*
728 	 * We only know how to repair extent mappings, which is to say that we
729 	 * only support extents and btree fork format.  Repairs to a local
730 	 * format fork require a higher level repair function, so we do not
731 	 * have any work to do here.
732 	 */
733 	switch (ifp->if_format) {
734 	case XFS_DINODE_FMT_DEV:
735 	case XFS_DINODE_FMT_LOCAL:
736 	case XFS_DINODE_FMT_UUID:
737 		return -ECANCELED;
738 	case XFS_DINODE_FMT_EXTENTS:
739 	case XFS_DINODE_FMT_BTREE:
740 		break;
741 	default:
742 		return -EFSCORRUPTED;
743 	}
744 
745 	if (whichfork == XFS_ATTR_FORK)
746 		return 0;
747 
748 	/* Only files, symlinks, and directories get to have data forks. */
749 	switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
750 	case S_IFREG:
751 	case S_IFDIR:
752 	case S_IFLNK:
753 		/* ok */
754 		break;
755 	default:
756 		return -EINVAL;
757 	}
758 
759 	/* Don't know how to rebuild realtime data forks. */
760 	if (XFS_IS_REALTIME_INODE(sc->ip))
761 		return -EOPNOTSUPP;
762 
763 	return 0;
764 }
765 
766 /* Set up the initial state of the reflink scan. */
767 static inline enum reflink_scan_state
768 xrep_bmap_init_reflink_scan(
769 	struct xfs_scrub	*sc,
770 	int			whichfork)
771 {
772 	/* cannot share on non-reflink filesystem */
773 	if (!xfs_has_reflink(sc->mp))
774 		return RLS_IRRELEVANT;
775 
776 	/* preserve flag if it's already set */
777 	if (xfs_is_reflink_inode(sc->ip))
778 		return RLS_SET_IFLAG;
779 
780 	/* can only share regular files */
781 	if (!S_ISREG(VFS_I(sc->ip)->i_mode))
782 		return RLS_IRRELEVANT;
783 
784 	/* cannot share attr fork extents */
785 	if (whichfork != XFS_DATA_FORK)
786 		return RLS_IRRELEVANT;
787 
788 	/* cannot share realtime extents */
789 	if (XFS_IS_REALTIME_INODE(sc->ip))
790 		return RLS_IRRELEVANT;
791 
792 	return RLS_UNKNOWN;
793 }
794 
795 /* Repair an inode fork. */
796 int
797 xrep_bmap(
798 	struct xfs_scrub	*sc,
799 	int			whichfork,
800 	bool			allow_unwritten)
801 {
802 	struct xrep_bmap	*rb;
803 	char			*descr;
804 	unsigned int		max_bmbt_recs;
805 	bool			large_extcount;
806 	int			error = 0;
807 
808 	error = xrep_bmap_check_inputs(sc, whichfork);
809 	if (error == -ECANCELED)
810 		return 0;
811 	if (error)
812 		return error;
813 
814 	rb = kzalloc(sizeof(struct xrep_bmap), XCHK_GFP_FLAGS);
815 	if (!rb)
816 		return -ENOMEM;
817 	rb->sc = sc;
818 	rb->whichfork = whichfork;
819 	rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork);
820 	rb->allow_unwritten = allow_unwritten;
821 
822 	/* Set up enough storage to handle the max records for this fork. */
823 	large_extcount = xfs_has_large_extent_counts(sc->mp);
824 	max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork);
825 	descr = xchk_xfile_ino_descr(sc, "%s fork mapping records",
826 			whichfork == XFS_DATA_FORK ? "data" : "attr");
827 	error = xfarray_create(descr, max_bmbt_recs,
828 			sizeof(struct xfs_bmbt_rec), &rb->bmap_records);
829 	kfree(descr);
830 	if (error)
831 		goto out_rb;
832 
833 	/* Collect all reverse mappings for this fork's extents. */
834 	xfsb_bitmap_init(&rb->old_bmbt_blocks);
835 	error = xrep_bmap_find_mappings(rb);
836 	if (error)
837 		goto out_bitmap;
838 
839 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
840 
841 	/* Rebuild the bmap information. */
842 	error = xrep_bmap_build_new_fork(rb);
843 	if (error)
844 		goto out_bitmap;
845 
846 	/* Kill the old tree. */
847 	error = xrep_bmap_remove_old_tree(rb);
848 	if (error)
849 		goto out_bitmap;
850 
851 out_bitmap:
852 	xfsb_bitmap_destroy(&rb->old_bmbt_blocks);
853 	xfarray_destroy(rb->bmap_records);
854 out_rb:
855 	kfree(rb);
856 	return error;
857 }
858 
859 /* Repair an inode's data fork. */
860 int
861 xrep_bmap_data(
862 	struct xfs_scrub	*sc)
863 {
864 	return xrep_bmap(sc, XFS_DATA_FORK, true);
865 }
866 
867 /* Repair an inode's attr fork. */
868 int
869 xrep_bmap_attr(
870 	struct xfs_scrub	*sc)
871 {
872 	return xrep_bmap(sc, XFS_ATTR_FORK, false);
873 }
874