xref: /linux/fs/xfs/scrub/bmap_repair.c (revision b477ff98d903618a1ab8247861f2ea6e70c0f0f8)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_btree_staging.h"
15 #include "xfs_bit.h"
16 #include "xfs_log_format.h"
17 #include "xfs_trans.h"
18 #include "xfs_sb.h"
19 #include "xfs_inode.h"
20 #include "xfs_inode_fork.h"
21 #include "xfs_alloc.h"
22 #include "xfs_rtalloc.h"
23 #include "xfs_bmap.h"
24 #include "xfs_bmap_util.h"
25 #include "xfs_bmap_btree.h"
26 #include "xfs_rmap.h"
27 #include "xfs_rmap_btree.h"
28 #include "xfs_rtrmap_btree.h"
29 #include "xfs_refcount.h"
30 #include "xfs_quota.h"
31 #include "xfs_ialloc.h"
32 #include "xfs_ag.h"
33 #include "xfs_reflink.h"
34 #include "xfs_rtgroup.h"
35 #include "scrub/xfs_scrub.h"
36 #include "scrub/scrub.h"
37 #include "scrub/common.h"
38 #include "scrub/btree.h"
39 #include "scrub/trace.h"
40 #include "scrub/repair.h"
41 #include "scrub/bitmap.h"
42 #include "scrub/fsb_bitmap.h"
43 #include "scrub/xfile.h"
44 #include "scrub/xfarray.h"
45 #include "scrub/newbt.h"
46 #include "scrub/reap.h"
47 
48 /*
49  * Inode Fork Block Mapping (BMBT) Repair
50  * ======================================
51  *
52  * Gather all the rmap records for the inode and fork we're fixing, reset the
53  * incore fork, then recreate the btree.
54  */
55 
56 enum reflink_scan_state {
57 	RLS_IRRELEVANT = -1,	/* not applicable to this file */
58 	RLS_UNKNOWN,		/* shared extent scans required */
59 	RLS_SET_IFLAG,		/* iflag must be set */
60 };
61 
62 struct xrep_bmap {
63 	/* Old bmbt blocks */
64 	struct xfsb_bitmap	old_bmbt_blocks;
65 
66 	/* New fork. */
67 	struct xrep_newbt	new_bmapbt;
68 
69 	/* List of new bmap records. */
70 	struct xfarray		*bmap_records;
71 
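	/* Scrub context for the repair we're running. */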
72 	struct xfs_scrub	*sc;
73 
74 	/* How many blocks did we find allocated to this file? */
75 	xfs_rfsblock_t		nblocks;
76 
77 	/* How many bmbt blocks did we find for this fork? */
78 	xfs_rfsblock_t		old_bmbt_block_count;
79 
80 	/* get_records()'s position in the free space record array. */
81 	xfarray_idx_t		array_cur;
82 
83 	/* How many real (non-hole, non-delalloc) mappings do we have? */
84 	uint64_t		real_mappings;
85 
86 	/* Which fork are we fixing? */
87 	int			whichfork;
88 
89 	/* Should the REFLINK flag be set when the repair is over? */
90 	enum reflink_scan_state	reflink_scan;
91 
92 	/* Do we allow unwritten extents? */
93 	bool			allow_unwritten;
94 };
95 
96 /* Is this space extent shared?  Flag the inode if it is. */
97 STATIC int
98 xrep_bmap_discover_shared(
99 	struct xrep_bmap	*rb,
100 	xfs_fsblock_t		startblock,
101 	xfs_filblks_t		blockcount)
102 {
103 	struct xfs_scrub	*sc = rb->sc;
104 	struct xfs_btree_cur	*cur;
105 	xfs_agblock_t		agbno;
106 	xfs_agblock_t		fbno;
107 	xfs_extlen_t		flen;
108 	int			error;
109 
110 	if (XFS_IS_REALTIME_INODE(sc->ip)) {
111 		agbno = xfs_rtb_to_rgbno(sc->mp, startblock);
112 		cur = sc->sr.refc_cur;
113 	} else {
114 		agbno = XFS_FSB_TO_AGBNO(sc->mp, startblock);
115 		cur = sc->sa.refc_cur;
116 	}
117 	error = xfs_refcount_find_shared(cur, agbno, blockcount, &fbno, &flen,
118 			false);
119 	if (error)
120 		return error;
121 
122 	if (fbno != NULLAGBLOCK)
123 		rb->reflink_scan = RLS_SET_IFLAG;
124 
125 	return 0;
126 }
127 
128 /* Remember this reverse-mapping as a series of bmap records. */
129 STATIC int
130 xrep_bmap_from_rmap(
131 	struct xrep_bmap	*rb,
132 	xfs_fileoff_t		startoff,
133 	xfs_fsblock_t		startblock,
134 	xfs_filblks_t		blockcount,
135 	bool			unwritten)
136 {
137 	struct xfs_bmbt_irec	irec = {
138 		.br_startoff	= startoff,
139 		.br_startblock	= startblock,
140 		.br_state	= unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM,
141 	};
142 	struct xfs_bmbt_rec	rbe;
143 	struct xfs_scrub	*sc = rb->sc;
144 	int			error = 0;
145 
146 	/*
147 	 * If we're repairing the data fork of a non-reflinked regular file on
148 	 * a reflink filesystem, we need to figure out if this space extent is
149 	 * shared.
150 	 */
151 	if (rb->reflink_scan == RLS_UNKNOWN && !unwritten) {
152 		error = xrep_bmap_discover_shared(rb, startblock, blockcount);
153 		if (error)
154 			return error;
155 	}
156 
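	/*
	 * Chop the mapping into bmbt-sized records, since a single bmbt
	 * record can cover at most XFS_MAX_BMBT_EXTLEN blocks.
	 */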
157 	do {
158 		xfs_failaddr_t	fa;
159 
160 		irec.br_blockcount = min_t(xfs_filblks_t, blockcount,
161 				XFS_MAX_BMBT_EXTLEN);
162 
163 		fa = xfs_bmap_validate_extent(sc->ip, rb->whichfork, &irec);
164 		if (fa)
165 			return -EFSCORRUPTED;
166 
167 		xfs_bmbt_disk_set_all(&rbe, &irec);
168 
169 		trace_xrep_bmap_found(sc->ip, rb->whichfork, &irec);
170 
171 		if (xchk_should_terminate(sc, &error))
172 			return error;
173 
174 		error = xfarray_append(rb->bmap_records, &rbe);
175 		if (error)
176 			return error;
177 
178 		rb->real_mappings++;
179 
180 		irec.br_startblock += irec.br_blockcount;
181 		irec.br_startoff += irec.br_blockcount;
182 		blockcount -= irec.br_blockcount;
183 	} while (blockcount > 0);
184 
185 	return 0;
186 }
187 
188 /* Check for any obvious errors or conflicts in the file mapping. */
189 STATIC int
190 xrep_bmap_check_fork_rmap(
191 	struct xrep_bmap		*rb,
192 	struct xfs_btree_cur		*cur,
193 	const struct xfs_rmap_irec	*rec)
194 {
195 	struct xfs_scrub		*sc = rb->sc;
196 	enum xbtree_recpacking		outcome;
197 	int				error;
198 
199 	/*
200 	 * Data extents for rt files are never stored on the data device, but
201 	 * everything else (xattrs, bmbt blocks) can be.
202 	 */
203 	if (XFS_IS_REALTIME_INODE(sc->ip) &&
204 	    !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
205 		return -EFSCORRUPTED;
206 
207 	/* Check that this is within the AG. */
208 	if (!xfs_verify_agbext(to_perag(cur->bc_group), rec->rm_startblock,
209 				rec->rm_blockcount))
210 		return -EFSCORRUPTED;
211 
212 	/* Check the file offset range. */
213 	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
214 	    !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
215 		return -EFSCORRUPTED;
216 
217 	/* No contradictory flags. */
218 	if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
219 	    (rec->rm_flags & XFS_RMAP_UNWRITTEN))
220 		return -EFSCORRUPTED;
221 
222 	/* Make sure this isn't free space. */
223 	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
224 			rec->rm_blockcount, &outcome);
225 	if (error)
226 		return error;
227 	if (outcome != XBTREE_RECPACKING_EMPTY)
228 		return -EFSCORRUPTED;
229 
230 	/* Must not be an inode chunk. */
231 	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
232 			rec->rm_startblock, rec->rm_blockcount, &outcome);
233 	if (error)
234 		return error;
235 	if (outcome != XBTREE_RECPACKING_EMPTY)
236 		return -EFSCORRUPTED;
237 
238 	return 0;
239 }
240 
241 /* Record extents that belong to this inode's fork. */
242 STATIC int
243 xrep_bmap_walk_rmap(
244 	struct xfs_btree_cur		*cur,
245 	const struct xfs_rmap_irec	*rec,
246 	void				*priv)
247 {
248 	struct xrep_bmap		*rb = priv;
249 	xfs_fsblock_t			fsbno;
250 	int				error = 0;
251 
252 	if (xchk_should_terminate(rb->sc, &error))
253 		return error;
254 
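	/* Skip rmap records that aren't owned by this inode. */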
255 	if (rec->rm_owner != rb->sc->ip->i_ino)
256 		return 0;
257 
258 	error = xrep_bmap_check_fork_rmap(rb, cur, rec);
259 	if (error)
260 		return error;
261 
262 	/*
263 	 * Record all blocks allocated to this file even if the extent isn't
264 	 * for the fork we're rebuilding so that we can reset di_nblocks later.
265 	 */
266 	rb->nblocks += rec->rm_blockcount;
267 
268 	/* If this rmap isn't for the fork we want, we're done. */
269 	if (rb->whichfork == XFS_DATA_FORK &&
270 	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
271 		return 0;
272 	if (rb->whichfork == XFS_ATTR_FORK &&
273 	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
274 		return 0;
275 
276 	/* Reject unwritten extents if we don't allow those. */
277 	if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
278 		return -EFSCORRUPTED;
279 
280 	fsbno = xfs_agbno_to_fsb(to_perag(cur->bc_group), rec->rm_startblock);
281 
282 	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
283 		rb->old_bmbt_block_count += rec->rm_blockcount;
284 		return xfsb_bitmap_set(&rb->old_bmbt_blocks, fsbno,
285 				rec->rm_blockcount);
286 	}
287 
288 	return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno,
289 			rec->rm_blockcount,
290 			rec->rm_flags & XFS_RMAP_UNWRITTEN);
291 }
292 
293 /*
294  * Compare two block mapping records.  We want to sort in order of increasing
295  * file offset.
296  */
297 static int
298 xrep_bmap_extent_cmp(
299 	const void			*a,
300 	const void			*b)
301 {
302 	const struct xfs_bmbt_rec	*ba = a;
303 	const struct xfs_bmbt_rec	*bb = b;
304 	xfs_fileoff_t			ao = xfs_bmbt_disk_get_startoff(ba);
305 	xfs_fileoff_t			bo = xfs_bmbt_disk_get_startoff(bb);
306 
307 	if (ao > bo)
308 		return 1;
309 	else if (ao < bo)
310 		return -1;
311 	return 0;
312 }
313 
314 /*
315  * Sort the bmap extents by fork offset or else the records will be in the
316  * wrong order.  Ensure there are no overlaps in the file offset ranges.
317  */
318 STATIC int
319 xrep_bmap_sort_records(
320 	struct xrep_bmap	*rb)
321 {
322 	struct xfs_bmbt_irec	irec;
323 	xfs_fileoff_t		next_off = 0;
324 	xfarray_idx_t		array_cur;
325 	int			error;
326 
327 	error = xfarray_sort(rb->bmap_records, xrep_bmap_extent_cmp,
328 			XFARRAY_SORT_KILLABLE);
329 	if (error)
330 		return error;
331 
332 	foreach_xfarray_idx(rb->bmap_records, array_cur) {
333 		struct xfs_bmbt_rec	rec;
334 
335 		if (xchk_should_terminate(rb->sc, &error))
336 			return error;
337 
338 		error = xfarray_load(rb->bmap_records, array_cur, &rec);
339 		if (error)
340 			return error;
341 
342 		xfs_bmbt_disk_get_all(&rec, &irec);
343 
344 		if (irec.br_startoff < next_off)
345 			return -EFSCORRUPTED;
346 
347 		next_off = irec.br_startoff + irec.br_blockcount;
348 	}
349 
350 	return 0;
351 }
352 
353 /* Scan one AG for reverse mappings that we can turn into extent maps. */
354 STATIC int
355 xrep_bmap_scan_ag(
356 	struct xrep_bmap	*rb,
357 	struct xfs_perag	*pag)
358 {
359 	struct xfs_scrub	*sc = rb->sc;
360 	int			error;
361 
362 	error = xrep_ag_init(sc, pag, &sc->sa);
363 	if (error)
364 		return error;
365 
366 	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_bmap_walk_rmap, rb);
367 	xchk_ag_free(sc, &sc->sa);
368 	return error;
369 }
370 
371 #ifdef CONFIG_XFS_RT
372 /* Check for any obvious errors or conflicts in the file mapping. */
373 STATIC int
374 xrep_bmap_check_rtfork_rmap(
375 	struct xfs_scrub		*sc,
376 	struct xfs_btree_cur		*cur,
377 	const struct xfs_rmap_irec	*rec)
378 {
379 	/* xattr extents are never stored on realtime devices */
380 	if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
381 		return -EFSCORRUPTED;
382 
383 	/* bmbt blocks are never stored on realtime devices */
384 	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
385 		return -EFSCORRUPTED;
386 
387 	/* Data extents for non-rt files are never stored on the rt device. */
388 	if (!XFS_IS_REALTIME_INODE(sc->ip))
389 		return -EFSCORRUPTED;
390 
391 	/* Check the file offsets and physical extents. */
392 	if (!xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
393 		return -EFSCORRUPTED;
394 
395 	/* Check that this is within the rtgroup. */
396 	if (!xfs_verify_rgbext(to_rtg(cur->bc_group), rec->rm_startblock,
397 				rec->rm_blockcount))
398 		return -EFSCORRUPTED;
399 
400 	/* Make sure this isn't free space. */
401 	return xrep_require_rtext_inuse(sc, rec->rm_startblock,
402 			rec->rm_blockcount);
403 }
404 
405 /* Record realtime extents that belong to this inode's fork. */
406 STATIC int
407 xrep_bmap_walk_rtrmap(
408 	struct xfs_btree_cur		*cur,
409 	const struct xfs_rmap_irec	*rec,
410 	void				*priv)
411 {
412 	struct xrep_bmap		*rb = priv;
413 	int				error = 0;
414 
415 	if (xchk_should_terminate(rb->sc, &error))
416 		return error;
417 
418 	/* Skip extents which are not owned by this inode and fork. */
419 	if (rec->rm_owner != rb->sc->ip->i_ino)
420 		return 0;
421 
422 	error = xrep_bmap_check_rtfork_rmap(rb->sc, cur, rec);
423 	if (error)
424 		return error;
425 
426 	/*
427 	 * Record all blocks allocated to this file even if the extent isn't
428 	 * for the fork we're rebuilding so that we can reset di_nblocks later.
429 	 */
430 	rb->nblocks += rec->rm_blockcount;
431 
432 	/* If this rmap isn't for the fork we want, we're done. */
433 	if (rb->whichfork == XFS_DATA_FORK &&
434 	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
435 		return 0;
436 	if (rb->whichfork == XFS_ATTR_FORK &&
437 	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
438 		return 0;
439 
440 	return xrep_bmap_from_rmap(rb, rec->rm_offset,
441 			xfs_rgbno_to_rtb(to_rtg(cur->bc_group),
442 				rec->rm_startblock),
443 			rec->rm_blockcount,
444 			rec->rm_flags & XFS_RMAP_UNWRITTEN);
445 }
446 
447 /* Scan the realtime reverse mappings to build the new extent map. */
448 STATIC int
449 xrep_bmap_scan_rtgroup(
450 	struct xrep_bmap	*rb,
451 	struct xfs_rtgroup	*rtg)
452 {
453 	struct xfs_scrub	*sc = rb->sc;
454 	int			error;
455 
456 	if (!xfs_has_rtrmapbt(sc->mp))
457 		return 0;
458 
459 	error = xrep_rtgroup_init(sc, rtg, &sc->sr,
460 			XFS_RTGLOCK_RMAP |
461 			XFS_RTGLOCK_REFCOUNT |
462 			XFS_RTGLOCK_BITMAP_SHARED);
463 	if (error)
464 		return error;
465 
466 	error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_bmap_walk_rtrmap, rb);
467 	xchk_rtgroup_btcur_free(&sc->sr);
468 	xchk_rtgroup_free(sc, &sc->sr);
469 	return error;
470 }
471 #else
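/*
 * Without CONFIG_XFS_RT we don't expect to be asked to scan any realtime
 * groups at all, so treat a call here as corruption.
 */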
472 static inline int
473 xrep_bmap_scan_rtgroup(struct xrep_bmap *rb, struct xfs_rtgroup *rtg)
474 {
475 	return -EFSCORRUPTED;
476 }
477 #endif
478 
479 /* Find the delalloc extents from the old incore extent tree. */
480 STATIC int
481 xrep_bmap_find_delalloc(
482 	struct xrep_bmap	*rb)
483 {
484 	struct xfs_bmbt_irec	irec;
485 	struct xfs_iext_cursor	icur;
486 	struct xfs_bmbt_rec	rbe;
487 	struct xfs_inode	*ip = rb->sc->ip;
488 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, rb->whichfork);
489 	int			error = 0;
490 
491 	/*
492 	 * Skip this scan if we don't expect to find delayed allocation
493 	 * reservations in this fork.
494 	 */
495 	if (rb->whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0)
496 		return 0;
497 
498 	for_each_xfs_iext(ifp, &icur, &irec) {
499 		if (!isnullstartblock(irec.br_startblock))
500 			continue;
501 
502 		xfs_bmbt_disk_set_all(&rbe, &irec);
503 
504 		trace_xrep_bmap_found(ip, rb->whichfork, &irec);
505 
506 		if (xchk_should_terminate(rb->sc, &error))
507 			return error;
508 
509 		error = xfarray_append(rb->bmap_records, &rbe);
510 		if (error)
511 			return error;
512 	}
513 
514 	return 0;
515 }
516 
517 /*
518  * Collect block mappings for this fork of this inode and decide if we have
519  * enough space to rebuild.  Caller is responsible for cleaning up the list if
520  * anything goes wrong.
521  */
522 STATIC int
523 xrep_bmap_find_mappings(
524 	struct xrep_bmap	*rb)
525 {
526 	struct xfs_scrub	*sc = rb->sc;
527 	struct xfs_perag	*pag = NULL;
528 	int			error = 0;
529 
530 	/*
531 	 * Iterate the rtrmaps for extents.  Metadata files never have content
532 	 * on the realtime device, so there's no need to scan them.
533 	 */
534 	if (!xfs_is_metadir_inode(sc->ip)) {
535 		struct xfs_rtgroup	*rtg = NULL;
536 
537 		while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
538 			error = xrep_bmap_scan_rtgroup(rb, rtg);
539 			if (error) {
540 				xfs_rtgroup_rele(rtg);
541 				return error;
542 			}
543 		}
544 	}
545 
546 	/* Iterate the rmaps for extents. */
547 	while ((pag = xfs_perag_next(sc->mp, pag))) {
548 		error = xrep_bmap_scan_ag(rb, pag);
549 		if (error) {
550 			xfs_perag_rele(pag);
551 			return error;
552 		}
553 	}
554 
555 	return xrep_bmap_find_delalloc(rb);
556 }
557 
558 /* Retrieve real extent mappings for bulk loading the bmap btree. */
559 STATIC int
560 xrep_bmap_get_records(
561 	struct xfs_btree_cur	*cur,
562 	unsigned int		idx,
563 	struct xfs_btree_block	*block,
564 	unsigned int		nr_wanted,
565 	void			*priv)
566 {
567 	struct xfs_bmbt_rec	rec;
568 	struct xfs_bmbt_irec	*irec = &cur->bc_rec.b;
569 	struct xrep_bmap	*rb = priv;
570 	union xfs_btree_rec	*block_rec;
571 	unsigned int		loaded;
572 	int			error;
573 
574 	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
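		/*
		 * Pull records until we find a real mapping; delalloc
		 * records (null startblock) only live in the incore extent
		 * tree, not the ondisk bmbt.
		 */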
575 		do {
576 			error = xfarray_load(rb->bmap_records, rb->array_cur++,
577 					&rec);
578 			if (error)
579 				return error;
580 
581 			xfs_bmbt_disk_get_all(&rec, irec);
582 		} while (isnullstartblock(irec->br_startblock));
583 
584 		block_rec = xfs_btree_rec_addr(cur, idx, block);
585 		cur->bc_ops->init_rec_from_cur(cur, block_rec);
586 	}
587 
588 	return loaded;
589 }
590 
591 /* Feed one of the new btree blocks to the bulk loader. */
592 STATIC int
593 xrep_bmap_claim_block(
594 	struct xfs_btree_cur	*cur,
595 	union xfs_btree_ptr	*ptr,
596 	void			*priv)
597 {
598 	struct xrep_bmap	*rb = priv;
599 
600 	return xrep_newbt_claim_block(cur, &rb->new_bmapbt, ptr);
601 }
602 
603 /* Figure out how much space we need to create the incore btree root block. */
604 STATIC size_t
605 xrep_bmap_iroot_size(
606 	struct xfs_btree_cur	*cur,
607 	unsigned int		level,
608 	unsigned int		nr_this_level,
609 	void			*priv)
610 {
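	/*
	 * The incore root is always a node block; if the mappings had fit in
	 * a single leaf, the fork would be in extents format instead.
	 */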
611 	ASSERT(level > 0);
612 
613 	return xfs_bmap_broot_space_calc(cur->bc_mp, nr_this_level);
614 }
615 
616 /* Update the inode counters. */
617 STATIC int
618 xrep_bmap_reset_counters(
619 	struct xrep_bmap	*rb)
620 {
621 	struct xfs_scrub	*sc = rb->sc;
622 	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
623 	int64_t			delta;
624 
625 	if (rb->reflink_scan == RLS_SET_IFLAG)
626 		sc->ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
627 
628 	/*
629 	 * Update the inode block counts to reflect the extents we found in the
630 	 * rmapbt.
631 	 */
632 	delta = ifake->if_blocks - rb->old_bmbt_block_count;
633 	sc->ip->i_nblocks = rb->nblocks + delta;
634 	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
635 
636 	/*
637 	 * Adjust the quota counts by the difference in size between the old
638 	 * and new bmbt.
639 	 */
640 	xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT, delta);
641 	return 0;
642 }
643 
644 /*
645  * Create a new iext tree and load it with block mappings.  If the inode is
646  * in extents format, that's all we need to do to commit the new mappings.
647  * If it is in btree format, this takes care of preloading the incore tree.
648  */
649 STATIC int
650 xrep_bmap_extents_load(
651 	struct xrep_bmap	*rb)
652 {
653 	struct xfs_iext_cursor	icur;
654 	struct xfs_bmbt_irec	irec;
655 	struct xfs_ifork	*ifp = rb->new_bmapbt.ifake.if_fork;
656 	xfarray_idx_t		array_cur;
657 	int			error;
658 
659 	ASSERT(ifp->if_bytes == 0);
660 
661 	/* Add all the mappings (incl. delalloc) to the incore extent tree. */
662 	xfs_iext_first(ifp, &icur);
663 	foreach_xfarray_idx(rb->bmap_records, array_cur) {
664 		struct xfs_bmbt_rec	rec;
665 
666 		error = xfarray_load(rb->bmap_records, array_cur, &rec);
667 		if (error)
668 			return error;
669 
670 		xfs_bmbt_disk_get_all(&rec, &irec);
671 
672 		xfs_iext_insert_raw(ifp, &icur, &irec);
673 		if (!isnullstartblock(irec.br_startblock))
674 			ifp->if_nextents++;
675 
676 		xfs_iext_next(ifp, &icur);
677 	}
678 
679 	return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
680 			ifp->if_nextents);
681 }
682 
683 /*
684  * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
685  * and load the incore extent tree.
686  */
687 STATIC int
688 xrep_bmap_btree_load(
689 	struct xrep_bmap	*rb,
690 	struct xfs_btree_cur	*bmap_cur)
691 {
692 	struct xfs_scrub	*sc = rb->sc;
693 	int			error;
694 
695 	/* Compute how many blocks we'll need. */
696 	error = xfs_btree_bload_compute_geometry(bmap_cur,
697 			&rb->new_bmapbt.bload, rb->real_mappings);
698 	if (error)
699 		return error;
700 
701 	/* Last chance to abort before we start committing fixes. */
702 	if (xchk_should_terminate(sc, &error))
703 		return error;
704 
705 	/*
706 	 * Guess how many blocks we're going to need to rebuild an entire bmap
707 	 * from the number of extents we found, and pump up our transaction to
708 	 * have sufficient block reservation.  We're allowed to exceed file
709 	 * quota to repair inconsistent metadata.
710 	 */
711 	error = xfs_trans_reserve_more_inode(sc->tp, sc->ip,
712 			rb->new_bmapbt.bload.nr_blocks, 0, true);
713 	if (error)
714 		return error;
715 
716 	/* Reserve the space we'll need for the new btree. */
717 	error = xrep_newbt_alloc_blocks(&rb->new_bmapbt,
718 			rb->new_bmapbt.bload.nr_blocks);
719 	if (error)
720 		return error;
721 
722 	/* Add all observed bmap records. */
723 	rb->array_cur = XFARRAY_CURSOR_INIT;
724 	error = xfs_btree_bload(bmap_cur, &rb->new_bmapbt.bload, rb);
725 	if (error)
726 		return error;
727 
728 	/*
729 	 * Load the new bmap records into the new incore extent tree to
730 	 * preserve delalloc reservations for regular files.  The directory
731 	 * code loads the extent tree during xfs_dir_open and assumes
732 	 * thereafter that it remains loaded, so we must not violate that
733 	 * assumption.
734 	 */
735 	return xrep_bmap_extents_load(rb);
736 }
737 
738 /*
739  * Use the collected bmap information to stage a new bmap fork.  If this is
740  * successful we'll return with the new fork information logged to the repair
741  * transaction but not yet committed.  The caller must ensure that the inode
742  * is joined to the transaction; the inode will be joined to a clean
743  * transaction when the function returns.
744  */
745 STATIC int
746 xrep_bmap_build_new_fork(
747 	struct xrep_bmap	*rb)
748 {
749 	struct xfs_owner_info	oinfo;
750 	struct xfs_scrub	*sc = rb->sc;
751 	struct xfs_btree_cur	*bmap_cur;
752 	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
753 	int			error;
754 
755 	error = xrep_bmap_sort_records(rb);
756 	if (error)
757 		return error;
758 
759 	/*
760 	 * Prepare to construct the new fork by initializing the new btree
761 	 * structure and creating a fake ifork in the ifakeroot structure.
762 	 */
763 	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
764 	error = xrep_newbt_init_inode(&rb->new_bmapbt, sc, rb->whichfork,
765 			&oinfo);
766 	if (error)
767 		return error;
768 
769 	rb->new_bmapbt.bload.get_records = xrep_bmap_get_records;
770 	rb->new_bmapbt.bload.claim_block = xrep_bmap_claim_block;
771 	rb->new_bmapbt.bload.iroot_size = xrep_bmap_iroot_size;
772 
773 	/*
774 	 * Allocate a new bmap btree cursor for reloading an inode block mapping
775 	 * data structure.
776 	 */
777 	bmap_cur = xfs_bmbt_init_cursor(sc->mp, NULL, sc->ip, XFS_STAGING_FORK);
778 	xfs_btree_stage_ifakeroot(bmap_cur, ifake);
779 
780 	/*
781 	 * Figure out the size and format of the new fork, then fill it with
782 	 * all the bmap records we've found.  Join the inode to the transaction
783 	 * so that we can roll the transaction while holding the inode locked.
784 	 */
785 	if (rb->real_mappings <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
786 		ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
787 		error = xrep_bmap_extents_load(rb);
788 	} else {
789 		ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
790 		error = xrep_bmap_btree_load(rb, bmap_cur);
791 	}
792 	if (error)
793 		goto err_cur;
794 
795 	/*
796 	 * Install the new fork in the inode.  After this point the old mapping
797 	 * data are no longer accessible and the new tree is live.  We delete
798 	 * the cursor immediately after committing the staged root because the
799 	 * staged fork might be in extents format.
800 	 */
801 	xfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
802 	xfs_btree_del_cursor(bmap_cur, 0);
803 
804 	/* Reset the inode counters now that we've changed the fork. */
805 	error = xrep_bmap_reset_counters(rb);
806 	if (error)
807 		goto err_newbt;
808 
809 	/* Dispose of any unused blocks and the accounting information. */
810 	error = xrep_newbt_commit(&rb->new_bmapbt);
811 	if (error)
812 		return error;
813 
814 	return xrep_roll_trans(sc);
815 
816 err_cur:
817 	if (bmap_cur)
818 		xfs_btree_del_cursor(bmap_cur, error);
819 err_newbt:
820 	xrep_newbt_cancel(&rb->new_bmapbt);
821 	return error;
822 }
823 
824 /*
825  * Now that we've logged the new inode btree, invalidate all of the old blocks
826  * and free them, if there were any.
827  */
828 STATIC int
829 xrep_bmap_remove_old_tree(
830 	struct xrep_bmap	*rb)
831 {
832 	struct xfs_scrub	*sc = rb->sc;
833 	struct xfs_owner_info	oinfo;
834 
835 	/* Free the old bmbt blocks if they're not in use. */
836 	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
837 	return xrep_reap_fsblocks(sc, &rb->old_bmbt_blocks, &oinfo);
838 }
839 
840 /* Check for garbage inputs.  Returns -ECANCELED if there's nothing to do. */
841 STATIC int
842 xrep_bmap_check_inputs(
843 	struct xfs_scrub	*sc,
844 	int			whichfork)
845 {
846 	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);
847 
848 	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
849 
850 	if (!xfs_has_rmapbt(sc->mp))
851 		return -EOPNOTSUPP;
852 
853 	/* No fork means nothing to rebuild. */
854 	if (!ifp)
855 		return -ECANCELED;
856 
857 	/*
858 	 * We only know how to repair extent mappings, which is to say that we
859 	 * only support extents and btree fork format.  Repairs to a local
860 	 * format fork require a higher level repair function, so we do not
861 	 * have any work to do here.
862 	 */
863 	switch (ifp->if_format) {
864 	case XFS_DINODE_FMT_DEV:
865 	case XFS_DINODE_FMT_LOCAL:
866 	case XFS_DINODE_FMT_UUID:
867 	case XFS_DINODE_FMT_META_BTREE:
868 		return -ECANCELED;
869 	case XFS_DINODE_FMT_EXTENTS:
870 	case XFS_DINODE_FMT_BTREE:
871 		break;
872 	default:
873 		return -EFSCORRUPTED;
874 	}
875 
876 	if (whichfork == XFS_ATTR_FORK)
877 		return 0;
878 
879 	/* Only files, symlinks, and directories get to have data forks. */
880 	switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
881 	case S_IFREG:
882 	case S_IFDIR:
883 	case S_IFLNK:
884 		/* ok */
885 		break;
886 	default:
887 		return -EINVAL;
888 	}
889 
890 	return 0;
891 }
892 
893 /* Set up the initial state of the reflink scan. */
894 static inline enum reflink_scan_state
895 xrep_bmap_init_reflink_scan(
896 	struct xfs_scrub	*sc,
897 	int			whichfork)
898 {
899 	/* cannot share on non-reflink filesystem */
900 	if (!xfs_has_reflink(sc->mp))
901 		return RLS_IRRELEVANT;
902 
903 	/* preserve flag if it's already set */
904 	if (xfs_is_reflink_inode(sc->ip))
905 		return RLS_SET_IFLAG;
906 
907 	/* can only share regular files */
908 	if (!S_ISREG(VFS_I(sc->ip)->i_mode))
909 		return RLS_IRRELEVANT;
910 
911 	/* cannot share attr fork extents */
912 	if (whichfork != XFS_DATA_FORK)
913 		return RLS_IRRELEVANT;
914 
915 	return RLS_UNKNOWN;
916 }
917 
918 /* Repair an inode fork. */
919 int
920 xrep_bmap(
921 	struct xfs_scrub	*sc,
922 	int			whichfork,
923 	bool			allow_unwritten)
924 {
925 	struct xrep_bmap	*rb;
926 	char			*descr;
927 	xfs_extnum_t		max_bmbt_recs;
928 	bool			large_extcount;
929 	int			error = 0;
930 
931 	error = xrep_bmap_check_inputs(sc, whichfork);
932 	if (error == -ECANCELED)
933 		return 0;
934 	if (error)
935 		return error;
936 
937 	rb = kzalloc(sizeof(struct xrep_bmap), XCHK_GFP_FLAGS);
938 	if (!rb)
939 		return -ENOMEM;
940 	rb->sc = sc;
941 	rb->whichfork = whichfork;
942 	rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork);
943 	rb->allow_unwritten = allow_unwritten;
944 
945 	/* Set up enough storage to handle the max records for this fork. */
946 	large_extcount = xfs_has_large_extent_counts(sc->mp);
947 	max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork);
948 	descr = xchk_xfile_ino_descr(sc, "%s fork mapping records",
949 			whichfork == XFS_DATA_FORK ? "data" : "attr");
950 	error = xfarray_create(descr, max_bmbt_recs,
951 			sizeof(struct xfs_bmbt_rec), &rb->bmap_records);
952 	kfree(descr);
953 	if (error)
954 		goto out_rb;
955 
956 	/* Collect all reverse mappings for this fork's extents. */
957 	xfsb_bitmap_init(&rb->old_bmbt_blocks);
958 	error = xrep_bmap_find_mappings(rb);
959 	if (error)
960 		goto out_bitmap;
961 
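	/*
	 * The fork rebuild below requires the inode to be joined to the
	 * repair transaction.
	 */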
962 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
963 
964 	/* Rebuild the bmap information. */
965 	error = xrep_bmap_build_new_fork(rb);
966 	if (error)
967 		goto out_bitmap;
968 
969 	/* Kill the old tree. */
970 	error = xrep_bmap_remove_old_tree(rb);
971 	if (error)
972 		goto out_bitmap;
973 
974 out_bitmap:
975 	xfsb_bitmap_destroy(&rb->old_bmbt_blocks);
976 	xfarray_destroy(rb->bmap_records);
977 out_rb:
978 	kfree(rb);
979 	return error;
980 }
981 
982 /* Repair an inode's data fork. */
983 int
984 xrep_bmap_data(
985 	struct xfs_scrub	*sc)
986 {
987 	return xrep_bmap(sc, XFS_DATA_FORK, true);
988 }
989 
990 /* Repair an inode's attr fork. */
991 int
992 xrep_bmap_attr(
993 	struct xfs_scrub	*sc)
994 {
995 	return xrep_bmap(sc, XFS_ATTR_FORK, false);
996 }
997