xref: /linux/fs/xfs/scrub/cow_repair.c (revision 6a5358410af387fd48251a5e5cc4cf73dc16de52)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_inode_fork.h"
18 #include "xfs_alloc.h"
19 #include "xfs_bmap.h"
20 #include "xfs_rmap.h"
21 #include "xfs_refcount.h"
22 #include "xfs_quota.h"
23 #include "xfs_ialloc.h"
24 #include "xfs_ag.h"
25 #include "xfs_error.h"
26 #include "xfs_errortag.h"
27 #include "xfs_icache.h"
28 #include "xfs_refcount_btree.h"
29 #include "xfs_rtalloc.h"
30 #include "xfs_rtbitmap.h"
31 #include "xfs_rtgroup.h"
32 #include "scrub/xfs_scrub.h"
33 #include "scrub/scrub.h"
34 #include "scrub/common.h"
35 #include "scrub/trace.h"
36 #include "scrub/repair.h"
37 #include "scrub/bitmap.h"
38 #include "scrub/off_bitmap.h"
39 #include "scrub/fsb_bitmap.h"
40 #include "scrub/rtb_bitmap.h"
41 #include "scrub/reap.h"
42 
43 /*
44  * CoW Fork Mapping Repair
45  * =======================
46  *
47  * Although CoW staging extents are owned by incore CoW inode forks, on disk
48  * they are owned by the refcount btree.  The ondisk metadata does not record
49  * any ownership information, which limits what we can do to repair the
50  * mappings in the CoW fork.  At most, we can replace ifork mappings that lack
51  * an entry in the refcount btree or are described by a reverse mapping record
52  * whose owner is not OWN_COW.
53  *
54  * Replacing extents is also tricky -- we can't touch written CoW fork extents
55  * since they are undergoing writeback, and delalloc extents do not require
56  * repair since they only exist incore.  Hence the most we can do is find the
57  * bad parts of unwritten mappings, allocate a replacement set of blocks, and
58  * replace the incore mapping.  We use the regular reaping process to unmap
59  * or free the discarded blocks, as appropriate.
60  */
/* Working state for repairing the CoW fork of a single inode. */
struct xrep_cow {
	/* Scrub context that this repair is running under. */
	struct xfs_scrub	*sc;

	/* Bitmap of file offset ranges that need replacing. */
	struct xoff_bitmap	bad_fileoffs;

	/*
	 * Bitmap of blocks that were removed from the CoW fork.  Only one
	 * member is live for a given repair: xrep_bmap_cow() sets up the rt
	 * variant for realtime inodes and the fsblock variant otherwise.
	 */
	union {
		struct xfsb_bitmap	old_cowfork_fsblocks;
		struct xrtb_bitmap	old_cowfork_rtblocks;
	};

	/* CoW fork mappings used to scan for bad CoW staging extents. */
	struct xfs_bmbt_irec	irec;

	/*
	 * refcount btree block number of irec.br_startblock.  This is the
	 * group-relative form of the start block, computed by
	 * XFS_FSB_TO_AGBNO() or xfs_rtb_to_rgbno() before each scan.
	 */
	unsigned int		irec_startbno;

	/*
	 * refcount btree block number of the next refcount record we expect.
	 * A record that starts beyond this value leaves a gap that has no
	 * CoW staging record, and that gap gets marked bad.
	 */
	unsigned int		next_bno;
};
82 
/*
 * CoW staging extent.  Filled out by xrep_cow_alloc() or xrep_cow_alloc_rt()
 * to describe the replacement space for part of a CoW fork mapping.
 */
struct xrep_cow_extent {
	/* First block of the replacement extent. */
	xfs_fsblock_t		fsbno;

	/* Number of blocks in the replacement extent. */
	xfs_extlen_t		len;
};
88 
89 /*
90  * Mark the part of the file range that corresponds to the given physical
91  * space.  Caller must ensure that the physical range is within xc->irec.
92  */
93 STATIC int
xrep_cow_mark_file_range(struct xrep_cow * xc,xfs_fsblock_t startblock,xfs_filblks_t blockcount)94 xrep_cow_mark_file_range(
95 	struct xrep_cow		*xc,
96 	xfs_fsblock_t		startblock,
97 	xfs_filblks_t		blockcount)
98 {
99 	xfs_fileoff_t		startoff;
100 
101 	startoff = xc->irec.br_startoff +
102 				(startblock - xc->irec.br_startblock);
103 
104 	trace_xrep_cow_mark_file_range(xc->sc->ip, startblock, startoff,
105 			blockcount);
106 
107 	return xoff_bitmap_set(&xc->bad_fileoffs, startoff, blockcount);
108 }
109 
110 /*
111  * Trim @src to fit within the CoW fork mapping being examined, and put the
112  * result in @dst.
113  */
114 static inline void
xrep_cow_trim_refcount(struct xrep_cow * xc,struct xfs_refcount_irec * dst,const struct xfs_refcount_irec * src)115 xrep_cow_trim_refcount(
116 	struct xrep_cow			*xc,
117 	struct xfs_refcount_irec	*dst,
118 	const struct xfs_refcount_irec	*src)
119 {
120 	unsigned int			adj;
121 
122 	memcpy(dst, src, sizeof(*dst));
123 
124 	if (dst->rc_startblock < xc->irec_startbno) {
125 		adj = xc->irec_startbno - dst->rc_startblock;
126 		dst->rc_blockcount -= adj;
127 		dst->rc_startblock += adj;
128 	}
129 
130 	if (dst->rc_startblock + dst->rc_blockcount >
131 	    xc->irec_startbno + xc->irec.br_blockcount) {
132 		adj = (dst->rc_startblock + dst->rc_blockcount) -
133 		      (xc->irec_startbno + xc->irec.br_blockcount);
134 		dst->rc_blockcount -= adj;
135 	}
136 }
137 
138 /* Mark any shared CoW staging extents. */
139 STATIC int
xrep_cow_mark_shared_staging(struct xfs_btree_cur * cur,const struct xfs_refcount_irec * rec,void * priv)140 xrep_cow_mark_shared_staging(
141 	struct xfs_btree_cur		*cur,
142 	const struct xfs_refcount_irec	*rec,
143 	void				*priv)
144 {
145 	struct xrep_cow			*xc = priv;
146 	struct xfs_refcount_irec	rrec;
147 
148 	if (!xfs_refcount_check_domain(rec) ||
149 	    rec->rc_domain != XFS_REFC_DOMAIN_SHARED)
150 		return -EFSCORRUPTED;
151 
152 	xrep_cow_trim_refcount(xc, &rrec, rec);
153 
154 	return xrep_cow_mark_file_range(xc,
155 			xfs_gbno_to_fsb(cur->bc_group, rrec.rc_startblock),
156 			rrec.rc_blockcount);
157 }
158 
159 /*
160  * Mark any portion of the CoW fork file offset range where there is not a CoW
161  * staging extent record in the refcountbt, and keep a record of where we did
162  * find correct refcountbt records.  Staging records are always cleaned out at
163  * mount time, so any two inodes trying to map the same staging area would have
164  * already taken the fs down due to refcount btree verifier errors.  Hence this
165  * inode should be the sole creator of the staging extent records ondisk.
166  */
167 STATIC int
xrep_cow_mark_missing_staging(struct xfs_btree_cur * cur,const struct xfs_refcount_irec * rec,void * priv)168 xrep_cow_mark_missing_staging(
169 	struct xfs_btree_cur		*cur,
170 	const struct xfs_refcount_irec	*rec,
171 	void				*priv)
172 {
173 	struct xrep_cow			*xc = priv;
174 	struct xfs_refcount_irec	rrec;
175 	int				error;
176 
177 	if (!xfs_refcount_check_domain(rec) ||
178 	    rec->rc_domain != XFS_REFC_DOMAIN_COW)
179 		return -EFSCORRUPTED;
180 
181 	xrep_cow_trim_refcount(xc, &rrec, rec);
182 
183 	if (xc->next_bno >= rrec.rc_startblock)
184 		goto next;
185 
186 	error = xrep_cow_mark_file_range(xc,
187 			xfs_gbno_to_fsb(cur->bc_group, xc->next_bno),
188 			rrec.rc_startblock - xc->next_bno);
189 	if (error)
190 		return error;
191 
192 next:
193 	xc->next_bno = rrec.rc_startblock + rrec.rc_blockcount;
194 	return 0;
195 }
196 
197 /*
198  * Mark any area that does not correspond to a CoW staging rmap.  These are
199  * cross-linked areas that must be avoided.
200  */
201 STATIC int
xrep_cow_mark_missing_staging_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)202 xrep_cow_mark_missing_staging_rmap(
203 	struct xfs_btree_cur		*cur,
204 	const struct xfs_rmap_irec	*rec,
205 	void				*priv)
206 {
207 	struct xrep_cow			*xc = priv;
208 	xfs_agblock_t			rec_bno;
209 	xfs_extlen_t			rec_len;
210 	unsigned int			adj;
211 
212 	if (rec->rm_owner == XFS_RMAP_OWN_COW)
213 		return 0;
214 
215 	rec_bno = rec->rm_startblock;
216 	rec_len = rec->rm_blockcount;
217 	if (rec_bno < xc->irec_startbno) {
218 		adj = xc->irec_startbno - rec_bno;
219 		rec_len -= adj;
220 		rec_bno += adj;
221 	}
222 
223 	if (rec_bno + rec_len > xc->irec_startbno + xc->irec.br_blockcount) {
224 		adj = (rec_bno + rec_len) -
225 		      (xc->irec_startbno + xc->irec.br_blockcount);
226 		rec_len -= adj;
227 	}
228 
229 	return xrep_cow_mark_file_range(xc,
230 			xfs_gbno_to_fsb(cur->bc_group, rec_bno), rec_len);
231 }
232 
233 /*
234  * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
235  * extent and mark the corresponding part of the file range in the bitmap.
236  */
237 STATIC int
xrep_cow_find_bad(struct xrep_cow * xc)238 xrep_cow_find_bad(
239 	struct xrep_cow			*xc)
240 {
241 	struct xfs_refcount_irec	rc_low = { 0 };
242 	struct xfs_refcount_irec	rc_high = { 0 };
243 	struct xfs_rmap_irec		rm_low = { 0 };
244 	struct xfs_rmap_irec		rm_high = { 0 };
245 	struct xfs_perag		*pag;
246 	struct xfs_scrub		*sc = xc->sc;
247 	xfs_agnumber_t			agno;
248 	int				error;
249 
250 	agno = XFS_FSB_TO_AGNO(sc->mp, xc->irec.br_startblock);
251 	xc->irec_startbno = XFS_FSB_TO_AGBNO(sc->mp, xc->irec.br_startblock);
252 
253 	pag = xfs_perag_get(sc->mp, agno);
254 	if (!pag)
255 		return -EFSCORRUPTED;
256 
257 	error = xrep_ag_init(sc, pag, &sc->sa);
258 	if (error)
259 		goto out_pag;
260 
261 	/* Mark any CoW fork extents that are shared. */
262 	rc_low.rc_startblock = xc->irec_startbno;
263 	rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
264 	rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
265 	error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
266 			xrep_cow_mark_shared_staging, xc);
267 	if (error)
268 		goto out_sa;
269 
270 	/* Make sure there are CoW staging extents for the whole mapping. */
271 	rc_low.rc_startblock = xc->irec_startbno;
272 	rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
273 	rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
274 	xc->next_bno = xc->irec_startbno;
275 	error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
276 			xrep_cow_mark_missing_staging, xc);
277 	if (error)
278 		goto out_sa;
279 
280 	if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
281 		error = xrep_cow_mark_file_range(xc,
282 				xfs_agbno_to_fsb(pag, xc->next_bno),
283 				xc->irec_startbno + xc->irec.br_blockcount -
284 				xc->next_bno);
285 		if (error)
286 			goto out_sa;
287 	}
288 
289 	/* Mark any area has an rmap that isn't a COW staging extent. */
290 	rm_low.rm_startblock = xc->irec_startbno;
291 	memset(&rm_high, 0xFF, sizeof(rm_high));
292 	rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
293 	error = xfs_rmap_query_range(sc->sa.rmap_cur, &rm_low, &rm_high,
294 			xrep_cow_mark_missing_staging_rmap, xc);
295 	if (error)
296 		goto out_sa;
297 
298 	/*
299 	 * If userspace is forcing us to rebuild the CoW fork or someone turned
300 	 * on the debugging knob, replace everything in the CoW fork.
301 	 */
302 	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
303 	    XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
304 		error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
305 				xc->irec.br_blockcount);
306 
307 out_sa:
308 	xchk_ag_free(sc, &sc->sa);
309 out_pag:
310 	xfs_perag_put(pag);
311 	return error;
312 }
313 
314 /*
315  * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
316  * extent and mark the corresponding part of the file range in the bitmap.
317  */
318 STATIC int
xrep_cow_find_bad_rt(struct xrep_cow * xc)319 xrep_cow_find_bad_rt(
320 	struct xrep_cow			*xc)
321 {
322 	struct xfs_refcount_irec	rc_low = { 0 };
323 	struct xfs_refcount_irec	rc_high = { 0 };
324 	struct xfs_rmap_irec		rm_low = { 0 };
325 	struct xfs_rmap_irec		rm_high = { 0 };
326 	struct xfs_scrub		*sc = xc->sc;
327 	struct xfs_rtgroup		*rtg;
328 	int				error = 0;
329 
330 	xc->irec_startbno = xfs_rtb_to_rgbno(sc->mp, xc->irec.br_startblock);
331 
332 	rtg = xfs_rtgroup_get(sc->mp,
333 			xfs_rtb_to_rgno(sc->mp, xc->irec.br_startblock));
334 	if (!rtg)
335 		return -EFSCORRUPTED;
336 
337 	error = xrep_rtgroup_init(sc, rtg, &sc->sr,
338 			XFS_RTGLOCK_RMAP | XFS_RTGLOCK_REFCOUNT);
339 	if (error)
340 		goto out_rtg;
341 
342 	/* Mark any CoW fork extents that are shared. */
343 	rc_low.rc_startblock = xc->irec_startbno;
344 	rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
345 	rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
346 	error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
347 			xrep_cow_mark_shared_staging, xc);
348 	if (error)
349 		goto out_sr;
350 
351 	/* Make sure there are CoW staging extents for the whole mapping. */
352 	rc_low.rc_startblock = xc->irec_startbno;
353 	rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
354 	rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
355 	xc->next_bno = xc->irec_startbno;
356 	error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
357 			xrep_cow_mark_missing_staging, xc);
358 	if (error)
359 		goto out_sr;
360 
361 	if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
362 		error = xrep_cow_mark_file_range(xc,
363 				xfs_rgbno_to_rtb(rtg, xc->next_bno),
364 				xc->irec_startbno + xc->irec.br_blockcount -
365 				xc->next_bno);
366 		if (error)
367 			goto out_sr;
368 	}
369 
370 	/* Mark any area has an rmap that isn't a COW staging extent. */
371 	rm_low.rm_startblock = xc->irec_startbno;
372 	memset(&rm_high, 0xFF, sizeof(rm_high));
373 	rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
374 	error = xfs_rmap_query_range(sc->sr.rmap_cur, &rm_low, &rm_high,
375 			xrep_cow_mark_missing_staging_rmap, xc);
376 	if (error)
377 		goto out_sr;
378 
379 	/*
380 	 * If userspace is forcing us to rebuild the CoW fork or someone
381 	 * turned on the debugging knob, replace everything in the
382 	 * CoW fork and then scan for staging extents in the refcountbt.
383 	 */
384 	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
385 	    XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
386 		error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
387 				xc->irec.br_blockcount);
388 
389 out_sr:
390 	xchk_rtgroup_btcur_free(&sc->sr);
391 	xchk_rtgroup_free(sc, &sc->sr);
392 out_rtg:
393 	xfs_rtgroup_put(rtg);
394 	return error;
395 }
396 
397 /*
398  * Allocate a replacement CoW staging extent of up to the given number of
399  * blocks, and fill out the mapping.
400  */
401 STATIC int
xrep_cow_alloc(struct xfs_scrub * sc,xfs_extlen_t maxlen,struct xrep_cow_extent * repl)402 xrep_cow_alloc(
403 	struct xfs_scrub	*sc,
404 	xfs_extlen_t		maxlen,
405 	struct xrep_cow_extent	*repl)
406 {
407 	struct xfs_alloc_arg	args = {
408 		.tp		= sc->tp,
409 		.mp		= sc->mp,
410 		.oinfo		= XFS_RMAP_OINFO_SKIP_UPDATE,
411 		.minlen		= 1,
412 		.maxlen		= maxlen,
413 		.prod		= 1,
414 		.resv		= XFS_AG_RESV_NONE,
415 		.datatype	= XFS_ALLOC_USERDATA,
416 	};
417 	int			error;
418 
419 	error = xfs_trans_reserve_more(sc->tp, maxlen, 0);
420 	if (error)
421 		return error;
422 
423 	error = xfs_alloc_vextent_start_ag(&args,
424 			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino));
425 	if (error)
426 		return error;
427 	if (args.fsbno == NULLFSBLOCK)
428 		return -ENOSPC;
429 
430 	xfs_refcount_alloc_cow_extent(sc->tp, false, args.fsbno, args.len);
431 
432 	repl->fsbno = args.fsbno;
433 	repl->len = args.len;
434 	return 0;
435 }
436 
437 /*
438  * Allocate a replacement rt CoW staging extent of up to the given number of
439  * blocks, and fill out the mapping.
440  */
441 STATIC int
xrep_cow_alloc_rt(struct xfs_scrub * sc,xfs_extlen_t maxlen,struct xrep_cow_extent * repl)442 xrep_cow_alloc_rt(
443 	struct xfs_scrub	*sc,
444 	xfs_extlen_t		maxlen,
445 	struct xrep_cow_extent	*repl)
446 {
447 	xfs_rtxlen_t		maxrtx = xfs_rtb_to_rtx(sc->mp, maxlen);
448 	int			error;
449 
450 	error = xfs_trans_reserve_more(sc->tp, 0, maxrtx);
451 	if (error)
452 		return error;
453 
454 	error = xfs_rtallocate_rtgs(sc->tp, NULLRTBLOCK, 1, maxrtx, 1, false,
455 			false, &repl->fsbno, &repl->len);
456 	if (error)
457 		return error;
458 
459 	xfs_refcount_alloc_cow_extent(sc->tp, true, repl->fsbno, repl->len);
460 	return 0;
461 }
462 
463 /*
464  * Look up the current CoW fork mapping so that we only allocate enough to
465  * replace a single mapping.  If we don't find a mapping that covers the start
466  * of the file range, or we find a delalloc or written extent, something is
467  * seriously wrong, since we didn't drop the ILOCK.
468  */
469 static inline int
xrep_cow_find_mapping(struct xrep_cow * xc,struct xfs_iext_cursor * icur,xfs_fileoff_t startoff,struct xfs_bmbt_irec * got)470 xrep_cow_find_mapping(
471 	struct xrep_cow		*xc,
472 	struct xfs_iext_cursor	*icur,
473 	xfs_fileoff_t		startoff,
474 	struct xfs_bmbt_irec	*got)
475 {
476 	struct xfs_inode	*ip = xc->sc->ip;
477 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
478 
479 	if (!xfs_iext_lookup_extent(ip, ifp, startoff, icur, got))
480 		goto bad;
481 
482 	if (got->br_startoff > startoff)
483 		goto bad;
484 
485 	if (got->br_blockcount == 0)
486 		goto bad;
487 
488 	if (isnullstartblock(got->br_startblock))
489 		goto bad;
490 
491 	if (xfs_bmap_is_written_extent(got))
492 		goto bad;
493 
494 	return 0;
495 bad:
496 	ASSERT(0);
497 	return -EFSCORRUPTED;
498 }
499 
500 #define REPLACE_LEFT_SIDE	(1U << 0)
501 #define REPLACE_RIGHT_SIDE	(1U << 1)
502 
503 /*
504  * Given a CoW fork mapping @got and a replacement mapping @repl, remap the
505  * beginning of @got with the space described by @rep.
506  */
507 static inline void
xrep_cow_replace_mapping(struct xfs_inode * ip,struct xfs_iext_cursor * icur,const struct xfs_bmbt_irec * got,const struct xrep_cow_extent * repl)508 xrep_cow_replace_mapping(
509 	struct xfs_inode		*ip,
510 	struct xfs_iext_cursor		*icur,
511 	const struct xfs_bmbt_irec	*got,
512 	const struct xrep_cow_extent	*repl)
513 {
514 	struct xfs_bmbt_irec		new = *got; /* struct copy */
515 
516 	ASSERT(repl->len > 0);
517 	ASSERT(!isnullstartblock(got->br_startblock));
518 
519 	trace_xrep_cow_replace_mapping(ip, got, repl->fsbno, repl->len);
520 
521 	if (got->br_blockcount == repl->len) {
522 		/*
523 		 * The new extent is a complete replacement for the existing
524 		 * extent.  Update the COW fork record.
525 		 */
526 		new.br_startblock = repl->fsbno;
527 		xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
528 		return;
529 	}
530 
531 	/*
532 	 * The new extent can replace the beginning of the COW fork record.
533 	 * Move the left side of @got upwards, then insert the new record.
534 	 */
535 	new.br_startoff += repl->len;
536 	new.br_startblock += repl->len;
537 	new.br_blockcount -= repl->len;
538 	xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
539 
540 	new.br_startoff = got->br_startoff;
541 	new.br_startblock = repl->fsbno;
542 	new.br_blockcount = repl->len;
543 	xfs_iext_insert(ip, icur, &new, BMAP_COWFORK);
544 }
545 
546 /*
547  * Replace the unwritten CoW staging extent backing the given file range with a
548  * new space extent that isn't as problematic.
549  */
550 STATIC int
xrep_cow_replace_range(struct xrep_cow * xc,xfs_fileoff_t startoff,xfs_extlen_t * blockcount)551 xrep_cow_replace_range(
552 	struct xrep_cow		*xc,
553 	xfs_fileoff_t		startoff,
554 	xfs_extlen_t		*blockcount)
555 {
556 	struct xfs_iext_cursor	icur;
557 	struct xrep_cow_extent	repl;
558 	struct xfs_bmbt_irec	got;
559 	struct xfs_scrub	*sc = xc->sc;
560 	xfs_fileoff_t		nextoff;
561 	xfs_extlen_t		alloc_len;
562 	int			error;
563 
564 	/*
565 	 * Put the existing CoW fork mapping in @got.  If @got ends before
566 	 * @rep, truncate @rep so we only replace one extent mapping at a time.
567 	 */
568 	error = xrep_cow_find_mapping(xc, &icur, startoff, &got);
569 	if (error)
570 		return error;
571 	nextoff = min(startoff + *blockcount,
572 		      got.br_startoff + got.br_blockcount);
573 
574 	/*
575 	 * Allocate a replacement extent.  If we don't fill all the blocks,
576 	 * shorten the quantity that will be deleted in this step.
577 	 */
578 	alloc_len = min_t(xfs_fileoff_t, XFS_MAX_BMBT_EXTLEN,
579 			  nextoff - startoff);
580 	if (XFS_IS_REALTIME_INODE(sc->ip))
581 		error = xrep_cow_alloc_rt(sc, alloc_len, &repl);
582 	else
583 		error = xrep_cow_alloc(sc, alloc_len, &repl);
584 	if (error)
585 		return error;
586 
587 	/*
588 	 * Replace the old mapping with the new one, and commit the metadata
589 	 * changes made so far.
590 	 */
591 	xrep_cow_replace_mapping(sc->ip, &icur, &got, &repl);
592 
593 	xfs_inode_set_cowblocks_tag(sc->ip);
594 	error = xfs_defer_finish(&sc->tp);
595 	if (error)
596 		return error;
597 
598 	/* Note the old CoW staging extents; we'll reap them all later. */
599 	if (XFS_IS_REALTIME_INODE(sc->ip))
600 		error = xrtb_bitmap_set(&xc->old_cowfork_rtblocks,
601 				got.br_startblock, repl.len);
602 	else
603 		error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks,
604 				got.br_startblock, repl.len);
605 	if (error)
606 		return error;
607 
608 	*blockcount = repl.len;
609 	return 0;
610 }
611 
612 /*
613  * Replace a bad part of an unwritten CoW staging extent with a fresh delalloc
614  * reservation.
615  */
616 STATIC int
xrep_cow_replace(uint64_t startoff,uint64_t blockcount,void * priv)617 xrep_cow_replace(
618 	uint64_t		startoff,
619 	uint64_t		blockcount,
620 	void			*priv)
621 {
622 	struct xrep_cow		*xc = priv;
623 	int			error = 0;
624 
625 	while (blockcount > 0) {
626 		xfs_extlen_t	len = min_t(xfs_filblks_t, blockcount,
627 					    XFS_MAX_BMBT_EXTLEN);
628 
629 		error = xrep_cow_replace_range(xc, startoff, &len);
630 		if (error)
631 			break;
632 
633 		blockcount -= len;
634 		startoff += len;
635 	}
636 
637 	return error;
638 }
639 
640 /*
641  * Repair an inode's CoW fork.  The CoW fork is an in-core structure, so
642  * there's no btree to rebuid.  Instead, we replace any mappings that are
643  * cross-linked or lack ondisk CoW fork records in the refcount btree.
644  */
645 int
xrep_bmap_cow(struct xfs_scrub * sc)646 xrep_bmap_cow(
647 	struct xfs_scrub	*sc)
648 {
649 	struct xrep_cow		*xc;
650 	struct xfs_iext_cursor	icur;
651 	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, XFS_COW_FORK);
652 	int			error;
653 
654 	if (!xfs_has_rmapbt(sc->mp) || !xfs_has_reflink(sc->mp))
655 		return -EOPNOTSUPP;
656 
657 	if (!ifp)
658 		return 0;
659 
660 	/*
661 	 * Realtime files with large extent sizes are not supported because
662 	 * we could encounter an CoW mapping that has been partially written
663 	 * out *and* requires replacement, and there's no solution to that.
664 	 */
665 	if (xfs_inode_has_bigrtalloc(sc->ip))
666 		return -EOPNOTSUPP;
667 
668 	/* Metadata inodes aren't supposed to have data on the rt volume. */
669 	if (xfs_is_metadir_inode(sc->ip) && XFS_IS_REALTIME_INODE(sc->ip))
670 		return -EOPNOTSUPP;
671 
672 	/*
673 	 * If we're somehow not in extents format, then reinitialize it to
674 	 * an empty extent mapping fork and exit.
675 	 */
676 	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
677 		ifp->if_format = XFS_DINODE_FMT_EXTENTS;
678 		ifp->if_nextents = 0;
679 		return 0;
680 	}
681 
682 	xc = kzalloc_obj(struct xrep_cow, XCHK_GFP_FLAGS);
683 	if (!xc)
684 		return -ENOMEM;
685 
686 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
687 
688 	xc->sc = sc;
689 	xoff_bitmap_init(&xc->bad_fileoffs);
690 	if (XFS_IS_REALTIME_INODE(sc->ip))
691 		xrtb_bitmap_init(&xc->old_cowfork_rtblocks);
692 	else
693 		xfsb_bitmap_init(&xc->old_cowfork_fsblocks);
694 
695 	for_each_xfs_iext(ifp, &icur, &xc->irec) {
696 		if (xchk_should_terminate(sc, &error))
697 			goto out_bitmap;
698 
699 		/*
700 		 * delalloc reservations only exist incore, so there is no
701 		 * ondisk metadata that we can examine.  Hence we leave them
702 		 * alone.
703 		 */
704 		if (isnullstartblock(xc->irec.br_startblock))
705 			continue;
706 
707 		/*
708 		 * COW fork extents are only in the written state if writeback
709 		 * is actively writing to disk.  We cannot restart the write
710 		 * at a different disk address since we've already issued the
711 		 * IO, so we leave these alone and hope for the best.
712 		 */
713 		if (xfs_bmap_is_written_extent(&xc->irec))
714 			continue;
715 
716 		if (XFS_IS_REALTIME_INODE(sc->ip))
717 			error = xrep_cow_find_bad_rt(xc);
718 		else
719 			error = xrep_cow_find_bad(xc);
720 		if (error)
721 			goto out_bitmap;
722 	}
723 
724 	/* Replace any bad unwritten mappings with fresh reservations. */
725 	error = xoff_bitmap_walk(&xc->bad_fileoffs, xrep_cow_replace, xc);
726 	if (error)
727 		goto out_bitmap;
728 
729 	/*
730 	 * Reap as many of the old CoW blocks as we can.  They are owned ondisk
731 	 * by the refcount btree, not the inode, so it is correct to treat them
732 	 * like inode metadata.
733 	 */
734 	if (XFS_IS_REALTIME_INODE(sc->ip))
735 		error = xrep_reap_rtblocks(sc, &xc->old_cowfork_rtblocks,
736 				&XFS_RMAP_OINFO_COW);
737 	else
738 		error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks,
739 				&XFS_RMAP_OINFO_COW);
740 	if (error)
741 		goto out_bitmap;
742 
743 out_bitmap:
744 	if (XFS_IS_REALTIME_INODE(sc->ip))
745 		xrtb_bitmap_destroy(&xc->old_cowfork_rtblocks);
746 	else
747 		xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
748 	xoff_bitmap_destroy(&xc->bad_fileoffs);
749 	kfree(xc);
750 	return error;
751 }
752