xref: /linux/fs/xfs/scrub/cow_repair.c (revision b477ff98d903618a1ab8247861f2ea6e70c0f0f8)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_inode_fork.h"
18 #include "xfs_alloc.h"
19 #include "xfs_bmap.h"
20 #include "xfs_rmap.h"
21 #include "xfs_refcount.h"
22 #include "xfs_quota.h"
23 #include "xfs_ialloc.h"
24 #include "xfs_ag.h"
25 #include "xfs_error.h"
26 #include "xfs_errortag.h"
27 #include "xfs_icache.h"
28 #include "xfs_refcount_btree.h"
29 #include "xfs_rtalloc.h"
30 #include "xfs_rtbitmap.h"
31 #include "xfs_rtgroup.h"
32 #include "scrub/xfs_scrub.h"
33 #include "scrub/scrub.h"
34 #include "scrub/common.h"
35 #include "scrub/trace.h"
36 #include "scrub/repair.h"
37 #include "scrub/bitmap.h"
38 #include "scrub/off_bitmap.h"
39 #include "scrub/fsb_bitmap.h"
40 #include "scrub/rtb_bitmap.h"
41 #include "scrub/reap.h"
42 
43 /*
44  * CoW Fork Mapping Repair
45  * =======================
46  *
47  * Although CoW staging extents are owned by incore CoW inode forks, on disk
48  * they are owned by the refcount btree.  The ondisk metadata does not record
49  * any ownership information, which limits what we can do to repair the
50  * mappings in the CoW fork.  At most, we can replace ifork mappings that lack
51  * an entry in the refcount btree or are described by a reverse mapping record
52  * whose owner is not OWN_COW.
53  *
54  * Replacing extents is also tricky -- we can't touch written CoW fork extents
55  * since they are undergoing writeback, and delalloc extents do not require
56  * repair since they only exist incore.  Hence the most we can do is find the
57  * bad parts of unwritten mappings, allocate a replacement set of blocks, and
58  * replace the incore mapping.  We use the regular reaping process to unmap
59  * or free the discarded blocks, as appropriate.
60  */
/*
 * Working state for one CoW fork repair.  xrep_bmap_cow() allocates one of
 * these and hands it, as the private pointer, to the refcount/rmap btree
 * query callbacks and to the bad-offset bitmap walk.
 */
61 struct xrep_cow {
	/* Scrub context this repair is running under. */
62 	struct xfs_scrub	*sc;
63 
64 	/* Bitmap of file offset ranges that need replacing. */
65 	struct xoff_bitmap	bad_fileoffs;
66 
	/*
	 * Only one member of this union is in use for a given repair:
	 * xrep_bmap_cow() initializes, fills and reaps the rt bitmap for
	 * realtime inodes and the fsblock bitmap for all other inodes.
	 */
67 	/* Bitmap of fsblocks that were removed from the CoW fork. */
68 	union {
69 		struct xfsb_bitmap	old_cowfork_fsblocks;
70 		struct xrtb_bitmap	old_cowfork_rtblocks;
71 	};
72 
73 	/* CoW fork mappings used to scan for bad CoW staging extents. */
74 	struct xfs_bmbt_irec	irec;
75 
	/*
	 * Derived from irec.br_startblock at the top of xrep_cow_find_bad()
	 * (AG block number) or xrep_cow_find_bad_rt() (rtgroup block number).
	 */
76 	/* refcount btree block number of irec.br_startblock */
77 	unsigned int		irec_startbno;
78 
	/*
	 * Advanced by xrep_cow_mark_missing_staging() to the end of each
	 * staging record it sees; a record starting beyond this value exposes
	 * a gap that gets marked bad.
	 */
79 	/* refcount btree block number of the next refcount record we expect */
80 	unsigned int		next_bno;
81 };
82 
83 /* CoW staging extent. */
/*
 * Filled in by xrep_cow_alloc() / xrep_cow_alloc_rt() to describe the
 * replacement space that was just allocated.
 */
84 struct xrep_cow_extent {
	/* First block of the extent (for rt files, presumably an rt block). */
85 	xfs_fsblock_t		fsbno;
	/* Number of blocks actually allocated. */
86 	xfs_extlen_t		len;
87 };
88 
89 /*
90  * Mark the part of the file range that corresponds to the given physical
91  * space.  Caller must ensure that the physical range is within xc->irec.
92  */
93 STATIC int
xrep_cow_mark_file_range(struct xrep_cow * xc,xfs_fsblock_t startblock,xfs_filblks_t blockcount)94 xrep_cow_mark_file_range(
95 	struct xrep_cow		*xc,
96 	xfs_fsblock_t		startblock,
97 	xfs_filblks_t		blockcount)
98 {
99 	xfs_fileoff_t		startoff;
100 
101 	startoff = xc->irec.br_startoff +
102 				(startblock - xc->irec.br_startblock);
103 
104 	trace_xrep_cow_mark_file_range(xc->sc->ip, startblock, startoff,
105 			blockcount);
106 
107 	return xoff_bitmap_set(&xc->bad_fileoffs, startoff, blockcount);
108 }
109 
110 /*
111  * Trim @src to fit within the CoW fork mapping being examined, and put the
112  * result in @dst.
113  */
114 static inline void
xrep_cow_trim_refcount(struct xrep_cow * xc,struct xfs_refcount_irec * dst,const struct xfs_refcount_irec * src)115 xrep_cow_trim_refcount(
116 	struct xrep_cow			*xc,
117 	struct xfs_refcount_irec	*dst,
118 	const struct xfs_refcount_irec	*src)
119 {
120 	unsigned int			adj;
121 
122 	memcpy(dst, src, sizeof(*dst));
123 
124 	if (dst->rc_startblock < xc->irec_startbno) {
125 		adj = xc->irec_startbno - dst->rc_startblock;
126 		dst->rc_blockcount -= adj;
127 		dst->rc_startblock += adj;
128 	}
129 
130 	if (dst->rc_startblock + dst->rc_blockcount >
131 	    xc->irec_startbno + xc->irec.br_blockcount) {
132 		adj = (dst->rc_startblock + dst->rc_blockcount) -
133 		      (xc->irec_startbno + xc->irec.br_blockcount);
134 		dst->rc_blockcount -= adj;
135 	}
136 }
137 
138 /* Mark any shared CoW staging extents. */
139 STATIC int
xrep_cow_mark_shared_staging(struct xfs_btree_cur * cur,const struct xfs_refcount_irec * rec,void * priv)140 xrep_cow_mark_shared_staging(
141 	struct xfs_btree_cur		*cur,
142 	const struct xfs_refcount_irec	*rec,
143 	void				*priv)
144 {
145 	struct xrep_cow			*xc = priv;
146 	struct xfs_refcount_irec	rrec;
147 
148 	if (!xfs_refcount_check_domain(rec) ||
149 	    rec->rc_domain != XFS_REFC_DOMAIN_SHARED)
150 		return -EFSCORRUPTED;
151 
152 	xrep_cow_trim_refcount(xc, &rrec, rec);
153 
154 	return xrep_cow_mark_file_range(xc,
155 			xfs_gbno_to_fsb(cur->bc_group, rrec.rc_startblock),
156 			rrec.rc_blockcount);
157 }
158 
159 /*
160  * Mark any portion of the CoW fork file offset range where there is not a CoW
161  * staging extent record in the refcountbt, and keep a record of where we did
162  * find correct refcountbt records.  Staging records are always cleaned out at
163  * mount time, so any two inodes trying to map the same staging area would have
164  * already taken the fs down due to refcount btree verifier errors.  Hence this
165  * inode should be the sole creator of the staging extent records ondisk.
166  */
167 STATIC int
xrep_cow_mark_missing_staging(struct xfs_btree_cur * cur,const struct xfs_refcount_irec * rec,void * priv)168 xrep_cow_mark_missing_staging(
169 	struct xfs_btree_cur		*cur,
170 	const struct xfs_refcount_irec	*rec,
171 	void				*priv)
172 {
173 	struct xrep_cow			*xc = priv;
174 	struct xfs_refcount_irec	rrec;
175 	int				error;
176 
177 	if (!xfs_refcount_check_domain(rec) ||
178 	    rec->rc_domain != XFS_REFC_DOMAIN_COW)
179 		return -EFSCORRUPTED;
180 
181 	xrep_cow_trim_refcount(xc, &rrec, rec);
182 
183 	if (xc->next_bno >= rrec.rc_startblock)
184 		goto next;
185 
186 	error = xrep_cow_mark_file_range(xc,
187 			xfs_gbno_to_fsb(cur->bc_group, xc->next_bno),
188 			rrec.rc_startblock - xc->next_bno);
189 	if (error)
190 		return error;
191 
192 next:
193 	xc->next_bno = rrec.rc_startblock + rrec.rc_blockcount;
194 	return 0;
195 }
196 
197 /*
198  * Mark any area that does not correspond to a CoW staging rmap.  These are
199  * cross-linked areas that must be avoided.
200  */
201 STATIC int
xrep_cow_mark_missing_staging_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)202 xrep_cow_mark_missing_staging_rmap(
203 	struct xfs_btree_cur		*cur,
204 	const struct xfs_rmap_irec	*rec,
205 	void				*priv)
206 {
207 	struct xrep_cow			*xc = priv;
208 	xfs_agblock_t			rec_bno;
209 	xfs_extlen_t			rec_len;
210 	unsigned int			adj;
211 
212 	if (rec->rm_owner == XFS_RMAP_OWN_COW)
213 		return 0;
214 
215 	rec_bno = rec->rm_startblock;
216 	rec_len = rec->rm_blockcount;
217 	if (rec_bno < xc->irec_startbno) {
218 		adj = xc->irec_startbno - rec_bno;
219 		rec_len -= adj;
220 		rec_bno += adj;
221 	}
222 
223 	if (rec_bno + rec_len > xc->irec_startbno + xc->irec.br_blockcount) {
224 		adj = (rec_bno + rec_len) -
225 		      (xc->irec_startbno + xc->irec.br_blockcount);
226 		rec_len -= adj;
227 	}
228 
229 	return xrep_cow_mark_file_range(xc,
230 			xfs_gbno_to_fsb(cur->bc_group, rec_bno), rec_len);
231 }
232 
233 /*
234  * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
235  * extent and mark the corresponding part of the file range in the bitmap.
236  */
237 STATIC int
xrep_cow_find_bad(struct xrep_cow * xc)238 xrep_cow_find_bad(
239 	struct xrep_cow			*xc)
240 {
241 	struct xfs_refcount_irec	rc_low = { 0 };
242 	struct xfs_refcount_irec	rc_high = { 0 };
243 	struct xfs_rmap_irec		rm_low = { 0 };
244 	struct xfs_rmap_irec		rm_high = { 0 };
245 	struct xfs_perag		*pag;
246 	struct xfs_scrub		*sc = xc->sc;
247 	xfs_agnumber_t			agno;
248 	int				error;
249 
250 	agno = XFS_FSB_TO_AGNO(sc->mp, xc->irec.br_startblock);
251 	xc->irec_startbno = XFS_FSB_TO_AGBNO(sc->mp, xc->irec.br_startblock);
252 
253 	pag = xfs_perag_get(sc->mp, agno);
254 	if (!pag)
255 		return -EFSCORRUPTED;
256 
257 	error = xrep_ag_init(sc, pag, &sc->sa);
258 	if (error)
259 		goto out_pag;
260 
261 	/* Mark any CoW fork extents that are shared. */
262 	rc_low.rc_startblock = xc->irec_startbno;
263 	rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
264 	rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
265 	error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
266 			xrep_cow_mark_shared_staging, xc);
267 	if (error)
268 		goto out_sa;
269 
270 	/* Make sure there are CoW staging extents for the whole mapping. */
271 	rc_low.rc_startblock = xc->irec_startbno;
272 	rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
273 	rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
274 	xc->next_bno = xc->irec_startbno;
275 	error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
276 			xrep_cow_mark_missing_staging, xc);
277 	if (error)
278 		goto out_sa;
279 
280 	if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
281 		error = xrep_cow_mark_file_range(xc,
282 				xfs_agbno_to_fsb(pag, xc->next_bno),
283 				xc->irec_startbno + xc->irec.br_blockcount -
284 				xc->next_bno);
285 		if (error)
286 			goto out_sa;
287 	}
288 
289 	/* Mark any area has an rmap that isn't a COW staging extent. */
290 	rm_low.rm_startblock = xc->irec_startbno;
291 	memset(&rm_high, 0xFF, sizeof(rm_high));
292 	rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
293 	error = xfs_rmap_query_range(sc->sa.rmap_cur, &rm_low, &rm_high,
294 			xrep_cow_mark_missing_staging_rmap, xc);
295 	if (error)
296 		goto out_sa;
297 
298 	/*
299 	 * If userspace is forcing us to rebuild the CoW fork or someone turned
300 	 * on the debugging knob, replace everything in the CoW fork.
301 	 */
302 	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
303 	    XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
304 		error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
305 				xc->irec.br_blockcount);
306 		if (error)
307 			return error;
308 	}
309 
310 out_sa:
311 	xchk_ag_free(sc, &sc->sa);
312 out_pag:
313 	xfs_perag_put(pag);
314 	return 0;
315 }
316 
317 /*
318  * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
319  * extent and mark the corresponding part of the file range in the bitmap.
320  */
321 STATIC int
xrep_cow_find_bad_rt(struct xrep_cow * xc)322 xrep_cow_find_bad_rt(
323 	struct xrep_cow			*xc)
324 {
325 	struct xfs_refcount_irec	rc_low = { 0 };
326 	struct xfs_refcount_irec	rc_high = { 0 };
327 	struct xfs_rmap_irec		rm_low = { 0 };
328 	struct xfs_rmap_irec		rm_high = { 0 };
329 	struct xfs_scrub		*sc = xc->sc;
330 	struct xfs_rtgroup		*rtg;
331 	int				error = 0;
332 
333 	xc->irec_startbno = xfs_rtb_to_rgbno(sc->mp, xc->irec.br_startblock);
334 
335 	rtg = xfs_rtgroup_get(sc->mp,
336 			xfs_rtb_to_rgno(sc->mp, xc->irec.br_startblock));
337 	if (!rtg)
338 		return -EFSCORRUPTED;
339 
340 	error = xrep_rtgroup_init(sc, rtg, &sc->sr,
341 			XFS_RTGLOCK_RMAP | XFS_RTGLOCK_REFCOUNT);
342 	if (error)
343 		goto out_rtg;
344 
345 	/* Mark any CoW fork extents that are shared. */
346 	rc_low.rc_startblock = xc->irec_startbno;
347 	rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
348 	rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
349 	error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
350 			xrep_cow_mark_shared_staging, xc);
351 	if (error)
352 		goto out_sr;
353 
354 	/* Make sure there are CoW staging extents for the whole mapping. */
355 	rc_low.rc_startblock = xc->irec_startbno;
356 	rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
357 	rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
358 	xc->next_bno = xc->irec_startbno;
359 	error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
360 			xrep_cow_mark_missing_staging, xc);
361 	if (error)
362 		goto out_sr;
363 
364 	if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
365 		error = xrep_cow_mark_file_range(xc,
366 				xfs_rgbno_to_rtb(rtg, xc->next_bno),
367 				xc->irec_startbno + xc->irec.br_blockcount -
368 				xc->next_bno);
369 		if (error)
370 			goto out_sr;
371 	}
372 
373 	/* Mark any area has an rmap that isn't a COW staging extent. */
374 	rm_low.rm_startblock = xc->irec_startbno;
375 	memset(&rm_high, 0xFF, sizeof(rm_high));
376 	rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
377 	error = xfs_rmap_query_range(sc->sr.rmap_cur, &rm_low, &rm_high,
378 			xrep_cow_mark_missing_staging_rmap, xc);
379 	if (error)
380 		goto out_sr;
381 
382 	/*
383 	 * If userspace is forcing us to rebuild the CoW fork or someone
384 	 * turned on the debugging knob, replace everything in the
385 	 * CoW fork and then scan for staging extents in the refcountbt.
386 	 */
387 	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
388 	    XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
389 		error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
390 				xc->irec.br_blockcount);
391 		if (error)
392 			goto out_rtg;
393 	}
394 
395 out_sr:
396 	xchk_rtgroup_btcur_free(&sc->sr);
397 	xchk_rtgroup_free(sc, &sc->sr);
398 out_rtg:
399 	xfs_rtgroup_put(rtg);
400 	return error;
401 }
402 
403 /*
404  * Allocate a replacement CoW staging extent of up to the given number of
405  * blocks, and fill out the mapping.
406  */
407 STATIC int
xrep_cow_alloc(struct xfs_scrub * sc,xfs_extlen_t maxlen,struct xrep_cow_extent * repl)408 xrep_cow_alloc(
409 	struct xfs_scrub	*sc,
410 	xfs_extlen_t		maxlen,
411 	struct xrep_cow_extent	*repl)
412 {
413 	struct xfs_alloc_arg	args = {
414 		.tp		= sc->tp,
415 		.mp		= sc->mp,
416 		.oinfo		= XFS_RMAP_OINFO_SKIP_UPDATE,
417 		.minlen		= 1,
418 		.maxlen		= maxlen,
419 		.prod		= 1,
420 		.resv		= XFS_AG_RESV_NONE,
421 		.datatype	= XFS_ALLOC_USERDATA,
422 	};
423 	int			error;
424 
425 	error = xfs_trans_reserve_more(sc->tp, maxlen, 0);
426 	if (error)
427 		return error;
428 
429 	error = xfs_alloc_vextent_start_ag(&args,
430 			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino));
431 	if (error)
432 		return error;
433 	if (args.fsbno == NULLFSBLOCK)
434 		return -ENOSPC;
435 
436 	xfs_refcount_alloc_cow_extent(sc->tp, false, args.fsbno, args.len);
437 
438 	repl->fsbno = args.fsbno;
439 	repl->len = args.len;
440 	return 0;
441 }
442 
443 /*
444  * Allocate a replacement rt CoW staging extent of up to the given number of
445  * blocks, and fill out the mapping.
446  */
447 STATIC int
xrep_cow_alloc_rt(struct xfs_scrub * sc,xfs_extlen_t maxlen,struct xrep_cow_extent * repl)448 xrep_cow_alloc_rt(
449 	struct xfs_scrub	*sc,
450 	xfs_extlen_t		maxlen,
451 	struct xrep_cow_extent	*repl)
452 {
453 	xfs_rtxlen_t		maxrtx = xfs_rtb_to_rtx(sc->mp, maxlen);
454 	int			error;
455 
456 	error = xfs_trans_reserve_more(sc->tp, 0, maxrtx);
457 	if (error)
458 		return error;
459 
460 	error = xfs_rtallocate_rtgs(sc->tp, NULLRTBLOCK, 1, maxrtx, 1, false,
461 			false, &repl->fsbno, &repl->len);
462 	if (error)
463 		return error;
464 
465 	xfs_refcount_alloc_cow_extent(sc->tp, true, repl->fsbno, repl->len);
466 	return 0;
467 }
468 
469 /*
470  * Look up the current CoW fork mapping so that we only allocate enough to
471  * replace a single mapping.  If we don't find a mapping that covers the start
472  * of the file range, or we find a delalloc or written extent, something is
473  * seriously wrong, since we didn't drop the ILOCK.
474  */
475 static inline int
xrep_cow_find_mapping(struct xrep_cow * xc,struct xfs_iext_cursor * icur,xfs_fileoff_t startoff,struct xfs_bmbt_irec * got)476 xrep_cow_find_mapping(
477 	struct xrep_cow		*xc,
478 	struct xfs_iext_cursor	*icur,
479 	xfs_fileoff_t		startoff,
480 	struct xfs_bmbt_irec	*got)
481 {
482 	struct xfs_inode	*ip = xc->sc->ip;
483 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
484 
485 	if (!xfs_iext_lookup_extent(ip, ifp, startoff, icur, got))
486 		goto bad;
487 
488 	if (got->br_startoff > startoff)
489 		goto bad;
490 
491 	if (got->br_blockcount == 0)
492 		goto bad;
493 
494 	if (isnullstartblock(got->br_startblock))
495 		goto bad;
496 
497 	if (xfs_bmap_is_written_extent(got))
498 		goto bad;
499 
500 	return 0;
501 bad:
502 	ASSERT(0);
503 	return -EFSCORRUPTED;
504 }
505 
/*
 * Bit flags, presumably naming which side of a mapping is to be replaced.
 * NOTE(review): nothing in the visible part of this file references either
 * flag -- confirm whether they are still needed.
 */
506 #define REPLACE_LEFT_SIDE	(1U << 0)
507 #define REPLACE_RIGHT_SIDE	(1U << 1)
508 
509 /*
510  * Given a CoW fork mapping @got and a replacement mapping @repl, remap the
511  * beginning of @got with the space described by @rep.
512  */
513 static inline void
xrep_cow_replace_mapping(struct xfs_inode * ip,struct xfs_iext_cursor * icur,const struct xfs_bmbt_irec * got,const struct xrep_cow_extent * repl)514 xrep_cow_replace_mapping(
515 	struct xfs_inode		*ip,
516 	struct xfs_iext_cursor		*icur,
517 	const struct xfs_bmbt_irec	*got,
518 	const struct xrep_cow_extent	*repl)
519 {
520 	struct xfs_bmbt_irec		new = *got; /* struct copy */
521 
522 	ASSERT(repl->len > 0);
523 	ASSERT(!isnullstartblock(got->br_startblock));
524 
525 	trace_xrep_cow_replace_mapping(ip, got, repl->fsbno, repl->len);
526 
527 	if (got->br_blockcount == repl->len) {
528 		/*
529 		 * The new extent is a complete replacement for the existing
530 		 * extent.  Update the COW fork record.
531 		 */
532 		new.br_startblock = repl->fsbno;
533 		xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
534 		return;
535 	}
536 
537 	/*
538 	 * The new extent can replace the beginning of the COW fork record.
539 	 * Move the left side of @got upwards, then insert the new record.
540 	 */
541 	new.br_startoff += repl->len;
542 	new.br_startblock += repl->len;
543 	new.br_blockcount -= repl->len;
544 	xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
545 
546 	new.br_startoff = got->br_startoff;
547 	new.br_startblock = repl->fsbno;
548 	new.br_blockcount = repl->len;
549 	xfs_iext_insert(ip, icur, &new, BMAP_COWFORK);
550 }
551 
552 /*
553  * Replace the unwritten CoW staging extent backing the given file range with a
554  * new space extent that isn't as problematic.
555  */
556 STATIC int
xrep_cow_replace_range(struct xrep_cow * xc,xfs_fileoff_t startoff,xfs_extlen_t * blockcount)557 xrep_cow_replace_range(
558 	struct xrep_cow		*xc,
559 	xfs_fileoff_t		startoff,
560 	xfs_extlen_t		*blockcount)
561 {
562 	struct xfs_iext_cursor	icur;
563 	struct xrep_cow_extent	repl;
564 	struct xfs_bmbt_irec	got;
565 	struct xfs_scrub	*sc = xc->sc;
566 	xfs_fileoff_t		nextoff;
567 	xfs_extlen_t		alloc_len;
568 	int			error;
569 
570 	/*
571 	 * Put the existing CoW fork mapping in @got.  If @got ends before
572 	 * @rep, truncate @rep so we only replace one extent mapping at a time.
573 	 */
574 	error = xrep_cow_find_mapping(xc, &icur, startoff, &got);
575 	if (error)
576 		return error;
577 	nextoff = min(startoff + *blockcount,
578 		      got.br_startoff + got.br_blockcount);
579 
580 	/*
581 	 * Allocate a replacement extent.  If we don't fill all the blocks,
582 	 * shorten the quantity that will be deleted in this step.
583 	 */
584 	alloc_len = min_t(xfs_fileoff_t, XFS_MAX_BMBT_EXTLEN,
585 			  nextoff - startoff);
586 	if (XFS_IS_REALTIME_INODE(sc->ip))
587 		error = xrep_cow_alloc_rt(sc, alloc_len, &repl);
588 	else
589 		error = xrep_cow_alloc(sc, alloc_len, &repl);
590 	if (error)
591 		return error;
592 
593 	/*
594 	 * Replace the old mapping with the new one, and commit the metadata
595 	 * changes made so far.
596 	 */
597 	xrep_cow_replace_mapping(sc->ip, &icur, &got, &repl);
598 
599 	xfs_inode_set_cowblocks_tag(sc->ip);
600 	error = xfs_defer_finish(&sc->tp);
601 	if (error)
602 		return error;
603 
604 	/* Note the old CoW staging extents; we'll reap them all later. */
605 	if (XFS_IS_REALTIME_INODE(sc->ip))
606 		error = xrtb_bitmap_set(&xc->old_cowfork_rtblocks,
607 				got.br_startblock, repl.len);
608 	else
609 		error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks,
610 				got.br_startblock, repl.len);
611 	if (error)
612 		return error;
613 
614 	*blockcount = repl.len;
615 	return 0;
616 }
617 
618 /*
619  * Replace a bad part of an unwritten CoW staging extent with a fresh delalloc
620  * reservation.
621  */
622 STATIC int
xrep_cow_replace(uint64_t startoff,uint64_t blockcount,void * priv)623 xrep_cow_replace(
624 	uint64_t		startoff,
625 	uint64_t		blockcount,
626 	void			*priv)
627 {
628 	struct xrep_cow		*xc = priv;
629 	int			error = 0;
630 
631 	while (blockcount > 0) {
632 		xfs_extlen_t	len = min_t(xfs_filblks_t, blockcount,
633 					    XFS_MAX_BMBT_EXTLEN);
634 
635 		error = xrep_cow_replace_range(xc, startoff, &len);
636 		if (error)
637 			break;
638 
639 		blockcount -= len;
640 		startoff += len;
641 	}
642 
643 	return error;
644 }
645 
646 /*
647  * Repair an inode's CoW fork.  The CoW fork is an in-core structure, so
648  * there's no btree to rebuid.  Instead, we replace any mappings that are
649  * cross-linked or lack ondisk CoW fork records in the refcount btree.
650  */
651 int
xrep_bmap_cow(struct xfs_scrub * sc)652 xrep_bmap_cow(
653 	struct xfs_scrub	*sc)
654 {
655 	struct xrep_cow		*xc;
656 	struct xfs_iext_cursor	icur;
657 	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, XFS_COW_FORK);
658 	int			error;
659 
660 	if (!xfs_has_rmapbt(sc->mp) || !xfs_has_reflink(sc->mp))
661 		return -EOPNOTSUPP;
662 
663 	if (!ifp)
664 		return 0;
665 
666 	/*
667 	 * Realtime files with large extent sizes are not supported because
668 	 * we could encounter an CoW mapping that has been partially written
669 	 * out *and* requires replacement, and there's no solution to that.
670 	 */
671 	if (xfs_inode_has_bigrtalloc(sc->ip))
672 		return -EOPNOTSUPP;
673 
674 	/* Metadata inodes aren't supposed to have data on the rt volume. */
675 	if (xfs_is_metadir_inode(sc->ip) && XFS_IS_REALTIME_INODE(sc->ip))
676 		return -EOPNOTSUPP;
677 
678 	/*
679 	 * If we're somehow not in extents format, then reinitialize it to
680 	 * an empty extent mapping fork and exit.
681 	 */
682 	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
683 		ifp->if_format = XFS_DINODE_FMT_EXTENTS;
684 		ifp->if_nextents = 0;
685 		return 0;
686 	}
687 
688 	xc = kzalloc(sizeof(struct xrep_cow), XCHK_GFP_FLAGS);
689 	if (!xc)
690 		return -ENOMEM;
691 
692 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
693 
694 	xc->sc = sc;
695 	xoff_bitmap_init(&xc->bad_fileoffs);
696 	if (XFS_IS_REALTIME_INODE(sc->ip))
697 		xrtb_bitmap_init(&xc->old_cowfork_rtblocks);
698 	else
699 		xfsb_bitmap_init(&xc->old_cowfork_fsblocks);
700 
701 	for_each_xfs_iext(ifp, &icur, &xc->irec) {
702 		if (xchk_should_terminate(sc, &error))
703 			goto out_bitmap;
704 
705 		/*
706 		 * delalloc reservations only exist incore, so there is no
707 		 * ondisk metadata that we can examine.  Hence we leave them
708 		 * alone.
709 		 */
710 		if (isnullstartblock(xc->irec.br_startblock))
711 			continue;
712 
713 		/*
714 		 * COW fork extents are only in the written state if writeback
715 		 * is actively writing to disk.  We cannot restart the write
716 		 * at a different disk address since we've already issued the
717 		 * IO, so we leave these alone and hope for the best.
718 		 */
719 		if (xfs_bmap_is_written_extent(&xc->irec))
720 			continue;
721 
722 		if (XFS_IS_REALTIME_INODE(sc->ip))
723 			error = xrep_cow_find_bad_rt(xc);
724 		else
725 			error = xrep_cow_find_bad(xc);
726 		if (error)
727 			goto out_bitmap;
728 	}
729 
730 	/* Replace any bad unwritten mappings with fresh reservations. */
731 	error = xoff_bitmap_walk(&xc->bad_fileoffs, xrep_cow_replace, xc);
732 	if (error)
733 		goto out_bitmap;
734 
735 	/*
736 	 * Reap as many of the old CoW blocks as we can.  They are owned ondisk
737 	 * by the refcount btree, not the inode, so it is correct to treat them
738 	 * like inode metadata.
739 	 */
740 	if (XFS_IS_REALTIME_INODE(sc->ip))
741 		error = xrep_reap_rtblocks(sc, &xc->old_cowfork_rtblocks,
742 				&XFS_RMAP_OINFO_COW);
743 	else
744 		error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks,
745 				&XFS_RMAP_OINFO_COW);
746 	if (error)
747 		goto out_bitmap;
748 
749 out_bitmap:
750 	if (XFS_IS_REALTIME_INODE(sc->ip))
751 		xrtb_bitmap_destroy(&xc->old_cowfork_rtblocks);
752 	else
753 		xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
754 	xoff_bitmap_destroy(&xc->bad_fileoffs);
755 	kfree(xc);
756 	return error;
757 }
758