1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_inode_fork.h"
18 #include "xfs_alloc.h"
19 #include "xfs_bmap.h"
20 #include "xfs_rmap.h"
21 #include "xfs_refcount.h"
22 #include "xfs_quota.h"
23 #include "xfs_ialloc.h"
24 #include "xfs_ag.h"
25 #include "xfs_error.h"
26 #include "xfs_errortag.h"
27 #include "xfs_icache.h"
28 #include "xfs_refcount_btree.h"
29 #include "xfs_rtalloc.h"
30 #include "xfs_rtbitmap.h"
31 #include "xfs_rtgroup.h"
32 #include "scrub/xfs_scrub.h"
33 #include "scrub/scrub.h"
34 #include "scrub/common.h"
35 #include "scrub/trace.h"
36 #include "scrub/repair.h"
37 #include "scrub/bitmap.h"
38 #include "scrub/off_bitmap.h"
39 #include "scrub/fsb_bitmap.h"
40 #include "scrub/rtb_bitmap.h"
41 #include "scrub/reap.h"
42
43 /*
44 * CoW Fork Mapping Repair
45 * =======================
46 *
47 * Although CoW staging extents are owned by incore CoW inode forks, on disk
48 * they are owned by the refcount btree. The ondisk metadata does not record
49 * any ownership information, which limits what we can do to repair the
50 * mappings in the CoW fork. At most, we can replace ifork mappings that lack
51 * an entry in the refcount btree or are described by a reverse mapping record
52 * whose owner is not OWN_COW.
53 *
54 * Replacing extents is also tricky -- we can't touch written CoW fork extents
55 * since they are undergoing writeback, and delalloc extents do not require
56 * repair since they only exist incore. Hence the most we can do is find the
57 * bad parts of unwritten mappings, allocate a replacement set of blocks, and
58 * replace the incore mapping. We use the regular reaping process to unmap
59 * or free the discarded blocks, as appropriate.
60 */
/*
 * In-memory state for one CoW fork repair.  The scan phase loads each CoW
 * fork mapping into @irec and accumulates the file offset ranges that need
 * replacing; the replacement phase records the physical blocks that were
 * swapped out of the fork so that they can be reaped at the end.
 */
struct xrep_cow {
	/* Scrub context that this repair runs under. */
	struct xfs_scrub	*sc;

	/* Bitmap of file offset ranges that need replacing. */
	struct xoff_bitmap	bad_fileoffs;

	/*
	 * Bitmap of fsblocks that were removed from the CoW fork.  The
	 * rtblocks member is the one used for realtime inodes and the
	 * fsblocks member is used for everything else.
	 */
	union {
		struct xfsb_bitmap	old_cowfork_fsblocks;
		struct xrtb_bitmap	old_cowfork_rtblocks;
	};

	/* CoW fork mappings used to scan for bad CoW staging extents. */
	struct xfs_bmbt_irec	irec;

	/* refcount btree block number of irec.br_startblock */
	unsigned int		irec_startbno;

	/* refcount btree block number of the next refcount record we expect */
	unsigned int		next_bno;
};
82
/*
 * CoW staging extent.  Describes the physical space that was allocated to
 * replace part of a CoW fork mapping.
 */
struct xrep_cow_extent {
	/* First block of the extent; an rt block number for realtime files. */
	xfs_fsblock_t		fsbno;

	/* Number of blocks in the extent. */
	xfs_extlen_t		len;
};
88
89 /*
90 * Mark the part of the file range that corresponds to the given physical
91 * space. Caller must ensure that the physical range is within xc->irec.
92 */
93 STATIC int
xrep_cow_mark_file_range(struct xrep_cow * xc,xfs_fsblock_t startblock,xfs_filblks_t blockcount)94 xrep_cow_mark_file_range(
95 struct xrep_cow *xc,
96 xfs_fsblock_t startblock,
97 xfs_filblks_t blockcount)
98 {
99 xfs_fileoff_t startoff;
100
101 startoff = xc->irec.br_startoff +
102 (startblock - xc->irec.br_startblock);
103
104 trace_xrep_cow_mark_file_range(xc->sc->ip, startblock, startoff,
105 blockcount);
106
107 return xoff_bitmap_set(&xc->bad_fileoffs, startoff, blockcount);
108 }
109
110 /*
111 * Trim @src to fit within the CoW fork mapping being examined, and put the
112 * result in @dst.
113 */
114 static inline void
xrep_cow_trim_refcount(struct xrep_cow * xc,struct xfs_refcount_irec * dst,const struct xfs_refcount_irec * src)115 xrep_cow_trim_refcount(
116 struct xrep_cow *xc,
117 struct xfs_refcount_irec *dst,
118 const struct xfs_refcount_irec *src)
119 {
120 unsigned int adj;
121
122 memcpy(dst, src, sizeof(*dst));
123
124 if (dst->rc_startblock < xc->irec_startbno) {
125 adj = xc->irec_startbno - dst->rc_startblock;
126 dst->rc_blockcount -= adj;
127 dst->rc_startblock += adj;
128 }
129
130 if (dst->rc_startblock + dst->rc_blockcount >
131 xc->irec_startbno + xc->irec.br_blockcount) {
132 adj = (dst->rc_startblock + dst->rc_blockcount) -
133 (xc->irec_startbno + xc->irec.br_blockcount);
134 dst->rc_blockcount -= adj;
135 }
136 }
137
138 /* Mark any shared CoW staging extents. */
139 STATIC int
xrep_cow_mark_shared_staging(struct xfs_btree_cur * cur,const struct xfs_refcount_irec * rec,void * priv)140 xrep_cow_mark_shared_staging(
141 struct xfs_btree_cur *cur,
142 const struct xfs_refcount_irec *rec,
143 void *priv)
144 {
145 struct xrep_cow *xc = priv;
146 struct xfs_refcount_irec rrec;
147
148 if (!xfs_refcount_check_domain(rec) ||
149 rec->rc_domain != XFS_REFC_DOMAIN_SHARED)
150 return -EFSCORRUPTED;
151
152 xrep_cow_trim_refcount(xc, &rrec, rec);
153
154 return xrep_cow_mark_file_range(xc,
155 xfs_gbno_to_fsb(cur->bc_group, rrec.rc_startblock),
156 rrec.rc_blockcount);
157 }
158
159 /*
160 * Mark any portion of the CoW fork file offset range where there is not a CoW
161 * staging extent record in the refcountbt, and keep a record of where we did
162 * find correct refcountbt records. Staging records are always cleaned out at
163 * mount time, so any two inodes trying to map the same staging area would have
164 * already taken the fs down due to refcount btree verifier errors. Hence this
165 * inode should be the sole creator of the staging extent records ondisk.
166 */
167 STATIC int
xrep_cow_mark_missing_staging(struct xfs_btree_cur * cur,const struct xfs_refcount_irec * rec,void * priv)168 xrep_cow_mark_missing_staging(
169 struct xfs_btree_cur *cur,
170 const struct xfs_refcount_irec *rec,
171 void *priv)
172 {
173 struct xrep_cow *xc = priv;
174 struct xfs_refcount_irec rrec;
175 int error;
176
177 if (!xfs_refcount_check_domain(rec) ||
178 rec->rc_domain != XFS_REFC_DOMAIN_COW)
179 return -EFSCORRUPTED;
180
181 xrep_cow_trim_refcount(xc, &rrec, rec);
182
183 if (xc->next_bno >= rrec.rc_startblock)
184 goto next;
185
186 error = xrep_cow_mark_file_range(xc,
187 xfs_gbno_to_fsb(cur->bc_group, xc->next_bno),
188 rrec.rc_startblock - xc->next_bno);
189 if (error)
190 return error;
191
192 next:
193 xc->next_bno = rrec.rc_startblock + rrec.rc_blockcount;
194 return 0;
195 }
196
197 /*
198 * Mark any area that does not correspond to a CoW staging rmap. These are
199 * cross-linked areas that must be avoided.
200 */
201 STATIC int
xrep_cow_mark_missing_staging_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)202 xrep_cow_mark_missing_staging_rmap(
203 struct xfs_btree_cur *cur,
204 const struct xfs_rmap_irec *rec,
205 void *priv)
206 {
207 struct xrep_cow *xc = priv;
208 xfs_agblock_t rec_bno;
209 xfs_extlen_t rec_len;
210 unsigned int adj;
211
212 if (rec->rm_owner == XFS_RMAP_OWN_COW)
213 return 0;
214
215 rec_bno = rec->rm_startblock;
216 rec_len = rec->rm_blockcount;
217 if (rec_bno < xc->irec_startbno) {
218 adj = xc->irec_startbno - rec_bno;
219 rec_len -= adj;
220 rec_bno += adj;
221 }
222
223 if (rec_bno + rec_len > xc->irec_startbno + xc->irec.br_blockcount) {
224 adj = (rec_bno + rec_len) -
225 (xc->irec_startbno + xc->irec.br_blockcount);
226 rec_len -= adj;
227 }
228
229 return xrep_cow_mark_file_range(xc,
230 xfs_gbno_to_fsb(cur->bc_group, rec_bno), rec_len);
231 }
232
233 /*
234 * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
235 * extent and mark the corresponding part of the file range in the bitmap.
236 */
237 STATIC int
xrep_cow_find_bad(struct xrep_cow * xc)238 xrep_cow_find_bad(
239 struct xrep_cow *xc)
240 {
241 struct xfs_refcount_irec rc_low = { 0 };
242 struct xfs_refcount_irec rc_high = { 0 };
243 struct xfs_rmap_irec rm_low = { 0 };
244 struct xfs_rmap_irec rm_high = { 0 };
245 struct xfs_perag *pag;
246 struct xfs_scrub *sc = xc->sc;
247 xfs_agnumber_t agno;
248 int error;
249
250 agno = XFS_FSB_TO_AGNO(sc->mp, xc->irec.br_startblock);
251 xc->irec_startbno = XFS_FSB_TO_AGBNO(sc->mp, xc->irec.br_startblock);
252
253 pag = xfs_perag_get(sc->mp, agno);
254 if (!pag)
255 return -EFSCORRUPTED;
256
257 error = xrep_ag_init(sc, pag, &sc->sa);
258 if (error)
259 goto out_pag;
260
261 /* Mark any CoW fork extents that are shared. */
262 rc_low.rc_startblock = xc->irec_startbno;
263 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
264 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
265 error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
266 xrep_cow_mark_shared_staging, xc);
267 if (error)
268 goto out_sa;
269
270 /* Make sure there are CoW staging extents for the whole mapping. */
271 rc_low.rc_startblock = xc->irec_startbno;
272 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
273 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
274 xc->next_bno = xc->irec_startbno;
275 error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
276 xrep_cow_mark_missing_staging, xc);
277 if (error)
278 goto out_sa;
279
280 if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
281 error = xrep_cow_mark_file_range(xc,
282 xfs_agbno_to_fsb(pag, xc->next_bno),
283 xc->irec_startbno + xc->irec.br_blockcount -
284 xc->next_bno);
285 if (error)
286 goto out_sa;
287 }
288
289 /* Mark any area has an rmap that isn't a COW staging extent. */
290 rm_low.rm_startblock = xc->irec_startbno;
291 memset(&rm_high, 0xFF, sizeof(rm_high));
292 rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
293 error = xfs_rmap_query_range(sc->sa.rmap_cur, &rm_low, &rm_high,
294 xrep_cow_mark_missing_staging_rmap, xc);
295 if (error)
296 goto out_sa;
297
298 /*
299 * If userspace is forcing us to rebuild the CoW fork or someone turned
300 * on the debugging knob, replace everything in the CoW fork.
301 */
302 if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
303 XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
304 error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
305 xc->irec.br_blockcount);
306
307 out_sa:
308 xchk_ag_free(sc, &sc->sa);
309 out_pag:
310 xfs_perag_put(pag);
311 return error;
312 }
313
314 /*
315 * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
316 * extent and mark the corresponding part of the file range in the bitmap.
317 */
318 STATIC int
xrep_cow_find_bad_rt(struct xrep_cow * xc)319 xrep_cow_find_bad_rt(
320 struct xrep_cow *xc)
321 {
322 struct xfs_refcount_irec rc_low = { 0 };
323 struct xfs_refcount_irec rc_high = { 0 };
324 struct xfs_rmap_irec rm_low = { 0 };
325 struct xfs_rmap_irec rm_high = { 0 };
326 struct xfs_scrub *sc = xc->sc;
327 struct xfs_rtgroup *rtg;
328 int error = 0;
329
330 xc->irec_startbno = xfs_rtb_to_rgbno(sc->mp, xc->irec.br_startblock);
331
332 rtg = xfs_rtgroup_get(sc->mp,
333 xfs_rtb_to_rgno(sc->mp, xc->irec.br_startblock));
334 if (!rtg)
335 return -EFSCORRUPTED;
336
337 error = xrep_rtgroup_init(sc, rtg, &sc->sr,
338 XFS_RTGLOCK_RMAP | XFS_RTGLOCK_REFCOUNT);
339 if (error)
340 goto out_rtg;
341
342 /* Mark any CoW fork extents that are shared. */
343 rc_low.rc_startblock = xc->irec_startbno;
344 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
345 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
346 error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
347 xrep_cow_mark_shared_staging, xc);
348 if (error)
349 goto out_sr;
350
351 /* Make sure there are CoW staging extents for the whole mapping. */
352 rc_low.rc_startblock = xc->irec_startbno;
353 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
354 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
355 xc->next_bno = xc->irec_startbno;
356 error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
357 xrep_cow_mark_missing_staging, xc);
358 if (error)
359 goto out_sr;
360
361 if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
362 error = xrep_cow_mark_file_range(xc,
363 xfs_rgbno_to_rtb(rtg, xc->next_bno),
364 xc->irec_startbno + xc->irec.br_blockcount -
365 xc->next_bno);
366 if (error)
367 goto out_sr;
368 }
369
370 /* Mark any area has an rmap that isn't a COW staging extent. */
371 rm_low.rm_startblock = xc->irec_startbno;
372 memset(&rm_high, 0xFF, sizeof(rm_high));
373 rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
374 error = xfs_rmap_query_range(sc->sr.rmap_cur, &rm_low, &rm_high,
375 xrep_cow_mark_missing_staging_rmap, xc);
376 if (error)
377 goto out_sr;
378
379 /*
380 * If userspace is forcing us to rebuild the CoW fork or someone
381 * turned on the debugging knob, replace everything in the
382 * CoW fork and then scan for staging extents in the refcountbt.
383 */
384 if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
385 XFS_TEST_ERROR(sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
386 error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
387 xc->irec.br_blockcount);
388
389 out_sr:
390 xchk_rtgroup_btcur_free(&sc->sr);
391 xchk_rtgroup_free(sc, &sc->sr);
392 out_rtg:
393 xfs_rtgroup_put(rtg);
394 return error;
395 }
396
397 /*
398 * Allocate a replacement CoW staging extent of up to the given number of
399 * blocks, and fill out the mapping.
400 */
401 STATIC int
xrep_cow_alloc(struct xfs_scrub * sc,xfs_extlen_t maxlen,struct xrep_cow_extent * repl)402 xrep_cow_alloc(
403 struct xfs_scrub *sc,
404 xfs_extlen_t maxlen,
405 struct xrep_cow_extent *repl)
406 {
407 struct xfs_alloc_arg args = {
408 .tp = sc->tp,
409 .mp = sc->mp,
410 .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE,
411 .minlen = 1,
412 .maxlen = maxlen,
413 .prod = 1,
414 .resv = XFS_AG_RESV_NONE,
415 .datatype = XFS_ALLOC_USERDATA,
416 };
417 int error;
418
419 error = xfs_trans_reserve_more(sc->tp, maxlen, 0);
420 if (error)
421 return error;
422
423 error = xfs_alloc_vextent_start_ag(&args,
424 XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino));
425 if (error)
426 return error;
427 if (args.fsbno == NULLFSBLOCK)
428 return -ENOSPC;
429
430 xfs_refcount_alloc_cow_extent(sc->tp, false, args.fsbno, args.len);
431
432 repl->fsbno = args.fsbno;
433 repl->len = args.len;
434 return 0;
435 }
436
437 /*
438 * Allocate a replacement rt CoW staging extent of up to the given number of
439 * blocks, and fill out the mapping.
440 */
441 STATIC int
xrep_cow_alloc_rt(struct xfs_scrub * sc,xfs_extlen_t maxlen,struct xrep_cow_extent * repl)442 xrep_cow_alloc_rt(
443 struct xfs_scrub *sc,
444 xfs_extlen_t maxlen,
445 struct xrep_cow_extent *repl)
446 {
447 xfs_rtxlen_t maxrtx = xfs_rtb_to_rtx(sc->mp, maxlen);
448 int error;
449
450 error = xfs_trans_reserve_more(sc->tp, 0, maxrtx);
451 if (error)
452 return error;
453
454 error = xfs_rtallocate_rtgs(sc->tp, NULLRTBLOCK, 1, maxrtx, 1, false,
455 false, &repl->fsbno, &repl->len);
456 if (error)
457 return error;
458
459 xfs_refcount_alloc_cow_extent(sc->tp, true, repl->fsbno, repl->len);
460 return 0;
461 }
462
463 /*
464 * Look up the current CoW fork mapping so that we only allocate enough to
465 * replace a single mapping. If we don't find a mapping that covers the start
466 * of the file range, or we find a delalloc or written extent, something is
467 * seriously wrong, since we didn't drop the ILOCK.
468 */
469 static inline int
xrep_cow_find_mapping(struct xrep_cow * xc,struct xfs_iext_cursor * icur,xfs_fileoff_t startoff,struct xfs_bmbt_irec * got)470 xrep_cow_find_mapping(
471 struct xrep_cow *xc,
472 struct xfs_iext_cursor *icur,
473 xfs_fileoff_t startoff,
474 struct xfs_bmbt_irec *got)
475 {
476 struct xfs_inode *ip = xc->sc->ip;
477 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
478
479 if (!xfs_iext_lookup_extent(ip, ifp, startoff, icur, got))
480 goto bad;
481
482 if (got->br_startoff > startoff)
483 goto bad;
484
485 if (got->br_blockcount == 0)
486 goto bad;
487
488 if (isnullstartblock(got->br_startblock))
489 goto bad;
490
491 if (xfs_bmap_is_written_extent(got))
492 goto bad;
493
494 return 0;
495 bad:
496 ASSERT(0);
497 return -EFSCORRUPTED;
498 }
499
/*
 * Flag bits that presumably name the left and right side of a mapping.
 * NOTE(review): neither flag is referenced by any code visible in this part
 * of the file -- confirm whether they are still needed.
 */
#define REPLACE_LEFT_SIDE	(1U << 0)
#define REPLACE_RIGHT_SIDE	(1U << 1)
502
503 /*
504 * Given a CoW fork mapping @got and a replacement mapping @repl, remap the
505 * beginning of @got with the space described by @rep.
506 */
507 static inline void
xrep_cow_replace_mapping(struct xfs_inode * ip,struct xfs_iext_cursor * icur,const struct xfs_bmbt_irec * got,const struct xrep_cow_extent * repl)508 xrep_cow_replace_mapping(
509 struct xfs_inode *ip,
510 struct xfs_iext_cursor *icur,
511 const struct xfs_bmbt_irec *got,
512 const struct xrep_cow_extent *repl)
513 {
514 struct xfs_bmbt_irec new = *got; /* struct copy */
515
516 ASSERT(repl->len > 0);
517 ASSERT(!isnullstartblock(got->br_startblock));
518
519 trace_xrep_cow_replace_mapping(ip, got, repl->fsbno, repl->len);
520
521 if (got->br_blockcount == repl->len) {
522 /*
523 * The new extent is a complete replacement for the existing
524 * extent. Update the COW fork record.
525 */
526 new.br_startblock = repl->fsbno;
527 xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
528 return;
529 }
530
531 /*
532 * The new extent can replace the beginning of the COW fork record.
533 * Move the left side of @got upwards, then insert the new record.
534 */
535 new.br_startoff += repl->len;
536 new.br_startblock += repl->len;
537 new.br_blockcount -= repl->len;
538 xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
539
540 new.br_startoff = got->br_startoff;
541 new.br_startblock = repl->fsbno;
542 new.br_blockcount = repl->len;
543 xfs_iext_insert(ip, icur, &new, BMAP_COWFORK);
544 }
545
546 /*
547 * Replace the unwritten CoW staging extent backing the given file range with a
548 * new space extent that isn't as problematic.
549 */
550 STATIC int
xrep_cow_replace_range(struct xrep_cow * xc,xfs_fileoff_t startoff,xfs_extlen_t * blockcount)551 xrep_cow_replace_range(
552 struct xrep_cow *xc,
553 xfs_fileoff_t startoff,
554 xfs_extlen_t *blockcount)
555 {
556 struct xfs_iext_cursor icur;
557 struct xrep_cow_extent repl;
558 struct xfs_bmbt_irec got;
559 struct xfs_scrub *sc = xc->sc;
560 xfs_fileoff_t nextoff;
561 xfs_extlen_t alloc_len;
562 int error;
563
564 /*
565 * Put the existing CoW fork mapping in @got. If @got ends before
566 * @rep, truncate @rep so we only replace one extent mapping at a time.
567 */
568 error = xrep_cow_find_mapping(xc, &icur, startoff, &got);
569 if (error)
570 return error;
571 nextoff = min(startoff + *blockcount,
572 got.br_startoff + got.br_blockcount);
573
574 /*
575 * Allocate a replacement extent. If we don't fill all the blocks,
576 * shorten the quantity that will be deleted in this step.
577 */
578 alloc_len = min_t(xfs_fileoff_t, XFS_MAX_BMBT_EXTLEN,
579 nextoff - startoff);
580 if (XFS_IS_REALTIME_INODE(sc->ip))
581 error = xrep_cow_alloc_rt(sc, alloc_len, &repl);
582 else
583 error = xrep_cow_alloc(sc, alloc_len, &repl);
584 if (error)
585 return error;
586
587 /*
588 * Replace the old mapping with the new one, and commit the metadata
589 * changes made so far.
590 */
591 xrep_cow_replace_mapping(sc->ip, &icur, &got, &repl);
592
593 xfs_inode_set_cowblocks_tag(sc->ip);
594 error = xfs_defer_finish(&sc->tp);
595 if (error)
596 return error;
597
598 /* Note the old CoW staging extents; we'll reap them all later. */
599 if (XFS_IS_REALTIME_INODE(sc->ip))
600 error = xrtb_bitmap_set(&xc->old_cowfork_rtblocks,
601 got.br_startblock, repl.len);
602 else
603 error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks,
604 got.br_startblock, repl.len);
605 if (error)
606 return error;
607
608 *blockcount = repl.len;
609 return 0;
610 }
611
612 /*
613 * Replace a bad part of an unwritten CoW staging extent with a fresh delalloc
614 * reservation.
615 */
616 STATIC int
xrep_cow_replace(uint64_t startoff,uint64_t blockcount,void * priv)617 xrep_cow_replace(
618 uint64_t startoff,
619 uint64_t blockcount,
620 void *priv)
621 {
622 struct xrep_cow *xc = priv;
623 int error = 0;
624
625 while (blockcount > 0) {
626 xfs_extlen_t len = min_t(xfs_filblks_t, blockcount,
627 XFS_MAX_BMBT_EXTLEN);
628
629 error = xrep_cow_replace_range(xc, startoff, &len);
630 if (error)
631 break;
632
633 blockcount -= len;
634 startoff += len;
635 }
636
637 return error;
638 }
639
640 /*
641 * Repair an inode's CoW fork. The CoW fork is an in-core structure, so
642 * there's no btree to rebuid. Instead, we replace any mappings that are
643 * cross-linked or lack ondisk CoW fork records in the refcount btree.
644 */
645 int
xrep_bmap_cow(struct xfs_scrub * sc)646 xrep_bmap_cow(
647 struct xfs_scrub *sc)
648 {
649 struct xrep_cow *xc;
650 struct xfs_iext_cursor icur;
651 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_COW_FORK);
652 int error;
653
654 if (!xfs_has_rmapbt(sc->mp) || !xfs_has_reflink(sc->mp))
655 return -EOPNOTSUPP;
656
657 if (!ifp)
658 return 0;
659
660 /*
661 * Realtime files with large extent sizes are not supported because
662 * we could encounter an CoW mapping that has been partially written
663 * out *and* requires replacement, and there's no solution to that.
664 */
665 if (xfs_inode_has_bigrtalloc(sc->ip))
666 return -EOPNOTSUPP;
667
668 /* Metadata inodes aren't supposed to have data on the rt volume. */
669 if (xfs_is_metadir_inode(sc->ip) && XFS_IS_REALTIME_INODE(sc->ip))
670 return -EOPNOTSUPP;
671
672 /*
673 * If we're somehow not in extents format, then reinitialize it to
674 * an empty extent mapping fork and exit.
675 */
676 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
677 ifp->if_format = XFS_DINODE_FMT_EXTENTS;
678 ifp->if_nextents = 0;
679 return 0;
680 }
681
682 xc = kzalloc_obj(struct xrep_cow, XCHK_GFP_FLAGS);
683 if (!xc)
684 return -ENOMEM;
685
686 xfs_trans_ijoin(sc->tp, sc->ip, 0);
687
688 xc->sc = sc;
689 xoff_bitmap_init(&xc->bad_fileoffs);
690 if (XFS_IS_REALTIME_INODE(sc->ip))
691 xrtb_bitmap_init(&xc->old_cowfork_rtblocks);
692 else
693 xfsb_bitmap_init(&xc->old_cowfork_fsblocks);
694
695 for_each_xfs_iext(ifp, &icur, &xc->irec) {
696 if (xchk_should_terminate(sc, &error))
697 goto out_bitmap;
698
699 /*
700 * delalloc reservations only exist incore, so there is no
701 * ondisk metadata that we can examine. Hence we leave them
702 * alone.
703 */
704 if (isnullstartblock(xc->irec.br_startblock))
705 continue;
706
707 /*
708 * COW fork extents are only in the written state if writeback
709 * is actively writing to disk. We cannot restart the write
710 * at a different disk address since we've already issued the
711 * IO, so we leave these alone and hope for the best.
712 */
713 if (xfs_bmap_is_written_extent(&xc->irec))
714 continue;
715
716 if (XFS_IS_REALTIME_INODE(sc->ip))
717 error = xrep_cow_find_bad_rt(xc);
718 else
719 error = xrep_cow_find_bad(xc);
720 if (error)
721 goto out_bitmap;
722 }
723
724 /* Replace any bad unwritten mappings with fresh reservations. */
725 error = xoff_bitmap_walk(&xc->bad_fileoffs, xrep_cow_replace, xc);
726 if (error)
727 goto out_bitmap;
728
729 /*
730 * Reap as many of the old CoW blocks as we can. They are owned ondisk
731 * by the refcount btree, not the inode, so it is correct to treat them
732 * like inode metadata.
733 */
734 if (XFS_IS_REALTIME_INODE(sc->ip))
735 error = xrep_reap_rtblocks(sc, &xc->old_cowfork_rtblocks,
736 &XFS_RMAP_OINFO_COW);
737 else
738 error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks,
739 &XFS_RMAP_OINFO_COW);
740 if (error)
741 goto out_bitmap;
742
743 out_bitmap:
744 if (XFS_IS_REALTIME_INODE(sc->ip))
745 xrtb_bitmap_destroy(&xc->old_cowfork_rtblocks);
746 else
747 xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
748 xoff_bitmap_destroy(&xc->bad_fileoffs);
749 kfree(xc);
750 return error;
751 }
752