1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2022-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_inode_fork.h"
18 #include "xfs_alloc.h"
19 #include "xfs_bmap.h"
20 #include "xfs_rmap.h"
21 #include "xfs_refcount.h"
22 #include "xfs_quota.h"
23 #include "xfs_ialloc.h"
24 #include "xfs_ag.h"
25 #include "xfs_error.h"
26 #include "xfs_errortag.h"
27 #include "xfs_icache.h"
28 #include "xfs_refcount_btree.h"
29 #include "xfs_rtalloc.h"
30 #include "xfs_rtbitmap.h"
31 #include "xfs_rtgroup.h"
32 #include "scrub/xfs_scrub.h"
33 #include "scrub/scrub.h"
34 #include "scrub/common.h"
35 #include "scrub/trace.h"
36 #include "scrub/repair.h"
37 #include "scrub/bitmap.h"
38 #include "scrub/off_bitmap.h"
39 #include "scrub/fsb_bitmap.h"
40 #include "scrub/rtb_bitmap.h"
41 #include "scrub/reap.h"
42
43 /*
44 * CoW Fork Mapping Repair
45 * =======================
46 *
47 * Although CoW staging extents are owned by incore CoW inode forks, on disk
48 * they are owned by the refcount btree. The ondisk metadata does not record
49 * any ownership information, which limits what we can do to repair the
50 * mappings in the CoW fork. At most, we can replace ifork mappings that lack
51 * an entry in the refcount btree or are described by a reverse mapping record
52 * whose owner is not OWN_COW.
53 *
54 * Replacing extents is also tricky -- we can't touch written CoW fork extents
55 * since they are undergoing writeback, and delalloc extents do not require
56 * repair since they only exist incore. Hence the most we can do is find the
57 * bad parts of unwritten mappings, allocate a replacement set of blocks, and
58 * replace the incore mapping. We use the regular reaping process to unmap
59 * or free the discarded blocks, as appropriate.
60 */
61 struct xrep_cow {
62 struct xfs_scrub *sc;
63
64 /* Bitmap of file offset ranges that need replacing. */
65 struct xoff_bitmap bad_fileoffs;
66
67 /* Bitmap of fsblocks that were removed from the CoW fork. */
68 union {
69 struct xfsb_bitmap old_cowfork_fsblocks;
70 struct xrtb_bitmap old_cowfork_rtblocks;
71 };
72
73 /* CoW fork mappings used to scan for bad CoW staging extents. */
74 struct xfs_bmbt_irec irec;
75
76 /* refcount btree block number of irec.br_startblock */
77 unsigned int irec_startbno;
78
79 /* refcount btree block number of the next refcount record we expect */
80 unsigned int next_bno;
81 };
82
83 /* CoW staging extent. */
84 struct xrep_cow_extent {
85 xfs_fsblock_t fsbno;
86 xfs_extlen_t len;
87 };
88
89 /*
90 * Mark the part of the file range that corresponds to the given physical
91 * space. Caller must ensure that the physical range is within xc->irec.
92 */
93 STATIC int
xrep_cow_mark_file_range(struct xrep_cow * xc,xfs_fsblock_t startblock,xfs_filblks_t blockcount)94 xrep_cow_mark_file_range(
95 struct xrep_cow *xc,
96 xfs_fsblock_t startblock,
97 xfs_filblks_t blockcount)
98 {
99 xfs_fileoff_t startoff;
100
101 startoff = xc->irec.br_startoff +
102 (startblock - xc->irec.br_startblock);
103
104 trace_xrep_cow_mark_file_range(xc->sc->ip, startblock, startoff,
105 blockcount);
106
107 return xoff_bitmap_set(&xc->bad_fileoffs, startoff, blockcount);
108 }
109
110 /*
111 * Trim @src to fit within the CoW fork mapping being examined, and put the
112 * result in @dst.
113 */
114 static inline void
xrep_cow_trim_refcount(struct xrep_cow * xc,struct xfs_refcount_irec * dst,const struct xfs_refcount_irec * src)115 xrep_cow_trim_refcount(
116 struct xrep_cow *xc,
117 struct xfs_refcount_irec *dst,
118 const struct xfs_refcount_irec *src)
119 {
120 unsigned int adj;
121
122 memcpy(dst, src, sizeof(*dst));
123
124 if (dst->rc_startblock < xc->irec_startbno) {
125 adj = xc->irec_startbno - dst->rc_startblock;
126 dst->rc_blockcount -= adj;
127 dst->rc_startblock += adj;
128 }
129
130 if (dst->rc_startblock + dst->rc_blockcount >
131 xc->irec_startbno + xc->irec.br_blockcount) {
132 adj = (dst->rc_startblock + dst->rc_blockcount) -
133 (xc->irec_startbno + xc->irec.br_blockcount);
134 dst->rc_blockcount -= adj;
135 }
136 }
137
138 /* Mark any shared CoW staging extents. */
139 STATIC int
xrep_cow_mark_shared_staging(struct xfs_btree_cur * cur,const struct xfs_refcount_irec * rec,void * priv)140 xrep_cow_mark_shared_staging(
141 struct xfs_btree_cur *cur,
142 const struct xfs_refcount_irec *rec,
143 void *priv)
144 {
145 struct xrep_cow *xc = priv;
146 struct xfs_refcount_irec rrec;
147
148 if (!xfs_refcount_check_domain(rec) ||
149 rec->rc_domain != XFS_REFC_DOMAIN_SHARED)
150 return -EFSCORRUPTED;
151
152 xrep_cow_trim_refcount(xc, &rrec, rec);
153
154 return xrep_cow_mark_file_range(xc,
155 xfs_gbno_to_fsb(cur->bc_group, rrec.rc_startblock),
156 rrec.rc_blockcount);
157 }
158
159 /*
160 * Mark any portion of the CoW fork file offset range where there is not a CoW
161 * staging extent record in the refcountbt, and keep a record of where we did
162 * find correct refcountbt records. Staging records are always cleaned out at
163 * mount time, so any two inodes trying to map the same staging area would have
164 * already taken the fs down due to refcount btree verifier errors. Hence this
165 * inode should be the sole creator of the staging extent records ondisk.
166 */
167 STATIC int
xrep_cow_mark_missing_staging(struct xfs_btree_cur * cur,const struct xfs_refcount_irec * rec,void * priv)168 xrep_cow_mark_missing_staging(
169 struct xfs_btree_cur *cur,
170 const struct xfs_refcount_irec *rec,
171 void *priv)
172 {
173 struct xrep_cow *xc = priv;
174 struct xfs_refcount_irec rrec;
175 int error;
176
177 if (!xfs_refcount_check_domain(rec) ||
178 rec->rc_domain != XFS_REFC_DOMAIN_COW)
179 return -EFSCORRUPTED;
180
181 xrep_cow_trim_refcount(xc, &rrec, rec);
182
183 if (xc->next_bno >= rrec.rc_startblock)
184 goto next;
185
186 error = xrep_cow_mark_file_range(xc,
187 xfs_gbno_to_fsb(cur->bc_group, xc->next_bno),
188 rrec.rc_startblock - xc->next_bno);
189 if (error)
190 return error;
191
192 next:
193 xc->next_bno = rrec.rc_startblock + rrec.rc_blockcount;
194 return 0;
195 }
196
197 /*
198 * Mark any area that does not correspond to a CoW staging rmap. These are
199 * cross-linked areas that must be avoided.
200 */
201 STATIC int
xrep_cow_mark_missing_staging_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)202 xrep_cow_mark_missing_staging_rmap(
203 struct xfs_btree_cur *cur,
204 const struct xfs_rmap_irec *rec,
205 void *priv)
206 {
207 struct xrep_cow *xc = priv;
208 xfs_agblock_t rec_bno;
209 xfs_extlen_t rec_len;
210 unsigned int adj;
211
212 if (rec->rm_owner == XFS_RMAP_OWN_COW)
213 return 0;
214
215 rec_bno = rec->rm_startblock;
216 rec_len = rec->rm_blockcount;
217 if (rec_bno < xc->irec_startbno) {
218 adj = xc->irec_startbno - rec_bno;
219 rec_len -= adj;
220 rec_bno += adj;
221 }
222
223 if (rec_bno + rec_len > xc->irec_startbno + xc->irec.br_blockcount) {
224 adj = (rec_bno + rec_len) -
225 (xc->irec_startbno + xc->irec.br_blockcount);
226 rec_len -= adj;
227 }
228
229 return xrep_cow_mark_file_range(xc,
230 xfs_gbno_to_fsb(cur->bc_group, rec_bno), rec_len);
231 }
232
233 /*
234 * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
235 * extent and mark the corresponding part of the file range in the bitmap.
236 */
237 STATIC int
xrep_cow_find_bad(struct xrep_cow * xc)238 xrep_cow_find_bad(
239 struct xrep_cow *xc)
240 {
241 struct xfs_refcount_irec rc_low = { 0 };
242 struct xfs_refcount_irec rc_high = { 0 };
243 struct xfs_rmap_irec rm_low = { 0 };
244 struct xfs_rmap_irec rm_high = { 0 };
245 struct xfs_perag *pag;
246 struct xfs_scrub *sc = xc->sc;
247 xfs_agnumber_t agno;
248 int error;
249
250 agno = XFS_FSB_TO_AGNO(sc->mp, xc->irec.br_startblock);
251 xc->irec_startbno = XFS_FSB_TO_AGBNO(sc->mp, xc->irec.br_startblock);
252
253 pag = xfs_perag_get(sc->mp, agno);
254 if (!pag)
255 return -EFSCORRUPTED;
256
257 error = xrep_ag_init(sc, pag, &sc->sa);
258 if (error)
259 goto out_pag;
260
261 /* Mark any CoW fork extents that are shared. */
262 rc_low.rc_startblock = xc->irec_startbno;
263 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
264 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
265 error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
266 xrep_cow_mark_shared_staging, xc);
267 if (error)
268 goto out_sa;
269
270 /* Make sure there are CoW staging extents for the whole mapping. */
271 rc_low.rc_startblock = xc->irec_startbno;
272 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
273 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
274 xc->next_bno = xc->irec_startbno;
275 error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high,
276 xrep_cow_mark_missing_staging, xc);
277 if (error)
278 goto out_sa;
279
280 if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
281 error = xrep_cow_mark_file_range(xc,
282 xfs_agbno_to_fsb(pag, xc->next_bno),
283 xc->irec_startbno + xc->irec.br_blockcount -
284 xc->next_bno);
285 if (error)
286 goto out_sa;
287 }
288
289 /* Mark any area has an rmap that isn't a COW staging extent. */
290 rm_low.rm_startblock = xc->irec_startbno;
291 memset(&rm_high, 0xFF, sizeof(rm_high));
292 rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
293 error = xfs_rmap_query_range(sc->sa.rmap_cur, &rm_low, &rm_high,
294 xrep_cow_mark_missing_staging_rmap, xc);
295 if (error)
296 goto out_sa;
297
298 /*
299 * If userspace is forcing us to rebuild the CoW fork or someone turned
300 * on the debugging knob, replace everything in the CoW fork.
301 */
302 if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
303 XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
304 error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
305 xc->irec.br_blockcount);
306 if (error)
307 return error;
308 }
309
310 out_sa:
311 xchk_ag_free(sc, &sc->sa);
312 out_pag:
313 xfs_perag_put(pag);
314 return 0;
315 }
316
317 /*
318 * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
319 * extent and mark the corresponding part of the file range in the bitmap.
320 */
321 STATIC int
xrep_cow_find_bad_rt(struct xrep_cow * xc)322 xrep_cow_find_bad_rt(
323 struct xrep_cow *xc)
324 {
325 struct xfs_refcount_irec rc_low = { 0 };
326 struct xfs_refcount_irec rc_high = { 0 };
327 struct xfs_rmap_irec rm_low = { 0 };
328 struct xfs_rmap_irec rm_high = { 0 };
329 struct xfs_scrub *sc = xc->sc;
330 struct xfs_rtgroup *rtg;
331 int error = 0;
332
333 xc->irec_startbno = xfs_rtb_to_rgbno(sc->mp, xc->irec.br_startblock);
334
335 rtg = xfs_rtgroup_get(sc->mp,
336 xfs_rtb_to_rgno(sc->mp, xc->irec.br_startblock));
337 if (!rtg)
338 return -EFSCORRUPTED;
339
340 error = xrep_rtgroup_init(sc, rtg, &sc->sr,
341 XFS_RTGLOCK_RMAP | XFS_RTGLOCK_REFCOUNT);
342 if (error)
343 goto out_rtg;
344
345 /* Mark any CoW fork extents that are shared. */
346 rc_low.rc_startblock = xc->irec_startbno;
347 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
348 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
349 error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
350 xrep_cow_mark_shared_staging, xc);
351 if (error)
352 goto out_sr;
353
354 /* Make sure there are CoW staging extents for the whole mapping. */
355 rc_low.rc_startblock = xc->irec_startbno;
356 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
357 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
358 xc->next_bno = xc->irec_startbno;
359 error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
360 xrep_cow_mark_missing_staging, xc);
361 if (error)
362 goto out_sr;
363
364 if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
365 error = xrep_cow_mark_file_range(xc,
366 xfs_rgbno_to_rtb(rtg, xc->next_bno),
367 xc->irec_startbno + xc->irec.br_blockcount -
368 xc->next_bno);
369 if (error)
370 goto out_sr;
371 }
372
373 /* Mark any area has an rmap that isn't a COW staging extent. */
374 rm_low.rm_startblock = xc->irec_startbno;
375 memset(&rm_high, 0xFF, sizeof(rm_high));
376 rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
377 error = xfs_rmap_query_range(sc->sr.rmap_cur, &rm_low, &rm_high,
378 xrep_cow_mark_missing_staging_rmap, xc);
379 if (error)
380 goto out_sr;
381
382 /*
383 * If userspace is forcing us to rebuild the CoW fork or someone
384 * turned on the debugging knob, replace everything in the
385 * CoW fork and then scan for staging extents in the refcountbt.
386 */
387 if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
388 XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
389 error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
390 xc->irec.br_blockcount);
391 if (error)
392 goto out_rtg;
393 }
394
395 out_sr:
396 xchk_rtgroup_btcur_free(&sc->sr);
397 xchk_rtgroup_free(sc, &sc->sr);
398 out_rtg:
399 xfs_rtgroup_put(rtg);
400 return error;
401 }
402
403 /*
404 * Allocate a replacement CoW staging extent of up to the given number of
405 * blocks, and fill out the mapping.
406 */
407 STATIC int
xrep_cow_alloc(struct xfs_scrub * sc,xfs_extlen_t maxlen,struct xrep_cow_extent * repl)408 xrep_cow_alloc(
409 struct xfs_scrub *sc,
410 xfs_extlen_t maxlen,
411 struct xrep_cow_extent *repl)
412 {
413 struct xfs_alloc_arg args = {
414 .tp = sc->tp,
415 .mp = sc->mp,
416 .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE,
417 .minlen = 1,
418 .maxlen = maxlen,
419 .prod = 1,
420 .resv = XFS_AG_RESV_NONE,
421 .datatype = XFS_ALLOC_USERDATA,
422 };
423 int error;
424
425 error = xfs_trans_reserve_more(sc->tp, maxlen, 0);
426 if (error)
427 return error;
428
429 error = xfs_alloc_vextent_start_ag(&args,
430 XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino));
431 if (error)
432 return error;
433 if (args.fsbno == NULLFSBLOCK)
434 return -ENOSPC;
435
436 xfs_refcount_alloc_cow_extent(sc->tp, false, args.fsbno, args.len);
437
438 repl->fsbno = args.fsbno;
439 repl->len = args.len;
440 return 0;
441 }
442
443 /*
444 * Allocate a replacement rt CoW staging extent of up to the given number of
445 * blocks, and fill out the mapping.
446 */
447 STATIC int
xrep_cow_alloc_rt(struct xfs_scrub * sc,xfs_extlen_t maxlen,struct xrep_cow_extent * repl)448 xrep_cow_alloc_rt(
449 struct xfs_scrub *sc,
450 xfs_extlen_t maxlen,
451 struct xrep_cow_extent *repl)
452 {
453 xfs_rtxlen_t maxrtx = xfs_rtb_to_rtx(sc->mp, maxlen);
454 int error;
455
456 error = xfs_trans_reserve_more(sc->tp, 0, maxrtx);
457 if (error)
458 return error;
459
460 error = xfs_rtallocate_rtgs(sc->tp, NULLRTBLOCK, 1, maxrtx, 1, false,
461 false, &repl->fsbno, &repl->len);
462 if (error)
463 return error;
464
465 xfs_refcount_alloc_cow_extent(sc->tp, true, repl->fsbno, repl->len);
466 return 0;
467 }
468
469 /*
470 * Look up the current CoW fork mapping so that we only allocate enough to
471 * replace a single mapping. If we don't find a mapping that covers the start
472 * of the file range, or we find a delalloc or written extent, something is
473 * seriously wrong, since we didn't drop the ILOCK.
474 */
475 static inline int
xrep_cow_find_mapping(struct xrep_cow * xc,struct xfs_iext_cursor * icur,xfs_fileoff_t startoff,struct xfs_bmbt_irec * got)476 xrep_cow_find_mapping(
477 struct xrep_cow *xc,
478 struct xfs_iext_cursor *icur,
479 xfs_fileoff_t startoff,
480 struct xfs_bmbt_irec *got)
481 {
482 struct xfs_inode *ip = xc->sc->ip;
483 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
484
485 if (!xfs_iext_lookup_extent(ip, ifp, startoff, icur, got))
486 goto bad;
487
488 if (got->br_startoff > startoff)
489 goto bad;
490
491 if (got->br_blockcount == 0)
492 goto bad;
493
494 if (isnullstartblock(got->br_startblock))
495 goto bad;
496
497 if (xfs_bmap_is_written_extent(got))
498 goto bad;
499
500 return 0;
501 bad:
502 ASSERT(0);
503 return -EFSCORRUPTED;
504 }
505
/*
 * Single-bit flags, presumably naming which side of a mapping to replace.
 * NOTE(review): nothing in the code visible here references them.
 */
#define REPLACE_LEFT_SIDE	(1U << 0)
#define REPLACE_RIGHT_SIDE	(1U << 1)
508
509 /*
510 * Given a CoW fork mapping @got and a replacement mapping @repl, remap the
511 * beginning of @got with the space described by @rep.
512 */
513 static inline void
xrep_cow_replace_mapping(struct xfs_inode * ip,struct xfs_iext_cursor * icur,const struct xfs_bmbt_irec * got,const struct xrep_cow_extent * repl)514 xrep_cow_replace_mapping(
515 struct xfs_inode *ip,
516 struct xfs_iext_cursor *icur,
517 const struct xfs_bmbt_irec *got,
518 const struct xrep_cow_extent *repl)
519 {
520 struct xfs_bmbt_irec new = *got; /* struct copy */
521
522 ASSERT(repl->len > 0);
523 ASSERT(!isnullstartblock(got->br_startblock));
524
525 trace_xrep_cow_replace_mapping(ip, got, repl->fsbno, repl->len);
526
527 if (got->br_blockcount == repl->len) {
528 /*
529 * The new extent is a complete replacement for the existing
530 * extent. Update the COW fork record.
531 */
532 new.br_startblock = repl->fsbno;
533 xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
534 return;
535 }
536
537 /*
538 * The new extent can replace the beginning of the COW fork record.
539 * Move the left side of @got upwards, then insert the new record.
540 */
541 new.br_startoff += repl->len;
542 new.br_startblock += repl->len;
543 new.br_blockcount -= repl->len;
544 xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new);
545
546 new.br_startoff = got->br_startoff;
547 new.br_startblock = repl->fsbno;
548 new.br_blockcount = repl->len;
549 xfs_iext_insert(ip, icur, &new, BMAP_COWFORK);
550 }
551
552 /*
553 * Replace the unwritten CoW staging extent backing the given file range with a
554 * new space extent that isn't as problematic.
555 */
556 STATIC int
xrep_cow_replace_range(struct xrep_cow * xc,xfs_fileoff_t startoff,xfs_extlen_t * blockcount)557 xrep_cow_replace_range(
558 struct xrep_cow *xc,
559 xfs_fileoff_t startoff,
560 xfs_extlen_t *blockcount)
561 {
562 struct xfs_iext_cursor icur;
563 struct xrep_cow_extent repl;
564 struct xfs_bmbt_irec got;
565 struct xfs_scrub *sc = xc->sc;
566 xfs_fileoff_t nextoff;
567 xfs_extlen_t alloc_len;
568 int error;
569
570 /*
571 * Put the existing CoW fork mapping in @got. If @got ends before
572 * @rep, truncate @rep so we only replace one extent mapping at a time.
573 */
574 error = xrep_cow_find_mapping(xc, &icur, startoff, &got);
575 if (error)
576 return error;
577 nextoff = min(startoff + *blockcount,
578 got.br_startoff + got.br_blockcount);
579
580 /*
581 * Allocate a replacement extent. If we don't fill all the blocks,
582 * shorten the quantity that will be deleted in this step.
583 */
584 alloc_len = min_t(xfs_fileoff_t, XFS_MAX_BMBT_EXTLEN,
585 nextoff - startoff);
586 if (XFS_IS_REALTIME_INODE(sc->ip))
587 error = xrep_cow_alloc_rt(sc, alloc_len, &repl);
588 else
589 error = xrep_cow_alloc(sc, alloc_len, &repl);
590 if (error)
591 return error;
592
593 /*
594 * Replace the old mapping with the new one, and commit the metadata
595 * changes made so far.
596 */
597 xrep_cow_replace_mapping(sc->ip, &icur, &got, &repl);
598
599 xfs_inode_set_cowblocks_tag(sc->ip);
600 error = xfs_defer_finish(&sc->tp);
601 if (error)
602 return error;
603
604 /* Note the old CoW staging extents; we'll reap them all later. */
605 if (XFS_IS_REALTIME_INODE(sc->ip))
606 error = xrtb_bitmap_set(&xc->old_cowfork_rtblocks,
607 got.br_startblock, repl.len);
608 else
609 error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks,
610 got.br_startblock, repl.len);
611 if (error)
612 return error;
613
614 *blockcount = repl.len;
615 return 0;
616 }
617
618 /*
619 * Replace a bad part of an unwritten CoW staging extent with a fresh delalloc
620 * reservation.
621 */
622 STATIC int
xrep_cow_replace(uint64_t startoff,uint64_t blockcount,void * priv)623 xrep_cow_replace(
624 uint64_t startoff,
625 uint64_t blockcount,
626 void *priv)
627 {
628 struct xrep_cow *xc = priv;
629 int error = 0;
630
631 while (blockcount > 0) {
632 xfs_extlen_t len = min_t(xfs_filblks_t, blockcount,
633 XFS_MAX_BMBT_EXTLEN);
634
635 error = xrep_cow_replace_range(xc, startoff, &len);
636 if (error)
637 break;
638
639 blockcount -= len;
640 startoff += len;
641 }
642
643 return error;
644 }
645
646 /*
647 * Repair an inode's CoW fork. The CoW fork is an in-core structure, so
648 * there's no btree to rebuid. Instead, we replace any mappings that are
649 * cross-linked or lack ondisk CoW fork records in the refcount btree.
650 */
651 int
xrep_bmap_cow(struct xfs_scrub * sc)652 xrep_bmap_cow(
653 struct xfs_scrub *sc)
654 {
655 struct xrep_cow *xc;
656 struct xfs_iext_cursor icur;
657 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_COW_FORK);
658 int error;
659
660 if (!xfs_has_rmapbt(sc->mp) || !xfs_has_reflink(sc->mp))
661 return -EOPNOTSUPP;
662
663 if (!ifp)
664 return 0;
665
666 /*
667 * Realtime files with large extent sizes are not supported because
668 * we could encounter an CoW mapping that has been partially written
669 * out *and* requires replacement, and there's no solution to that.
670 */
671 if (xfs_inode_has_bigrtalloc(sc->ip))
672 return -EOPNOTSUPP;
673
674 /* Metadata inodes aren't supposed to have data on the rt volume. */
675 if (xfs_is_metadir_inode(sc->ip) && XFS_IS_REALTIME_INODE(sc->ip))
676 return -EOPNOTSUPP;
677
678 /*
679 * If we're somehow not in extents format, then reinitialize it to
680 * an empty extent mapping fork and exit.
681 */
682 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
683 ifp->if_format = XFS_DINODE_FMT_EXTENTS;
684 ifp->if_nextents = 0;
685 return 0;
686 }
687
688 xc = kzalloc(sizeof(struct xrep_cow), XCHK_GFP_FLAGS);
689 if (!xc)
690 return -ENOMEM;
691
692 xfs_trans_ijoin(sc->tp, sc->ip, 0);
693
694 xc->sc = sc;
695 xoff_bitmap_init(&xc->bad_fileoffs);
696 if (XFS_IS_REALTIME_INODE(sc->ip))
697 xrtb_bitmap_init(&xc->old_cowfork_rtblocks);
698 else
699 xfsb_bitmap_init(&xc->old_cowfork_fsblocks);
700
701 for_each_xfs_iext(ifp, &icur, &xc->irec) {
702 if (xchk_should_terminate(sc, &error))
703 goto out_bitmap;
704
705 /*
706 * delalloc reservations only exist incore, so there is no
707 * ondisk metadata that we can examine. Hence we leave them
708 * alone.
709 */
710 if (isnullstartblock(xc->irec.br_startblock))
711 continue;
712
713 /*
714 * COW fork extents are only in the written state if writeback
715 * is actively writing to disk. We cannot restart the write
716 * at a different disk address since we've already issued the
717 * IO, so we leave these alone and hope for the best.
718 */
719 if (xfs_bmap_is_written_extent(&xc->irec))
720 continue;
721
722 if (XFS_IS_REALTIME_INODE(sc->ip))
723 error = xrep_cow_find_bad_rt(xc);
724 else
725 error = xrep_cow_find_bad(xc);
726 if (error)
727 goto out_bitmap;
728 }
729
730 /* Replace any bad unwritten mappings with fresh reservations. */
731 error = xoff_bitmap_walk(&xc->bad_fileoffs, xrep_cow_replace, xc);
732 if (error)
733 goto out_bitmap;
734
735 /*
736 * Reap as many of the old CoW blocks as we can. They are owned ondisk
737 * by the refcount btree, not the inode, so it is correct to treat them
738 * like inode metadata.
739 */
740 if (XFS_IS_REALTIME_INODE(sc->ip))
741 error = xrep_reap_rtblocks(sc, &xc->old_cowfork_rtblocks,
742 &XFS_RMAP_OINFO_COW);
743 else
744 error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks,
745 &XFS_RMAP_OINFO_COW);
746 if (error)
747 goto out_bitmap;
748
749 out_bitmap:
750 if (XFS_IS_REALTIME_INODE(sc->ip))
751 xrtb_bitmap_destroy(&xc->old_cowfork_rtblocks);
752 else
753 xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
754 xoff_bitmap_destroy(&xc->bad_fileoffs);
755 kfree(xc);
756 return error;
757 }
758