1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_btree_staging.h"
15 #include "xfs_bit.h"
16 #include "xfs_log_format.h"
17 #include "xfs_trans.h"
18 #include "xfs_sb.h"
19 #include "xfs_inode.h"
20 #include "xfs_inode_fork.h"
21 #include "xfs_alloc.h"
22 #include "xfs_rtalloc.h"
23 #include "xfs_bmap.h"
24 #include "xfs_bmap_util.h"
25 #include "xfs_bmap_btree.h"
26 #include "xfs_rmap.h"
27 #include "xfs_rmap_btree.h"
28 #include "xfs_refcount.h"
29 #include "xfs_quota.h"
30 #include "xfs_ialloc.h"
31 #include "xfs_ag.h"
32 #include "xfs_reflink.h"
33 #include "scrub/xfs_scrub.h"
34 #include "scrub/scrub.h"
35 #include "scrub/common.h"
36 #include "scrub/btree.h"
37 #include "scrub/trace.h"
38 #include "scrub/repair.h"
39 #include "scrub/bitmap.h"
40 #include "scrub/fsb_bitmap.h"
41 #include "scrub/xfile.h"
42 #include "scrub/xfarray.h"
43 #include "scrub/newbt.h"
44 #include "scrub/reap.h"
45
46 /*
47 * Inode Fork Block Mapping (BMBT) Repair
48 * ======================================
49 *
50 * Gather all the rmap records for the inode and fork we're fixing, reset the
51 * incore fork, then recreate the btree.
52 */
53
/*
 * Outcome of the shared-extent scan: should the REFLINK inode flag be set
 * when the repair completes?
 */
enum reflink_scan_state {
	RLS_IRRELEVANT = -1,	/* not applicable to this file */
	RLS_UNKNOWN,		/* shared extent scans required */
	RLS_SET_IFLAG,		/* iflag must be set */
};
59
/* Context for rebuilding the block mapping (bmbt) of one inode fork. */
struct xrep_bmap {
	/* Old bmbt blocks */
	struct xfsb_bitmap	old_bmbt_blocks;

	/* New fork. */
	struct xrep_newbt	new_bmapbt;

	/* List of new bmap records. */
	struct xfarray		*bmap_records;

	struct xfs_scrub	*sc;

	/* How many blocks did we find allocated to this file? */
	xfs_rfsblock_t		nblocks;

	/* How many bmbt blocks did we find for this fork? */
	xfs_rfsblock_t		old_bmbt_block_count;

	/* get_records()'s position in the bmap record array. */
	xfarray_idx_t		array_cur;

	/* How many real (non-hole, non-delalloc) mappings do we have? */
	uint64_t		real_mappings;

	/* Which fork are we fixing? */
	int			whichfork;

	/* Should the REFLINK flag be set when the repair is over? */
	enum reflink_scan_state	reflink_scan;

	/* Do we allow unwritten extents? */
	bool			allow_unwritten;
};
93
94 /* Is this space extent shared? Flag the inode if it is. */
95 STATIC int
xrep_bmap_discover_shared(struct xrep_bmap * rb,xfs_fsblock_t startblock,xfs_filblks_t blockcount)96 xrep_bmap_discover_shared(
97 struct xrep_bmap *rb,
98 xfs_fsblock_t startblock,
99 xfs_filblks_t blockcount)
100 {
101 struct xfs_scrub *sc = rb->sc;
102 xfs_agblock_t agbno;
103 xfs_agblock_t fbno;
104 xfs_extlen_t flen;
105 int error;
106
107 agbno = XFS_FSB_TO_AGBNO(sc->mp, startblock);
108 error = xfs_refcount_find_shared(sc->sa.refc_cur, agbno, blockcount,
109 &fbno, &flen, false);
110 if (error)
111 return error;
112
113 if (fbno != NULLAGBLOCK)
114 rb->reflink_scan = RLS_SET_IFLAG;
115
116 return 0;
117 }
118
/*
 * Remember this reverse-mapping as a series of bmap records.  A single rmap
 * record can cover more blocks than one bmbt record may express, so chop it
 * into XFS_MAX_BMBT_EXTLEN-sized pieces.
 */
STATIC int
xrep_bmap_from_rmap(
	struct xrep_bmap	*rb,
	xfs_fileoff_t		startoff,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount,
	bool			unwritten)
{
	struct xfs_bmbt_irec	irec = {
		.br_startoff	= startoff,
		.br_startblock	= startblock,
		.br_state	= unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM,
	};
	struct xfs_bmbt_rec	rbe;
	struct xfs_scrub	*sc = rb->sc;
	int			error = 0;

	/*
	 * If we're repairing the data fork of a non-reflinked regular file on
	 * a reflink filesystem, we need to figure out if this space extent is
	 * shared.
	 */
	if (rb->reflink_scan == RLS_UNKNOWN && !unwritten) {
		error = xrep_bmap_discover_shared(rb, startblock, blockcount);
		if (error)
			return error;
	}

	do {
		xfs_failaddr_t	fa;

		/* Clamp this piece to the largest legal bmbt extent length. */
		irec.br_blockcount = min_t(xfs_filblks_t, blockcount,
				XFS_MAX_BMBT_EXTLEN);

		fa = xfs_bmap_validate_extent(sc->ip, rb->whichfork, &irec);
		if (fa)
			return -EFSCORRUPTED;

		/* Stash the record in ondisk format for the bulk loader. */
		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(sc->ip, rb->whichfork, &irec);

		if (xchk_should_terminate(sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;

		rb->real_mappings++;

		/* Advance to the next piece of the rmap record. */
		irec.br_startblock += irec.br_blockcount;
		irec.br_startoff += irec.br_blockcount;
		blockcount -= irec.br_blockcount;
	} while (blockcount > 0);

	return 0;
}
178
/*
 * Check for any obvious errors or conflicts in the file mapping.  Returns
 * -EFSCORRUPTED if the rmap record cannot possibly describe a valid file
 * mapping, or a negative errno from the btree probes.
 */
STATIC int
xrep_bmap_check_fork_rmap(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec)
{
	struct xfs_scrub	*sc = rb->sc;
	enum xbtree_recpacking	outcome;
	int			error;

	/*
	 * Data extents for rt files are never stored on the data device, but
	 * everything else (xattrs, bmbt blocks) can be.
	 */
	if (XFS_IS_REALTIME_INODE(sc->ip) &&
	    !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
		return -EFSCORRUPTED;

	/* Check that this is within the AG. */
	if (!xfs_verify_agbext(cur->bc_ag.pag, rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* Check the file offset range. */
	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
	    !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* No contradictory flags. */
	if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
	    (rec->rm_flags & XFS_RMAP_UNWRITTEN))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
			rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->rm_startblock, rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}
231
/*
 * Record extents that belong to this inode's fork.  This is the
 * xfs_rmap_query_all callback; @priv is the struct xrep_bmap.
 */
STATIC int
xrep_bmap_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_fsblock_t		fsbno;
	int			error = 0;

	if (xchk_should_terminate(rb->sc, &error))
		return error;

	/* Skip rmaps owned by other inodes. */
	if (rec->rm_owner != rb->sc->ip->i_ino)
		return 0;

	error = xrep_bmap_check_fork_rmap(rb, cur, rec);
	if (error)
		return error;

	/*
	 * Record all blocks allocated to this file even if the extent isn't
	 * for the fork we're rebuilding so that we can reset di_nblocks later.
	 */
	rb->nblocks += rec->rm_blockcount;

	/* If this rmap isn't for the fork we want, we're done. */
	if (rb->whichfork == XFS_DATA_FORK &&
	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rb->whichfork == XFS_ATTR_FORK &&
	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;

	/* Reject unwritten extents if we don't allow those. */
	if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
		return -EFSCORRUPTED;

	fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
			rec->rm_startblock);

	/* Old bmbt blocks are set aside for later reaping, not remapping. */
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
		rb->old_bmbt_block_count += rec->rm_blockcount;
		return xfsb_bitmap_set(&rb->old_bmbt_blocks, fsbno,
				rec->rm_blockcount);
	}

	return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno,
			rec->rm_blockcount,
			rec->rm_flags & XFS_RMAP_UNWRITTEN);
}
285
286 /*
287 * Compare two block mapping records. We want to sort in order of increasing
288 * file offset.
289 */
290 static int
xrep_bmap_extent_cmp(const void * a,const void * b)291 xrep_bmap_extent_cmp(
292 const void *a,
293 const void *b)
294 {
295 const struct xfs_bmbt_rec *ba = a;
296 const struct xfs_bmbt_rec *bb = b;
297 xfs_fileoff_t ao = xfs_bmbt_disk_get_startoff(ba);
298 xfs_fileoff_t bo = xfs_bmbt_disk_get_startoff(bb);
299
300 if (ao > bo)
301 return 1;
302 else if (ao < bo)
303 return -1;
304 return 0;
305 }
306
/*
 * Sort the bmap extents by fork offset or else the records will be in the
 * wrong order.  Ensure there are no overlaps in the file offset ranges.
 * Returns -EFSCORRUPTED if two records overlap.
 */
STATIC int
xrep_bmap_sort_records(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	xfs_fileoff_t		next_off = 0;
	xfarray_idx_t		array_cur;
	int			error;

	error = xfarray_sort(rb->bmap_records, xrep_bmap_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	/* Walk the now-sorted records checking for offset overlaps. */
	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		/* Each record must start at or after the previous one ends. */
		if (irec.br_startoff < next_off)
			return -EFSCORRUPTED;

		next_off = irec.br_startoff + irec.br_blockcount;
	}

	return 0;
}
345
346 /* Scan one AG for reverse mappings that we can turn into extent maps. */
347 STATIC int
xrep_bmap_scan_ag(struct xrep_bmap * rb,struct xfs_perag * pag)348 xrep_bmap_scan_ag(
349 struct xrep_bmap *rb,
350 struct xfs_perag *pag)
351 {
352 struct xfs_scrub *sc = rb->sc;
353 int error;
354
355 error = xrep_ag_init(sc, pag, &sc->sa);
356 if (error)
357 return error;
358
359 error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_bmap_walk_rmap, rb);
360 xchk_ag_free(sc, &sc->sa);
361 return error;
362 }
363
/*
 * Find the delalloc extents from the old incore extent tree and stash them
 * in the record array so they can be reinserted into the new incore tree.
 */
STATIC int
xrep_bmap_find_delalloc(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_rec	rbe;
	struct xfs_inode	*ip = rb->sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, rb->whichfork);
	int			error = 0;

	/*
	 * Skip this scan if we don't expect to find delayed allocation
	 * reservations in this fork.
	 */
	if (rb->whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0)
		return 0;

	for_each_xfs_iext(ifp, &icur, &irec) {
		/* Only delalloc mappings (null startblock) are of interest. */
		if (!isnullstartblock(irec.br_startblock))
			continue;

		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(ip, rb->whichfork, &irec);

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;
	}

	return 0;
}
401
/*
 * Collect block mappings for this fork of this inode and decide if we have
 * enough space to rebuild.  Caller is responsible for cleaning up the list if
 * anything goes wrong.
 */
STATIC int
xrep_bmap_find_mappings(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	int			error = 0;

	/* Iterate the rmaps for extents. */
	for_each_perag(sc->mp, agno, pag) {
		error = xrep_bmap_scan_ag(rb, pag);
		if (error) {
			/* Drop the perag reference held by the iterator. */
			xfs_perag_rele(pag);
			return error;
		}
	}

	/* Delalloc reservations only exist incore, so scan those separately. */
	return xrep_bmap_find_delalloc(rb);
}
427
/*
 * Retrieve real extent mappings for bulk loading the bmap btree.  This is
 * the bulk loader's get_records callback: fill @block with up to @nr_wanted
 * records starting at @idx, skipping any stashed delalloc records since
 * those never go in the ondisk btree.  Returns the number of records loaded.
 */
STATIC int
xrep_bmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_bmbt_rec	rec;
	struct xfs_bmbt_irec	*irec = &cur->bc_rec.b;
	struct xrep_bmap	*rb = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		/* Pull records until we find one that isn't delalloc. */
		do {
			error = xfarray_load(rb->bmap_records, rb->array_cur++,
					&rec);
			if (error)
				return error;

			xfs_bmbt_disk_get_all(&rec, irec);
		} while (isnullstartblock(irec->br_startblock));

		/* Copy the record into the new btree block. */
		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
460
461 /* Feed one of the new btree blocks to the bulk loader. */
462 STATIC int
xrep_bmap_claim_block(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,void * priv)463 xrep_bmap_claim_block(
464 struct xfs_btree_cur *cur,
465 union xfs_btree_ptr *ptr,
466 void *priv)
467 {
468 struct xrep_bmap *rb = priv;
469
470 return xrep_newbt_claim_block(cur, &rb->new_bmapbt, ptr);
471 }
472
473 /* Figure out how much space we need to create the incore btree root block. */
474 STATIC size_t
xrep_bmap_iroot_size(struct xfs_btree_cur * cur,unsigned int level,unsigned int nr_this_level,void * priv)475 xrep_bmap_iroot_size(
476 struct xfs_btree_cur *cur,
477 unsigned int level,
478 unsigned int nr_this_level,
479 void *priv)
480 {
481 ASSERT(level > 0);
482
483 return xfs_bmap_broot_space_calc(cur->bc_mp, nr_this_level);
484 }
485
/*
 * Update the inode counters (block count, reflink iflag) and the quota block
 * count to match the fork we just rebuilt.
 */
STATIC int
xrep_bmap_reset_counters(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int64_t			delta;

	/* The shared-extent scan decided the file needs the reflink iflag. */
	if (rb->reflink_scan == RLS_SET_IFLAG)
		sc->ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;

	/*
	 * Update the inode block counts to reflect the extents we found in the
	 * rmapbt.
	 */
	delta = ifake->if_blocks - rb->old_bmbt_block_count;
	sc->ip->i_nblocks = rb->nblocks + delta;
	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);

	/*
	 * Adjust the quota counts by the difference in size between the old
	 * and new bmbt.
	 */
	xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT, delta);
	return 0;
}
513
/*
 * Create a new iext tree and load it with block mappings.  If the inode is
 * in extents format, that's all we need to do to commit the new mappings.
 * If it is in btree format, this takes care of preloading the incore tree.
 */
STATIC int
xrep_bmap_extents_load(
	struct xrep_bmap	*rb)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	irec;
	struct xfs_ifork	*ifp = rb->new_bmapbt.ifake.if_fork;
	xfarray_idx_t		array_cur;
	int			error;

	/* The staged fork must start out empty. */
	ASSERT(ifp->if_bytes == 0);

	/* Add all the mappings (incl. delalloc) to the incore extent tree. */
	xfs_iext_first(ifp, &icur);
	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		xfs_iext_insert_raw(ifp, &icur, &irec);
		/* Delalloc mappings don't count towards the extent count. */
		if (!isnullstartblock(irec.br_startblock))
			ifp->if_nextents++;

		xfs_iext_next(ifp, &icur);
	}

	return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
			ifp->if_nextents);
}
552
/*
 * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
 * and load the incore extent tree.
 */
STATIC int
xrep_bmap_btree_load(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*bmap_cur)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	/* Compute how many blocks we'll need. */
	error = xfs_btree_bload_compute_geometry(bmap_cur,
			&rb->new_bmapbt.bload, rb->real_mappings);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire bmap
	 * from the number of extents we found, and pump up our transaction to
	 * have sufficient block reservation.  We're allowed to exceed file
	 * quota to repair inconsistent metadata.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, sc->ip,
			rb->new_bmapbt.bload.nr_blocks, 0, true);
	if (error)
		return error;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rb->new_bmapbt,
			rb->new_bmapbt.bload.nr_blocks);
	if (error)
		return error;

	/* Add all observed bmap records. */
	rb->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bmap_cur, &rb->new_bmapbt.bload, rb);
	if (error)
		return error;

	/*
	 * Load the new bmap records into the new incore extent tree to
	 * preserve delalloc reservations for regular files.  The directory
	 * code loads the extent tree during xfs_dir_open and assumes
	 * thereafter that it remains loaded, so we must not violate that
	 * assumption.
	 */
	return xrep_bmap_extents_load(rb);
}
607
/*
 * Use the collected bmap information to stage a new bmap fork.  If this is
 * successful we'll return with the new fork information logged to the repair
 * transaction but not yet committed.  The caller must ensure that the inode
 * is joined to the transaction; the inode will be joined to a clean
 * transaction when the function returns.
 */
STATIC int
xrep_bmap_build_new_fork(
	struct xrep_bmap	*rb)
{
	struct xfs_owner_info	oinfo;
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*bmap_cur;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int			error;

	/* Records must be sorted (and non-overlapping) before bulk loading. */
	error = xrep_bmap_sort_records(rb);
	if (error)
		return error;

	/*
	 * Prepare to construct the new fork by initializing the new btree
	 * structure and creating a fake ifork in the ifakeroot structure.
	 */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	error = xrep_newbt_init_inode(&rb->new_bmapbt, sc, rb->whichfork,
			&oinfo);
	if (error)
		return error;

	rb->new_bmapbt.bload.get_records = xrep_bmap_get_records;
	rb->new_bmapbt.bload.claim_block = xrep_bmap_claim_block;
	rb->new_bmapbt.bload.iroot_size = xrep_bmap_iroot_size;

	/*
	 * Allocate a new bmap btree cursor for reloading an inode block mapping
	 * data structure.
	 */
	bmap_cur = xfs_bmbt_init_cursor(sc->mp, NULL, sc->ip, XFS_STAGING_FORK);
	xfs_btree_stage_ifakeroot(bmap_cur, ifake);

	/*
	 * Figure out the size and format of the new fork, then fill it with
	 * all the bmap records we've found.  Join the inode to the transaction
	 * so that we can roll the transaction while holding the inode locked.
	 */
	if (rb->real_mappings <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
		ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
		error = xrep_bmap_extents_load(rb);
	} else {
		ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
		error = xrep_bmap_btree_load(rb, bmap_cur);
	}
	if (error)
		goto err_cur;

	/*
	 * Install the new fork in the inode.  After this point the old mapping
	 * data are no longer accessible and the new tree is live.  We delete
	 * the cursor immediately after committing the staged root because the
	 * staged fork might be in extents format.
	 */
	xfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
	xfs_btree_del_cursor(bmap_cur, 0);

	/* Reset the inode counters now that we've changed the fork. */
	error = xrep_bmap_reset_counters(rb);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rb->new_bmapbt);
	if (error)
		return error;

	return xrep_roll_trans(sc);

err_cur:
	if (bmap_cur)
		xfs_btree_del_cursor(bmap_cur, error);
err_newbt:
	xrep_newbt_cancel(&rb->new_bmapbt);
	return error;
}
693
694 /*
695 * Now that we've logged the new inode btree, invalidate all of the old blocks
696 * and free them, if there were any.
697 */
698 STATIC int
xrep_bmap_remove_old_tree(struct xrep_bmap * rb)699 xrep_bmap_remove_old_tree(
700 struct xrep_bmap *rb)
701 {
702 struct xfs_scrub *sc = rb->sc;
703 struct xfs_owner_info oinfo;
704
705 /* Free the old bmbt blocks if they're not in use. */
706 xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
707 return xrep_reap_fsblocks(sc, &rb->old_bmbt_blocks, &oinfo);
708 }
709
710 /* Check for garbage inputs. Returns -ECANCELED if there's nothing to do. */
711 STATIC int
xrep_bmap_check_inputs(struct xfs_scrub * sc,int whichfork)712 xrep_bmap_check_inputs(
713 struct xfs_scrub *sc,
714 int whichfork)
715 {
716 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork);
717
718 ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
719
720 if (!xfs_has_rmapbt(sc->mp))
721 return -EOPNOTSUPP;
722
723 /* No fork means nothing to rebuild. */
724 if (!ifp)
725 return -ECANCELED;
726
727 /*
728 * We only know how to repair extent mappings, which is to say that we
729 * only support extents and btree fork format. Repairs to a local
730 * format fork require a higher level repair function, so we do not
731 * have any work to do here.
732 */
733 switch (ifp->if_format) {
734 case XFS_DINODE_FMT_DEV:
735 case XFS_DINODE_FMT_LOCAL:
736 case XFS_DINODE_FMT_UUID:
737 return -ECANCELED;
738 case XFS_DINODE_FMT_EXTENTS:
739 case XFS_DINODE_FMT_BTREE:
740 break;
741 default:
742 return -EFSCORRUPTED;
743 }
744
745 if (whichfork == XFS_ATTR_FORK)
746 return 0;
747
748 /* Only files, symlinks, and directories get to have data forks. */
749 switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
750 case S_IFREG:
751 case S_IFDIR:
752 case S_IFLNK:
753 /* ok */
754 break;
755 default:
756 return -EINVAL;
757 }
758
759 /* Don't know how to rebuild realtime data forks. */
760 if (XFS_IS_REALTIME_INODE(sc->ip))
761 return -EOPNOTSUPP;
762
763 return 0;
764 }
765
766 /* Set up the initial state of the reflink scan. */
767 static inline enum reflink_scan_state
xrep_bmap_init_reflink_scan(struct xfs_scrub * sc,int whichfork)768 xrep_bmap_init_reflink_scan(
769 struct xfs_scrub *sc,
770 int whichfork)
771 {
772 /* cannot share on non-reflink filesystem */
773 if (!xfs_has_reflink(sc->mp))
774 return RLS_IRRELEVANT;
775
776 /* preserve flag if it's already set */
777 if (xfs_is_reflink_inode(sc->ip))
778 return RLS_SET_IFLAG;
779
780 /* can only share regular files */
781 if (!S_ISREG(VFS_I(sc->ip)->i_mode))
782 return RLS_IRRELEVANT;
783
784 /* cannot share attr fork extents */
785 if (whichfork != XFS_DATA_FORK)
786 return RLS_IRRELEVANT;
787
788 /* cannot share realtime extents */
789 if (XFS_IS_REALTIME_INODE(sc->ip))
790 return RLS_IRRELEVANT;
791
792 return RLS_UNKNOWN;
793 }
794
/*
 * Repair an inode fork.  Returns 0 if the fork was rebuilt (or there was
 * nothing to repair), or a negative errno.  @allow_unwritten controls
 * whether unwritten extents are acceptable in this fork.
 */
int
xrep_bmap(
	struct xfs_scrub	*sc,
	int			whichfork,
	bool			allow_unwritten)
{
	struct xrep_bmap	*rb;
	char			*descr;
	xfs_extnum_t		max_bmbt_recs;
	bool			large_extcount;
	int			error = 0;

	error = xrep_bmap_check_inputs(sc, whichfork);
	if (error == -ECANCELED)
		return 0;	/* nothing to repair; not an error */
	if (error)
		return error;

	rb = kzalloc(sizeof(struct xrep_bmap), XCHK_GFP_FLAGS);
	if (!rb)
		return -ENOMEM;
	rb->sc = sc;
	rb->whichfork = whichfork;
	rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork);
	rb->allow_unwritten = allow_unwritten;

	/* Set up enough storage to handle the max records for this fork. */
	large_extcount = xfs_has_large_extent_counts(sc->mp);
	max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork);
	descr = xchk_xfile_ino_descr(sc, "%s fork mapping records",
			whichfork == XFS_DATA_FORK ? "data" : "attr");
	error = xfarray_create(descr, max_bmbt_recs,
			sizeof(struct xfs_bmbt_rec), &rb->bmap_records);
	kfree(descr);
	if (error)
		goto out_rb;

	/* Collect all reverse mappings for this fork's extents. */
	xfsb_bitmap_init(&rb->old_bmbt_blocks);
	error = xrep_bmap_find_mappings(rb);
	if (error)
		goto out_bitmap;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the bmap information. */
	error = xrep_bmap_build_new_fork(rb);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_bmap_remove_old_tree(rb);
	if (error)
		goto out_bitmap;

out_bitmap:
	xfsb_bitmap_destroy(&rb->old_bmbt_blocks);
	xfarray_destroy(rb->bmap_records);
out_rb:
	kfree(rb);
	return error;
}
858
859 /* Repair an inode's data fork. */
860 int
xrep_bmap_data(struct xfs_scrub * sc)861 xrep_bmap_data(
862 struct xfs_scrub *sc)
863 {
864 return xrep_bmap(sc, XFS_DATA_FORK, true);
865 }
866
867 /* Repair an inode's attr fork. */
868 int
xrep_bmap_attr(struct xfs_scrub * sc)869 xrep_bmap_attr(
870 struct xfs_scrub *sc)
871 {
872 return xrep_bmap(sc, XFS_ATTR_FORK, false);
873 }
874