// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_quota.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "xfs_reflink.h"
#include "xfs_rtgroup.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/fsb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Inode Fork Block Mapping (BMBT) Repair
 * ======================================
 *
 * Gather all the rmap records for the inode and fork we're fixing, reset the
 * incore fork, then recreate the btree.
 */

enum reflink_scan_state {
	RLS_IRRELEVANT = -1,	/* not applicable to this file */
	RLS_UNKNOWN,		/* shared extent scans required */
	RLS_SET_IFLAG,		/* iflag must be set */
};

struct xrep_bmap {
	/* Old bmbt blocks */
	struct xfsb_bitmap	old_bmbt_blocks;

	/* New fork. */
	struct xrep_newbt	new_bmapbt;

	/* List of new bmap records. */
	struct xfarray		*bmap_records;

	struct xfs_scrub	*sc;

	/* How many blocks did we find allocated to this file? */
	xfs_rfsblock_t		nblocks;

	/* How many bmbt blocks did we find for this fork? */
	xfs_rfsblock_t		old_bmbt_block_count;

	/* get_records()'s position in the block mapping record array. */
	xfarray_idx_t		array_cur;

	/* How many real (non-hole, non-delalloc) mappings do we have? */
	uint64_t		real_mappings;

	/* Which fork are we fixing? */
	int			whichfork;

	/* Should the REFLINK flag be set when the repair is over? */
	enum reflink_scan_state	reflink_scan;

	/* Do we allow unwritten extents? */
	bool			allow_unwritten;
};

/* Is this space extent shared? Flag the inode if it is. */
STATIC int
xrep_bmap_discover_shared(
	struct xrep_bmap	*rb,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*cur;
	xfs_agblock_t		agbno;
	xfs_agblock_t		fbno;
	xfs_extlen_t		flen;
	int			error;

	if (XFS_IS_REALTIME_INODE(sc->ip)) {
		agbno = xfs_rtb_to_rgbno(sc->mp, startblock);
		cur = sc->sr.refc_cur;
	} else {
		agbno = XFS_FSB_TO_AGBNO(sc->mp, startblock);
		cur = sc->sa.refc_cur;
	}
	error = xfs_refcount_find_shared(cur, agbno, blockcount, &fbno, &flen,
			false);
	if (error)
		return error;

	if (fbno != NULLAGBLOCK)
		rb->reflink_scan = RLS_SET_IFLAG;

	return 0;
}

/* Remember this reverse-mapping as a series of bmap records. */
STATIC int
xrep_bmap_from_rmap(
	struct xrep_bmap	*rb,
	xfs_fileoff_t		startoff,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount,
	bool			unwritten)
{
	struct xfs_bmbt_irec	irec = {
		.br_startoff	= startoff,
		.br_startblock	= startblock,
		.br_state	= unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM,
	};
	struct xfs_bmbt_rec	rbe;
	struct xfs_scrub	*sc = rb->sc;
	int			error = 0;

	/*
	 * If we're repairing the data fork of a non-reflinked regular file on
	 * a reflink filesystem, we need to figure out if this space extent is
	 * shared.
	 */
	if (rb->reflink_scan == RLS_UNKNOWN && !unwritten) {
		error = xrep_bmap_discover_shared(rb, startblock, blockcount);
		if (error)
			return error;
	}

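	/*
	 * Chop the reverse mapping into bmbt records, since a single bmbt
	 * record cannot describe more than XFS_MAX_BMBT_EXTLEN blocks.
	 */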
	do {
		xfs_failaddr_t	fa;

		irec.br_blockcount = min_t(xfs_filblks_t, blockcount,
				XFS_MAX_BMBT_EXTLEN);

		fa = xfs_bmap_validate_extent(sc->ip, rb->whichfork, &irec);
		if (fa)
			return -EFSCORRUPTED;

		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(sc->ip, rb->whichfork, &irec);

		if (xchk_should_terminate(sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;

		rb->real_mappings++;

		irec.br_startblock += irec.br_blockcount;
		irec.br_startoff += irec.br_blockcount;
		blockcount -= irec.br_blockcount;
	} while (blockcount > 0);

	return 0;
}

/* Check for any obvious errors or conflicts in the file mapping. */
STATIC int
xrep_bmap_check_fork_rmap(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec	*rec)
{
	struct xfs_scrub	*sc = rb->sc;
	enum xbtree_recpacking	outcome;
	int			error;

	/*
	 * Data extents for rt files are never stored on the data device, but
	 * everything else (xattrs, bmbt blocks) can be.
	 */
	if (XFS_IS_REALTIME_INODE(sc->ip) &&
	    !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
		return -EFSCORRUPTED;

	/* Check that this is within the AG. */
	if (!xfs_verify_agbext(to_perag(cur->bc_group), rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* Check the file offset range. */
	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
	    !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* No contradictory flags. */
	if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
	    (rec->rm_flags & XFS_RMAP_UNWRITTEN))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
			rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->rm_startblock, rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}

/* Record extents that belong to this inode's fork. */
STATIC int
xrep_bmap_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec	*rec,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;
	xfs_fsblock_t		fsbno;
	int			error = 0;

	if (xchk_should_terminate(rb->sc, &error))
		return error;

	if (rec->rm_owner != rb->sc->ip->i_ino)
		return 0;

	error = xrep_bmap_check_fork_rmap(rb, cur, rec);
	if (error)
		return error;

	/*
	 * Record all blocks allocated to this file even if the extent isn't
	 * for the fork we're rebuilding so that we can reset di_nblocks later.
	 */
	rb->nblocks += rec->rm_blockcount;

	/* If this rmap isn't for the fork we want, we're done. */
	if (rb->whichfork == XFS_DATA_FORK &&
	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rb->whichfork == XFS_ATTR_FORK &&
	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;

	/* Reject unwritten extents if we don't allow those. */
	if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
		return -EFSCORRUPTED;

	fsbno = xfs_agbno_to_fsb(to_perag(cur->bc_group), rec->rm_startblock);

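	/*
	 * Old bmbt blocks are tracked in a separate bitmap so that they can
	 * be reaped after the new fork has been committed.
	 */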
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
		rb->old_bmbt_block_count += rec->rm_blockcount;
		return xfsb_bitmap_set(&rb->old_bmbt_blocks, fsbno,
				rec->rm_blockcount);
	}

	return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno,
			rec->rm_blockcount,
			rec->rm_flags & XFS_RMAP_UNWRITTEN);
}

/*
 * Compare two block mapping records. We want to sort in order of increasing
 * file offset.
 */
static int
xrep_bmap_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_bmbt_rec	*ba = a;
	const struct xfs_bmbt_rec	*bb = b;
	xfs_fileoff_t		ao = xfs_bmbt_disk_get_startoff(ba);
	xfs_fileoff_t		bo = xfs_bmbt_disk_get_startoff(bb);

	if (ao > bo)
		return 1;
	else if (ao < bo)
		return -1;
	return 0;
}

/*
 * Sort the bmap extents by fork offset or else the records will be in the
 * wrong order. Ensure there are no overlaps in the file offset ranges.
 */
STATIC int
xrep_bmap_sort_records(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	xfs_fileoff_t		next_off = 0;
	xfarray_idx_t		array_cur;
	int			error;

	error = xfarray_sort(rb->bmap_records, xrep_bmap_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

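		/*
		 * A mapping that overlaps the end of the previous record
		 * means the rmap data were inconsistent, so bail out.
		 */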
		if (irec.br_startoff < next_off)
			return -EFSCORRUPTED;

		next_off = irec.br_startoff + irec.br_blockcount;
	}

	return 0;
}

/* Scan one AG for reverse mappings that we can turn into extent maps. */
STATIC int
xrep_bmap_scan_ag(
	struct xrep_bmap	*rb,
	struct xfs_perag	*pag)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	error = xrep_ag_init(sc, pag, &sc->sa);
	if (error)
		return error;

	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_bmap_walk_rmap, rb);
	xchk_ag_free(sc, &sc->sa);
	return error;
}

#ifdef CONFIG_XFS_RT
/* Check for any obvious errors or conflicts in the file mapping. */
STATIC int
xrep_bmap_check_rtfork_rmap(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec	*rec)
{
	/* xattr extents are never stored on realtime devices */
	if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
		return -EFSCORRUPTED;

	/* bmbt blocks are never stored on realtime devices */
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return -EFSCORRUPTED;

	/* Data extents for non-rt files are never stored on the rt device. */
	if (!XFS_IS_REALTIME_INODE(sc->ip))
		return -EFSCORRUPTED;

	/* Check the file offsets and physical extents. */
	if (!xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* Check that this is within the rtgroup. */
	if (!xfs_verify_rgbext(to_rtg(cur->bc_group), rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	return xrep_require_rtext_inuse(sc, rec->rm_startblock,
			rec->rm_blockcount);
}

/* Record realtime extents that belong to this inode's fork. */
STATIC int
xrep_bmap_walk_rtrmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec	*rec,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;
	int			error = 0;

	if (xchk_should_terminate(rb->sc, &error))
		return error;

	/* Skip extents which are not owned by this inode and fork. */
	if (rec->rm_owner != rb->sc->ip->i_ino)
		return 0;

	error = xrep_bmap_check_rtfork_rmap(rb->sc, cur, rec);
	if (error)
		return error;

	/*
	 * Record all blocks allocated to this file even if the extent isn't
	 * for the fork we're rebuilding so that we can reset di_nblocks later.
	 */
	rb->nblocks += rec->rm_blockcount;

	/* If this rmap isn't for the fork we want, we're done. */
	if (rb->whichfork == XFS_DATA_FORK &&
	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rb->whichfork == XFS_ATTR_FORK &&
	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;

	return xrep_bmap_from_rmap(rb, rec->rm_offset,
			xfs_rgbno_to_rtb(to_rtg(cur->bc_group),
				rec->rm_startblock),
			rec->rm_blockcount,
			rec->rm_flags & XFS_RMAP_UNWRITTEN);
}

/* Scan the realtime reverse mappings to build the new extent map. */
STATIC int
xrep_bmap_scan_rtgroup(
	struct xrep_bmap	*rb,
	struct xfs_rtgroup	*rtg)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	if (!xfs_has_rtrmapbt(sc->mp))
		return 0;

	error = xrep_rtgroup_init(sc, rtg, &sc->sr,
			XFS_RTGLOCK_RMAP |
			XFS_RTGLOCK_REFCOUNT |
			XFS_RTGLOCK_BITMAP_SHARED);
	if (error)
		return error;

	error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_bmap_walk_rtrmap, rb);
	xchk_rtgroup_btcur_free(&sc->sr);
	xchk_rtgroup_free(sc, &sc->sr);
	return error;
}
#else
static inline int
xrep_bmap_scan_rtgroup(struct xrep_bmap *rb, struct xfs_rtgroup *rtg)
{
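	/*
	 * Without CONFIG_XFS_RT there are no realtime groups to scan, so
	 * reaching this stub indicates something is inconsistent.
	 */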
	return -EFSCORRUPTED;
}
#endif

/* Find the delalloc extents from the old incore extent tree. */
STATIC int
xrep_bmap_find_delalloc(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_rec	rbe;
	struct xfs_inode	*ip = rb->sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, rb->whichfork);
	int			error = 0;

	/*
	 * Skip this scan if we don't expect to find delayed allocation
	 * reservations in this fork.
	 */
	if (rb->whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0)
		return 0;

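	/*
	 * Real and unwritten mappings were already collected from the rmap
	 * btrees; only the delalloc reservations, which exist solely in the
	 * incore extent tree, need to be copied here.
	 */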
	for_each_xfs_iext(ifp, &icur, &irec) {
		if (!isnullstartblock(irec.br_startblock))
			continue;

		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(ip, rb->whichfork, &irec);

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;
	}

	return 0;
}

/*
 * Collect block mappings for this fork of this inode and decide if we have
 * enough space to rebuild. Caller is responsible for cleaning up the list if
 * anything goes wrong.
 */
STATIC int
xrep_bmap_find_mappings(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_perag	*pag = NULL;
	int			error = 0;

	/*
	 * Iterate the rtrmaps for extents. Metadata files never have content
	 * on the realtime device, so there's no need to scan them.
	 */
	if (!xfs_is_metadir_inode(sc->ip)) {
		struct xfs_rtgroup	*rtg = NULL;

		while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
			error = xrep_bmap_scan_rtgroup(rb, rtg);
			if (error) {
				xfs_rtgroup_rele(rtg);
				return error;
			}
		}
	}

	/* Iterate the rmaps for extents. */
	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xrep_bmap_scan_ag(rb, pag);
		if (error) {
			xfs_perag_rele(pag);
			return error;
		}
	}

	return xrep_bmap_find_delalloc(rb);
}

/* Retrieve real extent mappings for bulk loading the bmap btree. */
STATIC int
xrep_bmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_bmbt_rec	rec;
	struct xfs_bmbt_irec	*irec = &cur->bc_rec.b;
	struct xrep_bmap	*rb = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

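	/*
	 * Copy mappings into the new btree block, skipping the delalloc
	 * records (nullstartblock) that were collected for the incore extent
	 * tree but must never appear in the ondisk bmbt.
	 */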
	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		do {
			error = xfarray_load(rb->bmap_records, rb->array_cur++,
					&rec);
			if (error)
				return error;

			xfs_bmbt_disk_get_all(&rec, irec);
		} while (isnullstartblock(irec->br_startblock));

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_bmap_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;

	return xrep_newbt_claim_block(cur, &rb->new_bmapbt, ptr);
}

/* Figure out how much space we need to create the incore btree root block. */
STATIC size_t
xrep_bmap_iroot_size(
	struct xfs_btree_cur	*cur,
	unsigned int		level,
	unsigned int		nr_this_level,
	void			*priv)
{
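	/*
	 * The incore root of a bmbt in btree format always points to node
	 * blocks; if all the records fit in the inode fork, the fork would
	 * be in extents format instead.
	 */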
	ASSERT(level > 0);

	return xfs_bmap_broot_space_calc(cur->bc_mp, nr_this_level);
}

/* Update the inode counters. */
STATIC int
xrep_bmap_reset_counters(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int64_t			delta;

	if (rb->reflink_scan == RLS_SET_IFLAG)
		sc->ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;

	/*
	 * Update the inode block counts to reflect the extents we found in the
	 * rmapbt.
	 */
	delta = ifake->if_blocks - rb->old_bmbt_block_count;
	sc->ip->i_nblocks = rb->nblocks + delta;
	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);

	/*
	 * Adjust the quota counts by the difference in size between the old
	 * and new bmbt.
	 */
	xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT, delta);
	return 0;
}

/*
 * Create a new iext tree and load it with block mappings. If the inode is
 * in extents format, that's all we need to do to commit the new mappings.
 * If it is in btree format, this takes care of preloading the incore tree.
 */
STATIC int
xrep_bmap_extents_load(
	struct xrep_bmap	*rb)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	irec;
	struct xfs_ifork	*ifp = rb->new_bmapbt.ifake.if_fork;
	xfarray_idx_t		array_cur;
	int			error;

	ASSERT(ifp->if_bytes == 0);

	/* Add all the mappings (incl. delalloc) to the incore extent tree. */
	xfs_iext_first(ifp, &icur);
	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		xfs_iext_insert_raw(ifp, &icur, &irec);
		if (!isnullstartblock(irec.br_startblock))
			ifp->if_nextents++;

		xfs_iext_next(ifp, &icur);
	}

	return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
			ifp->if_nextents);
}

/*
 * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
 * and load the incore extent tree.
 */
STATIC int
xrep_bmap_btree_load(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*bmap_cur)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	/* Compute how many blocks we'll need. */
	error = xfs_btree_bload_compute_geometry(bmap_cur,
			&rb->new_bmapbt.bload, rb->real_mappings);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire bmap
	 * from the number of extents we found, and pump up our transaction to
	 * have sufficient block reservation. We're allowed to exceed file
	 * quota to repair inconsistent metadata.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, sc->ip,
			rb->new_bmapbt.bload.nr_blocks, 0, true);
	if (error)
		return error;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rb->new_bmapbt,
			rb->new_bmapbt.bload.nr_blocks);
	if (error)
		return error;

	/* Add all observed bmap records. */
	rb->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bmap_cur, &rb->new_bmapbt.bload, rb);
	if (error)
		return error;

	/*
	 * Load the new bmap records into the new incore extent tree to
	 * preserve delalloc reservations for regular files. The directory
	 * code loads the extent tree during xfs_dir_open and assumes
	 * thereafter that it remains loaded, so we must not violate that
	 * assumption.
	 */
	return xrep_bmap_extents_load(rb);
}

/*
 * Use the collected bmap information to stage a new bmap fork. If this is
 * successful we'll return with the new fork information logged to the repair
 * transaction but not yet committed. The caller must ensure that the inode
 * is joined to the transaction; the inode will be joined to a clean
 * transaction when the function returns.
 */
STATIC int
xrep_bmap_build_new_fork(
	struct xrep_bmap	*rb)
{
	struct xfs_owner_info	oinfo;
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*bmap_cur;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int			error;

	error = xrep_bmap_sort_records(rb);
	if (error)
		return error;

	/*
	 * Prepare to construct the new fork by initializing the new btree
	 * structure and creating a fake ifork in the ifakeroot structure.
	 */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	error = xrep_newbt_init_inode(&rb->new_bmapbt, sc, rb->whichfork,
			&oinfo);
	if (error)
		return error;

	rb->new_bmapbt.bload.get_records = xrep_bmap_get_records;
	rb->new_bmapbt.bload.claim_block = xrep_bmap_claim_block;
	rb->new_bmapbt.bload.iroot_size = xrep_bmap_iroot_size;

	/*
	 * Allocate a new bmap btree cursor for reloading an inode block
	 * mapping data structure.
	 */
	bmap_cur = xfs_bmbt_init_cursor(sc->mp, NULL, sc->ip, XFS_STAGING_FORK);
	xfs_btree_stage_ifakeroot(bmap_cur, ifake);

	/*
	 * Figure out the size and format of the new fork, then fill it with
	 * all the bmap records we've found. Join the inode to the transaction
	 * so that we can roll the transaction while holding the inode locked.
	 */
	if (rb->real_mappings <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
		ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
		error = xrep_bmap_extents_load(rb);
	} else {
		ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
		error = xrep_bmap_btree_load(rb, bmap_cur);
	}
	if (error)
		goto err_cur;

	/*
	 * Install the new fork in the inode. After this point the old mapping
	 * data are no longer accessible and the new tree is live. We delete
	 * the cursor immediately after committing the staged root because the
	 * staged fork might be in extents format.
	 */
	xfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
	xfs_btree_del_cursor(bmap_cur, 0);

	/* Reset the inode counters now that we've changed the fork. */
	error = xrep_bmap_reset_counters(rb);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rb->new_bmapbt);
	if (error)
		return error;

	return xrep_roll_trans(sc);

err_cur:
	if (bmap_cur)
		xfs_btree_del_cursor(bmap_cur, error);
err_newbt:
	xrep_newbt_cancel(&rb->new_bmapbt);
	return error;
}

/*
 * Now that we've logged the new inode btree, invalidate all of the old blocks
 * and free them, if there were any.
 */
STATIC int
xrep_bmap_remove_old_tree(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_owner_info	oinfo;

	/* Free the old bmbt blocks if they're not in use. */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	return xrep_reap_fsblocks(sc, &rb->old_bmbt_blocks, &oinfo);
}

/* Check for garbage inputs. Returns -ECANCELED if there's nothing to do. */
STATIC int
xrep_bmap_check_inputs(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);

	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	if (!xfs_has_rmapbt(sc->mp))
		return -EOPNOTSUPP;

	/* No fork means nothing to rebuild. */
	if (!ifp)
		return -ECANCELED;

	/*
	 * We only know how to repair extent mappings, which is to say that we
	 * only support extents and btree fork format. Repairs to a local
	 * format fork require a higher level repair function, so we do not
	 * have any work to do here.
	 */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_UUID:
	case XFS_DINODE_FMT_META_BTREE:
		return -ECANCELED;
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	default:
		return -EFSCORRUPTED;
	}

	if (whichfork == XFS_ATTR_FORK)
		return 0;

	/* Only files, symlinks, and directories get to have data forks. */
	switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		/* ok */
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/* Set up the initial state of the reflink scan. */
static inline enum reflink_scan_state
xrep_bmap_init_reflink_scan(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	/* cannot share on non-reflink filesystem */
	if (!xfs_has_reflink(sc->mp))
		return RLS_IRRELEVANT;

	/* preserve flag if it's already set */
	if (xfs_is_reflink_inode(sc->ip))
		return RLS_SET_IFLAG;

	/* can only share regular files */
	if (!S_ISREG(VFS_I(sc->ip)->i_mode))
		return RLS_IRRELEVANT;

	/* cannot share attr fork extents */
	if (whichfork != XFS_DATA_FORK)
		return RLS_IRRELEVANT;

	return RLS_UNKNOWN;
}

/* Repair an inode fork. */
int
xrep_bmap(
	struct xfs_scrub	*sc,
	int			whichfork,
	bool			allow_unwritten)
{
	struct xrep_bmap	*rb;
	char			*descr;
	xfs_extnum_t		max_bmbt_recs;
	bool			large_extcount;
	int			error = 0;

	error = xrep_bmap_check_inputs(sc, whichfork);
	if (error == -ECANCELED)
		return 0;
	if (error)
		return error;

	rb = kzalloc(sizeof(struct xrep_bmap), XCHK_GFP_FLAGS);
	if (!rb)
		return -ENOMEM;
	rb->sc = sc;
	rb->whichfork = whichfork;
	rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork);
	rb->allow_unwritten = allow_unwritten;

	/* Set up enough storage to handle the max records for this fork. */
	large_extcount = xfs_has_large_extent_counts(sc->mp);
	max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork);
	descr = xchk_xfile_ino_descr(sc, "%s fork mapping records",
			whichfork == XFS_DATA_FORK ? "data" : "attr");
	error = xfarray_create(descr, max_bmbt_recs,
			sizeof(struct xfs_bmbt_rec), &rb->bmap_records);
	kfree(descr);
	if (error)
		goto out_rb;

	/* Collect all reverse mappings for this fork's extents. */
	xfsb_bitmap_init(&rb->old_bmbt_blocks);
	error = xrep_bmap_find_mappings(rb);
	if (error)
		goto out_bitmap;

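	/*
	 * Join the inode to the repair transaction so that the new fork can
	 * be logged and the transaction rolled while the inode stays locked.
	 */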
	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the bmap information. */
	error = xrep_bmap_build_new_fork(rb);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_bmap_remove_old_tree(rb);
	if (error)
		goto out_bitmap;

out_bitmap:
	xfsb_bitmap_destroy(&rb->old_bmbt_blocks);
	xfarray_destroy(rb->bmap_records);
out_rb:
	kfree(rb);
	return error;
}

/* Repair an inode's data fork. */
int
xrep_bmap_data(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_DATA_FORK, true);
}

/* Repair an inode's attr fork. */
int
xrep_bmap_attr(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_ATTR_FORK, false);
}