// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_quota.h"
#include "xfs_rtalloc.h"
#include "xfs_ag.h"
#include "xfs_rtgroup.h"
#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/fsb_bitmap.h"
#include "scrub/rgb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/iscan.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Realtime Reverse Mapping Btree Repair
 * =====================================
 *
 * This isn't quite as difficult as repairing the rmap btree on the data
 * device, since we only store the data fork extents of realtime files on the
 * realtime device. We still have to freeze the filesystem and stop the
 * background threads like we do for the rmap repair, but we only have to scan
 * realtime inodes.
 *
 * Collecting entries for the new realtime rmap btree is easy -- all we have
 * to do is generate rtrmap entries from the data fork mappings of all realtime
 * files in the filesystem. We then scan the rmap btrees of the data device
 * looking for extents belonging to the old btree and note them in a bitmap.
 *
 * To rebuild the realtime rmap btree, we bulk-load the collected mappings into
 * a new btree cursor and atomically swap that into the realtime inode. Then
 * we can free the blocks from the old btree.
 *
 * We use the 'xrep_rtrmap' prefix for all the rmap functions.
 */
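
/*
 * Roughly speaking, the stages above map onto the code below as follows:
 * xrep_rtrmap_find_rmaps collects the new records and the old btree's blocks,
 * xrep_rtrmap_build_new_tree bulk-loads the new btree and swaps it in, and
 * xrep_rtrmap_remove_old_tree reaps what is left of the old one.
 */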

/* Context for collecting rmaps */
struct xrep_rtrmap {
	/* new rtrmapbt information */
	struct xrep_newbt	new_btree;

	/* lock for the xfbtree and xfile */
	struct mutex		lock;

	/* rmap records generated from primary metadata */
	struct xfbtree		rtrmap_btree;

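	/* scrub context */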
	struct xfs_scrub	*sc;

	/* bitmap of old rtrmapbt blocks */
	struct xfsb_bitmap	old_rtrmapbt_blocks;

	/* Hooks into rtrmap update code. */
	struct xfs_rmap_hook	rhook;

	/* inode scan cursor */
	struct xchk_iscan	iscan;

	/* in-memory btree cursor for the ->get_blocks walk */
	struct xfs_btree_cur	*mcur;

	/* Number of records we're staging in the new btree. */
	uint64_t		nr_records;
};

/* Set us up to repair rt reverse mapping btrees. */
int
xrep_setup_rtrmapbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rtrmap	*rr;
	char			*descr;
	int			error;

	xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);

	descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records");
	error = xrep_setup_xfbtree(sc, descr);
	kfree(descr);
	if (error)
		return error;

	rr = kzalloc(sizeof(struct xrep_rtrmap), XCHK_GFP_FLAGS);
	if (!rr)
		return -ENOMEM;

	rr->sc = sc;
	sc->buf = rr;
	return 0;
}

/* Make sure there's nothing funny about this mapping. */
STATIC int
xrep_rtrmap_check_mapping(
	struct xfs_scrub	*sc,
	const struct xfs_rmap_irec	*rec)
{
	if (xfs_rtrmap_check_irec(sc->sr.rtg, rec) != NULL)
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	return xrep_require_rtext_inuse(sc, rec->rm_startblock,
			rec->rm_blockcount);
}

/* Store a reverse-mapping record. */
static inline int
xrep_rtrmap_stash(
	struct xrep_rtrmap	*rr,
	xfs_rgblock_t		startblock,
	xfs_extlen_t		blockcount,
	uint64_t		owner,
	uint64_t		offset,
	unsigned int		flags)
{
	struct xfs_rmap_irec	rmap = {
		.rm_startblock	= startblock,
		.rm_blockcount	= blockcount,
		.rm_owner	= owner,
		.rm_offset	= offset,
		.rm_flags	= flags,
	};
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_btree_cur	*mcur;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	trace_xrep_rtrmap_found(sc->mp, &rmap);

	/* Add entry to in-memory btree. */
	mutex_lock(&rr->lock);
	mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, &rr->rtrmap_btree);
	error = xfs_rmap_map_raw(mcur, &rmap);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_cancel;

	error = xfbtree_trans_commit(&rr->rtrmap_btree, sc->tp);
	if (error)
		goto out_abort;

	mutex_unlock(&rr->lock);
	return 0;

out_cancel:
	xfbtree_trans_cancel(&rr->rtrmap_btree, sc->tp);
out_abort:
	xchk_iscan_abort(&rr->iscan);
	mutex_unlock(&rr->lock);
	return error;
}

/* Finding all file and bmbt extents. */

/* Context for accumulating rmaps for an inode fork. */
struct xrep_rtrmap_ifork {
	/*
	 * Accumulate rmap data here to turn multiple adjacent bmaps into a
	 * single rmap.
	 */
	struct xfs_rmap_irec	accum;

	struct xrep_rtrmap	*rr;
};

/* Stash an rmap that we accumulated while walking an inode fork. */
STATIC int
xrep_rtrmap_stash_accumulated(
	struct xrep_rtrmap_ifork	*rf)
{
	if (rf->accum.rm_blockcount == 0)
		return 0;

	return xrep_rtrmap_stash(rf->rr, rf->accum.rm_startblock,
			rf->accum.rm_blockcount, rf->accum.rm_owner,
			rf->accum.rm_offset, rf->accum.rm_flags);
}

/* Accumulate a bmbt record. */
STATIC int
xrep_rtrmap_visit_bmbt(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*rec,
	void			*priv)
{
	struct xrep_rtrmap_ifork	*rf = priv;
	struct xfs_rmap_irec	*accum = &rf->accum;
	struct xfs_mount	*mp = rf->rr->sc->mp;
	xfs_rgblock_t		rgbno;
	unsigned int		rmap_flags = 0;
	int			error;

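	/* Skip mappings that are not for the rtgroup being repaired. */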
	if (xfs_rtb_to_rgno(mp, rec->br_startblock) !=
	    rtg_rgno(rf->rr->sc->sr.rtg))
		return 0;

	if (rec->br_state == XFS_EXT_UNWRITTEN)
		rmap_flags |= XFS_RMAP_UNWRITTEN;

	/* If this bmap is adjacent to the previous one, just add it. */
	rgbno = xfs_rtb_to_rgbno(mp, rec->br_startblock);
	if (accum->rm_blockcount > 0 &&
	    rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
	    rgbno == accum->rm_startblock + accum->rm_blockcount &&
	    rmap_flags == accum->rm_flags) {
		accum->rm_blockcount += rec->br_blockcount;
		return 0;
	}

	/* Otherwise stash the old rmap and start accumulating a new one. */
	error = xrep_rtrmap_stash_accumulated(rf);
	if (error)
		return error;

	accum->rm_startblock = rgbno;
	accum->rm_blockcount = rec->br_blockcount;
	accum->rm_offset = rec->br_startoff;
	accum->rm_flags = rmap_flags;
	return 0;
}

/*
 * Iterate the block mapping btree to collect rmap records for anything in this
 * fork that maps to the rt volume. Sets @mappings_done to true if we've
 * scanned the block mappings in this fork.
 */
STATIC int
xrep_rtrmap_scan_bmbt(
	struct xrep_rtrmap_ifork	*rf,
	struct xfs_inode	*ip,
	bool			*mappings_done)
{
	struct xrep_rtrmap	*rr = rf->rr;
	struct xfs_btree_cur	*cur;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	int			error = 0;

	*mappings_done = false;

	/*
	 * If the incore extent cache is already loaded, we'll just use the
	 * incore extent scanner to record mappings. Don't bother walking the
	 * ondisk extent tree.
	 */
	if (!xfs_need_iread_extents(ifp))
		return 0;

	/* Accumulate all the mappings in the bmap btree. */
	cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, XFS_DATA_FORK);
	error = xfs_bmap_query_all(cur, xrep_rtrmap_visit_bmbt, rf);
	xfs_btree_del_cursor(cur, error);
	if (error)
		return error;

	/* Stash any remaining accumulated rmaps and exit. */
	*mappings_done = true;
	return xrep_rtrmap_stash_accumulated(rf);
}

/*
 * Iterate the in-core extent cache to collect rmap records for anything in
 * this fork that maps to this rtgroup.
 */
STATIC int
xrep_rtrmap_scan_iext(
	struct xrep_rtrmap_ifork	*rf,
	struct xfs_ifork	*ifp)
{
	struct xfs_bmbt_irec	rec;
	struct xfs_iext_cursor	icur;
	int			error;

	for_each_xfs_iext(ifp, &icur, &rec) {
		if (isnullstartblock(rec.br_startblock))
			continue;
		error = xrep_rtrmap_visit_bmbt(NULL, &rec, rf);
		if (error)
			return error;
	}

	return xrep_rtrmap_stash_accumulated(rf);
}

/* Find all the extents on the realtime device mapped by an inode fork. */
STATIC int
xrep_rtrmap_scan_dfork(
	struct xrep_rtrmap	*rr,
	struct xfs_inode	*ip)
{
	struct xrep_rtrmap_ifork	rf = {
		.accum		= { .rm_owner = ip->i_ino, },
		.rr		= rr,
	};
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	int			error = 0;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
		bool		mappings_done;

		/*
		 * Scan the bmbt for mappings. If the incore extent tree is
		 * loaded, we want to scan the cached mappings since that's
		 * faster when the extent counts are very high.
		 */
		error = xrep_rtrmap_scan_bmbt(&rf, ip, &mappings_done);
		if (error || mappings_done)
			return error;
	} else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
		/* realtime data forks should only be extents or btree */
		return -EFSCORRUPTED;
	}

	/* Scan incore extent cache. */
	return xrep_rtrmap_scan_iext(&rf, ifp);
}

/* Record reverse mappings for a file. */
STATIC int
xrep_rtrmap_scan_inode(
	struct xrep_rtrmap	*rr,
	struct xfs_inode	*ip)
{
	unsigned int		lock_mode;
	int			error = 0;

	/* Skip the rt rmap btree inode. */
	if (rr->sc->ip == ip)
		return 0;

	lock_mode = xfs_ilock_data_map_shared(ip);

	/* Check the data fork if it's on the realtime device. */
	if (XFS_IS_REALTIME_INODE(ip)) {
		error = xrep_rtrmap_scan_dfork(rr, ip);
		if (error)
			goto out_unlock;
	}

	xchk_iscan_mark_visited(&rr->iscan, ip);
out_unlock:
	xfs_iunlock(ip, lock_mode);
	return error;
}

/* Record extents that belong to the realtime rmap inode. */
STATIC int
xrep_rtrmap_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec	*rec,
	void			*priv)
{
	struct xrep_rtrmap	*rr = priv;
	int			error = 0;

	if (xchk_should_terminate(rr->sc, &error))
		return error;

	/* Skip extents which are not owned by this inode and fork. */
	if (rec->rm_owner != rr->sc->ip->i_ino)
		return 0;

	error = xrep_check_ino_btree_mapping(rr->sc, rec);
	if (error)
		return error;

	return xfsb_bitmap_set(&rr->old_rtrmapbt_blocks,
			xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
			rec->rm_blockcount);
}

/* Scan one AG for reverse mappings for the realtime rmap btree. */
STATIC int
xrep_rtrmap_scan_ag(
	struct xrep_rtrmap	*rr,
	struct xfs_perag	*pag)
{
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	error = xrep_ag_init(sc, pag, &sc->sa);
	if (error)
		return error;

	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrmap_walk_rmap, rr);
	xchk_ag_free(sc, &sc->sa);
	return error;
}

struct xrep_rtrmap_stash_run {
	struct xrep_rtrmap	*rr;
	uint64_t		owner;
};

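/* Stash one rmap record for a run of blocks with the same owner. */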
static int
xrep_rtrmap_stash_run(
	uint32_t		start,
	uint32_t		len,
	void			*priv)
{
	struct xrep_rtrmap_stash_run	*rsr = priv;
	struct xrep_rtrmap	*rr = rsr->rr;
	xfs_rgblock_t		rgbno = start;

	return xrep_rtrmap_stash(rr, rgbno, len, rsr->owner, 0, 0);
}

/*
 * Emit rmaps for every extent of bits set in the bitmap. Caller must ensure
 * that the ranges are in units of rtgroup blocks.
 */
STATIC int
xrep_rtrmap_stash_bitmap(
	struct xrep_rtrmap	*rr,
	struct xrgb_bitmap	*bitmap,
	const struct xfs_owner_info	*oinfo)
{
	struct xrep_rtrmap_stash_run	rsr = {
		.rr		= rr,
		.owner		= oinfo->oi_owner,
	};

	return xrgb_bitmap_walk(bitmap, xrep_rtrmap_stash_run, &rsr);
}

/* Record a CoW staging extent. */
STATIC int
xrep_rtrmap_walk_cowblocks(
	struct xfs_btree_cur	*cur,
	const struct xfs_refcount_irec	*irec,
	void			*priv)
{
	struct xrgb_bitmap	*bitmap = priv;

	if (!xfs_refcount_check_domain(irec) ||
	    irec->rc_domain != XFS_REFC_DOMAIN_COW)
		return -EFSCORRUPTED;

	return xrgb_bitmap_set(bitmap, irec->rc_startblock,
			irec->rc_blockcount);
}

/* Collect rmaps for all CoW staging extents in this rtgroup. */
STATIC int
xrep_rtrmap_find_refcount_rmaps(
	struct xrep_rtrmap	*rr)
{
	struct xrgb_bitmap	cow_blocks;		/* COWBIT */
	struct xfs_refcount_irec	low = {
		.rc_startblock	= 0,
		.rc_domain	= XFS_REFC_DOMAIN_COW,
	};
	struct xfs_refcount_irec	high = {
		.rc_startblock	= -1U,
		.rc_domain	= XFS_REFC_DOMAIN_COW,
	};
	struct xfs_scrub	*sc = rr->sc;
	int			error;

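	/* CoW staging extents exist on the rt device only with rt reflink. */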
	if (!xfs_has_rtreflink(sc->mp))
		return 0;

	xrgb_bitmap_init(&cow_blocks);

	/* Collect rmaps for CoW staging extents. */
	error = xfs_refcount_query_range(sc->sr.refc_cur, &low, &high,
			xrep_rtrmap_walk_cowblocks, &cow_blocks);
	if (error)
		goto out_bitmap;

	/* Generate rmaps for the CoW staging extents that we found. */
	error = xrep_rtrmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW);
	if (error)
		goto out_bitmap;

out_bitmap:
	xrgb_bitmap_destroy(&cow_blocks);
	return error;
}

/* Count and check all collected records. */
STATIC int
xrep_rtrmap_check_record(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec	*rec,
	void			*priv)
{
	struct xrep_rtrmap	*rr = priv;
	int			error;

	error = xrep_rtrmap_check_mapping(rr->sc, rec);
	if (error)
		return error;

	rr->nr_records++;
	return 0;
}

/* Generate all the reverse-mappings for the realtime device. */
STATIC int
xrep_rtrmap_find_rmaps(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = NULL;
	struct xfs_inode	*ip;
	struct xfs_btree_cur	*mcur;
	int			error;

	/* Generate rmaps for the realtime superblock. */
	if (xfs_has_rtsb(sc->mp) && rtg_rgno(rr->sc->sr.rtg) == 0) {
		error = xrep_rtrmap_stash(rr, 0, sc->mp->m_sb.sb_rextsize,
				XFS_RMAP_OWN_FS, 0, 0);
		if (error)
			return error;
	}

	/* Find CoW staging extents. */
	xrep_rtgroup_btcur_init(sc, &sc->sr);
	error = xrep_rtrmap_find_refcount_rmaps(rr);
	xchk_rtgroup_btcur_free(&sc->sr);
	if (error)
		return error;

	/*
	 * Set up for a potentially lengthy filesystem scan by reducing our
	 * transaction resource usage for the duration. Specifically:
	 *
	 * Unlock the realtime metadata inodes and cancel the transaction to
	 * release the log grant space while we scan the filesystem.
	 *
	 * Create a new empty transaction to eliminate the possibility of the
	 * inode scan deadlocking on cyclical metadata.
	 *
	 * We pass the empty transaction to the file scanning function to avoid
	 * repeatedly cycling empty transactions. This can be done even though
	 * we take the IOLOCK to quiesce the file because empty transactions
	 * do not take sb_internal.
	 */
	xchk_trans_cancel(sc);
	xchk_rtgroup_unlock(&sc->sr);
	error = xchk_trans_alloc_empty(sc);
	if (error)
		return error;

	while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
		error = xrep_rtrmap_scan_inode(rr, ip);
		xchk_irele(sc, ip);
		if (error)
			break;

		if (xchk_should_terminate(sc, &error))
			break;
	}
	xchk_iscan_iter_finish(&rr->iscan);
	if (error)
		return error;

	/*
	 * Switch out for a real transaction and lock the RT metadata in
	 * preparation for building a new tree.
	 */
	xchk_trans_cancel(sc);
	error = xchk_setup_rt(sc);
	if (error)
		return error;
	error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
	if (error)
		return error;

	/*
	 * If a hook failed to update the in-memory btree, we lack the data to
	 * continue the repair.
	 */
	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	/* Scan for old rtrmap blocks. */
	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xrep_rtrmap_scan_ag(rr, pag);
		if (error) {
			xfs_perag_rele(pag);
			return error;
		}
	}

	/*
	 * Now that we have everything locked again, we need to count the
	 * number of rmap records stashed in the btree. This should reflect
	 * all actively-owned rt files in the filesystem. At the same time,
	 * check all our records before we start building a new btree, which
	 * requires the rtbitmap lock.
	 */
	mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, &rr->rtrmap_btree);
	rr->nr_records = 0;
	error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr);
	xfs_btree_del_cursor(mcur, error);

	return error;
}

/* Building the new rtrmap btree. */

/* Retrieve rtrmapbt data for bulk load. */
STATIC int
xrep_rtrmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xrep_rtrmap	*rr = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

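	/* Fill the staging block with records pulled from the in-memory btree. */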
	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		int		stat = 0;

		error = xfs_btree_increment(rr->mcur, 0, &stat);
		if (error)
			return error;
		if (!stat)
			return -EFSCORRUPTED;

		error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
		if (error)
			return error;
		if (!stat)
			return -EFSCORRUPTED;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_rtrmap_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_rtrmap	*rr = priv;

	return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
}

/* Figure out how much space we need to create the incore btree root block. */
STATIC size_t
xrep_rtrmap_iroot_size(
	struct xfs_btree_cur	*cur,
	unsigned int		level,
	unsigned int		nr_this_level,
	void			*priv)
{
	return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level);
}

/*
 * Use the collected rmap information to stage a new rmap btree. If this is
 * successful we'll return with the new btree root information logged to the
 * repair transaction but not yet committed.
 */
STATIC int
xrep_rtrmap_build_new_tree(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_rtgroup	*rtg = sc->sr.rtg;
	struct xfs_btree_cur	*rmap_cur;
	int			error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the realtime rmapbt inode.
	 */
	error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
	if (error)
		return error;

	rr->new_btree.bload.get_records = xrep_rtrmap_get_records;
	rr->new_btree.bload.claim_block = xrep_rtrmap_claim_block;
	rr->new_btree.bload.iroot_size = xrep_rtrmap_iroot_size;

	rmap_cur = xfs_rtrmapbt_init_cursor(NULL, rtg);
	xfs_btree_stage_ifakeroot(rmap_cur, &rr->new_btree.ifake);

	/* Compute how many blocks we'll need for the rmaps collected. */
	error = xfs_btree_bload_compute_geometry(rmap_cur,
			&rr->new_btree.bload, rr->nr_records);
	if (error)
		goto err_cur;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire
	 * rtrmapbt from the number of extents we found, and pump up our
	 * transaction to have sufficient block reservation. We're allowed
	 * to exceed quota to repair inconsistent metadata, though this is
	 * unlikely.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, rtg_rmap(rtg),
			rr->new_btree.bload.nr_blocks, 0, true);
	if (error)
		goto err_cur;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			rr->new_btree.bload.nr_blocks);
	if (error)
		goto err_cur;

	/*
	 * Create a cursor to the in-memory btree so that we can bulk load the
	 * new btree.
	 */
	rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, &rr->rtrmap_btree);
	error = xfs_btree_goto_left_edge(rr->mcur);
	if (error)
		goto err_mcur;

	/* Add all observed rmap records. */
	rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
	error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_mcur;

	/*
	 * Install the new rtrmap btree in the inode. After this point the old
	 * btree is no longer accessible, the new tree is live, and we can
	 * delete the cursor.
	 */
	xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp);
	xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
	xfs_btree_del_cursor(rmap_cur, 0);
	xfs_btree_del_cursor(rr->mcur, 0);
	rr->mcur = NULL;

	/*
	 * Now that we've written the new btree to disk, we don't need to keep
	 * updating the in-memory btree. Abort the scan to stop live updates.
	 */
	xchk_iscan_abort(&rr->iscan);

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;

	return xrep_roll_trans(sc);

err_mcur:
	xfs_btree_del_cursor(rr->mcur, error);
err_cur:
	xfs_btree_del_cursor(rmap_cur, error);
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}

/* Reaping the old btree. */

/* Reap the old rtrmapbt blocks. */
STATIC int
xrep_rtrmap_remove_old_tree(
	struct xrep_rtrmap	*rr)
{
	int			error;

	/*
	 * Free all the extents that were allocated to the former rtrmapbt and
	 * aren't cross-linked with something else.
	 */
	error = xrep_reap_metadir_fsblocks(rr->sc, &rr->old_rtrmapbt_blocks);
	if (error)
		return error;

	/*
	 * Ensure the proper reservation for the rtrmap inode so that we don't
	 * fail to expand the new btree.
	 */
	return xrep_reset_metafile_resv(rr->sc);
}

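/* Decide if we want to apply this live update to the in-memory btree. */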
static inline bool
xrep_rtrmapbt_want_live_update(
	struct xchk_iscan	*iscan,
	const struct xfs_owner_info	*oi)
{
	if (xchk_iscan_aborted(iscan))
		return false;

	/*
	 * We scanned the CoW staging extents before we started the iscan, so
	 * we need all the updates.
	 */
	if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
		return true;

	/* Ignore updates to files that the scanner hasn't visited yet. */
	return xchk_iscan_want_live_update(iscan, oi->oi_owner);
}

/*
 * Apply a rtrmapbt update from the regular filesystem into our shadow btree.
 * We're running from the thread that owns the rtrmap ILOCK and is generating
 * the update, so we must be careful about which parts of the struct
 * xrep_rtrmap we change.
 */
static int
xrep_rtrmapbt_live_update(
	struct notifier_block	*nb,
	unsigned long		action,
	void			*data)
{
	struct xfs_rmap_update_params	*p = data;
	struct xrep_rtrmap	*rr;
	struct xfs_mount	*mp;
	struct xfs_btree_cur	*mcur;
	struct xfs_trans	*tp;
	void			*txcookie;
	int			error;

	rr = container_of(nb, struct xrep_rtrmap, rhook.rmap_hook.nb);
	mp = rr->sc->mp;

	if (!xrep_rtrmapbt_want_live_update(&rr->iscan, &p->oinfo))
		goto out_unlock;

	trace_xrep_rmap_live_update(rtg_group(rr->sc->sr.rtg), action, p);

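	/*
	 * The caller of this hook already owns a transaction, so set it aside
	 * and use a dummy transaction to update the in-memory btree buffers.
	 */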
	error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
	if (error)
		goto out_abort;

	mutex_lock(&rr->lock);
	mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, &rr->rtrmap_btree);
	error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
			p->blockcount, &p->oinfo, p->unwritten);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_cancel;

	error = xfbtree_trans_commit(&rr->rtrmap_btree, tp);
	if (error)
		goto out_cancel;

	xrep_trans_cancel_hook_dummy(&txcookie, tp);
	mutex_unlock(&rr->lock);
	return NOTIFY_DONE;

out_cancel:
	xfbtree_trans_cancel(&rr->rtrmap_btree, tp);
	xrep_trans_cancel_hook_dummy(&txcookie, tp);
	mutex_unlock(&rr->lock);
out_abort:
	xchk_iscan_abort(&rr->iscan);
out_unlock:
	return NOTIFY_DONE;
}

/* Set up the filesystem scan components. */
STATIC int
xrep_rtrmap_setup_scan(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	mutex_init(&rr->lock);
	xfsb_bitmap_init(&rr->old_rtrmapbt_blocks);

	/* Set up some storage. */
	error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp,
			rtg_rgno(sc->sr.rtg));
	if (error)
		goto out_bitmap;

	/* Retry iget every tenth of a second for up to 30 seconds. */
	xchk_iscan_start(sc, 30000, 100, &rr->iscan);

	/*
	 * Hook into live rtrmap operations so that we can update our in-memory
	 * btree to reflect live changes on the filesystem. Since we drop the
	 * rtrmap ILOCK to scan all the inodes, we need this piece to avoid
	 * installing a stale btree.
	 */
	ASSERT(sc->flags & XCHK_FSGATES_RMAP);
	xfs_rmap_hook_setup(&rr->rhook, xrep_rtrmapbt_live_update);
	error = xfs_rmap_hook_add(rtg_group(sc->sr.rtg), &rr->rhook);
	if (error)
		goto out_iscan;
	return 0;

out_iscan:
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(&rr->rtrmap_btree);
out_bitmap:
	xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
	mutex_destroy(&rr->lock);
	return error;
}

/* Tear down scan components. */
STATIC void
xrep_rtrmap_teardown(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;

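	/* Discourage any hook functions from accessing the scan data. */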
	xchk_iscan_abort(&rr->iscan);
	xfs_rmap_hook_del(rtg_group(sc->sr.rtg), &rr->rhook);
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(&rr->rtrmap_btree);
	xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
	mutex_destroy(&rr->lock);
}

/* Repair the realtime rmap btree. */
int
xrep_rtrmapbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rtrmap	*rr = sc->buf;
	int			error;

	/* Make sure any problems with the fork are fixed. */
	error = xrep_metadata_inode_forks(sc);
	if (error)
		return error;

	error = xrep_rtrmap_setup_scan(rr);
	if (error)
		return error;

	/* Collect rmaps for realtime files. */
	error = xrep_rtrmap_find_rmaps(rr);
	if (error)
		goto out_records;

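	/* Attach the rtrmap inode to the new transaction. */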
	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the rtrmap information. */
	error = xrep_rtrmap_build_new_tree(rr);
	if (error)
		goto out_records;

	/* Kill the old tree. */
	error = xrep_rtrmap_remove_old_tree(rr);
	if (error)
		goto out_records;

out_records:
	xrep_rtrmap_teardown(rr);
	return error;
}