1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_btree_staging.h"
15 #include "xfs_bit.h"
16 #include "xfs_log_format.h"
17 #include "xfs_trans.h"
18 #include "xfs_sb.h"
19 #include "xfs_alloc.h"
20 #include "xfs_ialloc.h"
21 #include "xfs_rmap.h"
22 #include "xfs_rmap_btree.h"
23 #include "xfs_rtrmap_btree.h"
24 #include "xfs_refcount.h"
25 #include "xfs_rtrefcount_btree.h"
26 #include "xfs_error.h"
27 #include "xfs_health.h"
28 #include "xfs_inode.h"
29 #include "xfs_quota.h"
30 #include "xfs_rtalloc.h"
31 #include "xfs_ag.h"
32 #include "xfs_rtgroup.h"
33 #include "xfs_rtbitmap.h"
34 #include "scrub/xfs_scrub.h"
35 #include "scrub/scrub.h"
36 #include "scrub/common.h"
37 #include "scrub/btree.h"
38 #include "scrub/trace.h"
39 #include "scrub/repair.h"
40 #include "scrub/bitmap.h"
41 #include "scrub/fsb_bitmap.h"
42 #include "scrub/xfile.h"
43 #include "scrub/xfarray.h"
44 #include "scrub/newbt.h"
45 #include "scrub/reap.h"
46 #include "scrub/rcbag.h"
47
48 /*
49 * Rebuilding the Reference Count Btree
50 * ====================================
51 *
52 * This algorithm is "borrowed" from xfs_repair. Imagine the rmap
53 * entries as rectangles representing extents of physical blocks, and
54 * that the rectangles can be laid down to allow them to overlap each
55 * other; then we know that we must emit a refcnt btree entry wherever
56 * the amount of overlap changes, i.e. the emission stimulus is
57 * level-triggered:
58 *
 *                    -    ---
 *          --      ----- ----   ---        ------
 *    --   ----     ----------- ----     ---------
 *    -------------------------------- -----------
 *    ^ ^  ^^ ^^    ^ ^^ ^^^  ^^^^  ^ ^^ ^  ^     ^
 *    2 1  23 21    3 43 234  2123  1 01 2  3     0
65 *
66 * For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
67 *
68 * Note that in the actual refcnt btree we don't store the refcount < 2
69 * cases because the bnobt tells us which blocks are free; single-use
70 * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
71 * supports storing multiple entries covering a given block we could
72 * theoretically dispense with the refcntbt and simply count rmaps, but
73 * that's inefficient in the (hot) write path, so we'll take the cost of
74 * the extra tree to save time. Also there's no guarantee that rmap
75 * will be enabled.
76 *
77 * Given an array of rmaps sorted by physical block number, a starting
78 * physical block (sp), a bag to hold rmaps that cover sp, and the next
79 * physical block where the level changes (np), we can reconstruct the
80 * rt refcount btree as follows:
81 *
 * While there are still unprocessed rmaps in the array,
 *  - Set sp to the physical block (pblk) of the next unprocessed rmap.
 *  - Add to the bag all rmaps in the array where startblock == sp.
 *  - Set np to the physical block where the bag size will change.  This
 *    is the minimum of (the pblk of the next unprocessed rmap) and
 *    (startblock + len of each rmap in the bag).
 *  - Record the bag size as old_bag_size.
 *
 *  - While the bag isn't empty,
 *     - Remove from the bag all rmaps where startblock + len == np.
 *     - Add to the bag all rmaps in the array where startblock == np.
 *     - If the bag size isn't old_bag_size, store the refcount entry
 *       (sp, np - sp, old_bag_size) in the refcnt btree.
 *     - If the bag is empty, break out of the inner loop.
 *     - Set old_bag_size to the bag size.
 *     - Set sp = np.
 *     - Set np to the physical block where the bag size will change.
 *       This is the minimum of (the pblk of the next unprocessed rmap)
 *       and (startblock + len of each rmap in the bag).
101 *
102 * Like all the other repairers, we make a list of all the refcount
103 * records we need, then reinitialize the rt refcount btree root and
104 * insert all the records.
105 */
106
107 struct xrep_rtrefc {
108 /* refcount extents */
109 struct xfarray *refcount_records;
110
111 /* new refcountbt information */
112 struct xrep_newbt new_btree;
113
114 /* old refcountbt blocks */
115 struct xfsb_bitmap old_rtrefcountbt_blocks;
116
117 struct xfs_scrub *sc;
118
119 /* get_records()'s position in the rt refcount record array. */
120 xfarray_idx_t array_cur;
121
122 /* # of refcountbt blocks */
123 xfs_filblks_t btblocks;
124 };
125
/*
 * Set up the scrub context for a realtime refcount btree repair.  This calls
 * xrep_setup_xfbtree() with a description string built for the "rmap record
 * bag"; the description buffer is freed before returning that call's result.
 */
int
xrep_setup_rtrefcountbt(
	struct xfs_scrub	*sc)
{
	char			*bag_descr;
	int			ret;

	bag_descr = xchk_xfile_ag_descr(sc, "rmap record bag");
	ret = xrep_setup_xfbtree(sc, bag_descr);
	kfree(bag_descr);

	return ret;
}
139
140 /* Check for any obvious conflicts with this shared/CoW staging extent. */
141 STATIC int
xrep_rtrefc_check_ext(struct xfs_scrub * sc,const struct xfs_refcount_irec * rec)142 xrep_rtrefc_check_ext(
143 struct xfs_scrub *sc,
144 const struct xfs_refcount_irec *rec)
145 {
146 xfs_rgblock_t last;
147
148 if (xfs_rtrefcount_check_irec(sc->sr.rtg, rec) != NULL)
149 return -EFSCORRUPTED;
150
151 if (xfs_rgbno_to_rtxoff(sc->mp, rec->rc_startblock) != 0)
152 return -EFSCORRUPTED;
153
154 last = rec->rc_startblock + rec->rc_blockcount - 1;
155 if (xfs_rgbno_to_rtxoff(sc->mp, last) != sc->mp->m_sb.sb_rextsize - 1)
156 return -EFSCORRUPTED;
157
158 /* Make sure this isn't free space or misaligned. */
159 return xrep_require_rtext_inuse(sc, rec->rc_startblock,
160 rec->rc_blockcount);
161 }
162
163 /* Record a reference count extent. */
164 STATIC int
xrep_rtrefc_stash(struct xrep_rtrefc * rr,enum xfs_refc_domain domain,xfs_rgblock_t bno,xfs_extlen_t len,uint64_t refcount)165 xrep_rtrefc_stash(
166 struct xrep_rtrefc *rr,
167 enum xfs_refc_domain domain,
168 xfs_rgblock_t bno,
169 xfs_extlen_t len,
170 uint64_t refcount)
171 {
172 struct xfs_refcount_irec irec = {
173 .rc_startblock = bno,
174 .rc_blockcount = len,
175 .rc_refcount = refcount,
176 .rc_domain = domain,
177 };
178 int error = 0;
179
180 if (xchk_should_terminate(rr->sc, &error))
181 return error;
182
183 irec.rc_refcount = min_t(uint64_t, XFS_REFC_REFCOUNT_MAX, refcount);
184
185 error = xrep_rtrefc_check_ext(rr->sc, &irec);
186 if (error)
187 return error;
188
189 trace_xrep_refc_found(rtg_group(rr->sc->sr.rtg), &irec);
190
191 return xfarray_append(rr->refcount_records, &irec);
192 }
193
194 /* Record a CoW staging extent. */
195 STATIC int
xrep_rtrefc_stash_cow(struct xrep_rtrefc * rr,xfs_rgblock_t bno,xfs_extlen_t len)196 xrep_rtrefc_stash_cow(
197 struct xrep_rtrefc *rr,
198 xfs_rgblock_t bno,
199 xfs_extlen_t len)
200 {
201 return xrep_rtrefc_stash(rr, XFS_REFC_DOMAIN_COW, bno, len, 1);
202 }
203
204 /* Decide if an rmap could describe a shared extent. */
205 static inline bool
xrep_rtrefc_rmap_shareable(const struct xfs_rmap_irec * rmap)206 xrep_rtrefc_rmap_shareable(
207 const struct xfs_rmap_irec *rmap)
208 {
209 /* rt metadata are never sharable */
210 if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
211 return false;
212
213 /* Unwritten file blocks are not shareable. */
214 if (rmap->rm_flags & XFS_RMAP_UNWRITTEN)
215 return false;
216
217 return true;
218 }
219
220 /* Grab the next (abbreviated) rmap record from the rmapbt. */
221 STATIC int
xrep_rtrefc_walk_rmaps(struct xrep_rtrefc * rr,struct xfs_rmap_irec * rmap,bool * have_rec)222 xrep_rtrefc_walk_rmaps(
223 struct xrep_rtrefc *rr,
224 struct xfs_rmap_irec *rmap,
225 bool *have_rec)
226 {
227 struct xfs_btree_cur *cur = rr->sc->sr.rmap_cur;
228 struct xfs_mount *mp = cur->bc_mp;
229 int have_gt;
230 int error = 0;
231
232 *have_rec = false;
233
234 /*
235 * Loop through the remaining rmaps. Remember CoW staging
236 * extents and the refcountbt blocks from the old tree for later
237 * disposal. We can only share written data fork extents, so
238 * keep looping until we find an rmap for one.
239 */
240 do {
241 if (xchk_should_terminate(rr->sc, &error))
242 return error;
243
244 error = xfs_btree_increment(cur, 0, &have_gt);
245 if (error)
246 return error;
247 if (!have_gt)
248 return 0;
249
250 error = xfs_rmap_get_rec(cur, rmap, &have_gt);
251 if (error)
252 return error;
253 if (XFS_IS_CORRUPT(mp, !have_gt)) {
254 xfs_btree_mark_sick(cur);
255 return -EFSCORRUPTED;
256 }
257
258 if (rmap->rm_owner == XFS_RMAP_OWN_COW) {
259 error = xrep_rtrefc_stash_cow(rr, rmap->rm_startblock,
260 rmap->rm_blockcount);
261 if (error)
262 return error;
263 } else if (xfs_is_sb_inum(mp, rmap->rm_owner) ||
264 (rmap->rm_flags & (XFS_RMAP_ATTR_FORK |
265 XFS_RMAP_BMBT_BLOCK))) {
266 xfs_btree_mark_sick(cur);
267 return -EFSCORRUPTED;
268 }
269 } while (!xrep_rtrefc_rmap_shareable(rmap));
270
271 *have_rec = true;
272 return 0;
273 }
274
275 static inline uint32_t
xrep_rtrefc_encode_startblock(const struct xfs_refcount_irec * irec)276 xrep_rtrefc_encode_startblock(
277 const struct xfs_refcount_irec *irec)
278 {
279 uint32_t start;
280
281 start = irec->rc_startblock & ~XFS_REFC_COWFLAG;
282 if (irec->rc_domain == XFS_REFC_DOMAIN_COW)
283 start |= XFS_REFC_COWFLAG;
284
285 return start;
286 }
287
288 /*
289 * Compare two refcount records. We want to sort in order of increasing block
290 * number.
291 */
292 static int
xrep_rtrefc_extent_cmp(const void * a,const void * b)293 xrep_rtrefc_extent_cmp(
294 const void *a,
295 const void *b)
296 {
297 const struct xfs_refcount_irec *ap = a;
298 const struct xfs_refcount_irec *bp = b;
299 uint32_t sa, sb;
300
301 sa = xrep_rtrefc_encode_startblock(ap);
302 sb = xrep_rtrefc_encode_startblock(bp);
303
304 if (sa > sb)
305 return 1;
306 if (sa < sb)
307 return -1;
308 return 0;
309 }
310
311 /*
312 * Sort the refcount extents by startblock or else the btree records will be in
313 * the wrong order. Make sure the records do not overlap in physical space.
314 */
315 STATIC int
xrep_rtrefc_sort_records(struct xrep_rtrefc * rr)316 xrep_rtrefc_sort_records(
317 struct xrep_rtrefc *rr)
318 {
319 struct xfs_refcount_irec irec;
320 xfarray_idx_t cur;
321 enum xfs_refc_domain dom = XFS_REFC_DOMAIN_SHARED;
322 xfs_rgblock_t next_rgbno = 0;
323 int error;
324
325 error = xfarray_sort(rr->refcount_records, xrep_rtrefc_extent_cmp,
326 XFARRAY_SORT_KILLABLE);
327 if (error)
328 return error;
329
330 foreach_xfarray_idx(rr->refcount_records, cur) {
331 if (xchk_should_terminate(rr->sc, &error))
332 return error;
333
334 error = xfarray_load(rr->refcount_records, cur, &irec);
335 if (error)
336 return error;
337
338 if (dom == XFS_REFC_DOMAIN_SHARED &&
339 irec.rc_domain == XFS_REFC_DOMAIN_COW) {
340 dom = irec.rc_domain;
341 next_rgbno = 0;
342 }
343
344 if (dom != irec.rc_domain)
345 return -EFSCORRUPTED;
346 if (irec.rc_startblock < next_rgbno)
347 return -EFSCORRUPTED;
348
349 next_rgbno = irec.rc_startblock + irec.rc_blockcount;
350 }
351
352 return error;
353 }
354
355 /* Record extents that belong to the realtime refcount inode. */
356 STATIC int
xrep_rtrefc_walk_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)357 xrep_rtrefc_walk_rmap(
358 struct xfs_btree_cur *cur,
359 const struct xfs_rmap_irec *rec,
360 void *priv)
361 {
362 struct xrep_rtrefc *rr = priv;
363 int error = 0;
364
365 if (xchk_should_terminate(rr->sc, &error))
366 return error;
367
368 /* Skip extents which are not owned by this inode and fork. */
369 if (rec->rm_owner != rr->sc->ip->i_ino)
370 return 0;
371
372 error = xrep_check_ino_btree_mapping(rr->sc, rec);
373 if (error)
374 return error;
375
376 return xfsb_bitmap_set(&rr->old_rtrefcountbt_blocks,
377 xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
378 rec->rm_blockcount);
379 }
380
381 /*
382 * Walk forward through the rmap btree to collect all rmaps starting at
383 * @bno in @rmap_bag. These represent the file(s) that share ownership of
384 * the current block. Upon return, the rmap cursor points to the last record
385 * satisfying the startblock constraint.
386 */
387 static int
xrep_rtrefc_push_rmaps_at(struct xrep_rtrefc * rr,struct rcbag * rcstack,xfs_rgblock_t bno,struct xfs_rmap_irec * rmap,bool * have)388 xrep_rtrefc_push_rmaps_at(
389 struct xrep_rtrefc *rr,
390 struct rcbag *rcstack,
391 xfs_rgblock_t bno,
392 struct xfs_rmap_irec *rmap,
393 bool *have)
394 {
395 struct xfs_scrub *sc = rr->sc;
396 int have_gt;
397 int error;
398
399 while (*have && rmap->rm_startblock == bno) {
400 error = rcbag_add(rcstack, rr->sc->tp, rmap);
401 if (error)
402 return error;
403
404 error = xrep_rtrefc_walk_rmaps(rr, rmap, have);
405 if (error)
406 return error;
407 }
408
409 error = xfs_btree_decrement(sc->sr.rmap_cur, 0, &have_gt);
410 if (error)
411 return error;
412 if (XFS_IS_CORRUPT(sc->mp, !have_gt)) {
413 xfs_btree_mark_sick(sc->sr.rmap_cur);
414 return -EFSCORRUPTED;
415 }
416
417 return 0;
418 }
419
420 /* Scan one AG for reverse mappings for the realtime refcount btree. */
421 STATIC int
xrep_rtrefc_scan_ag(struct xrep_rtrefc * rr,struct xfs_perag * pag)422 xrep_rtrefc_scan_ag(
423 struct xrep_rtrefc *rr,
424 struct xfs_perag *pag)
425 {
426 struct xfs_scrub *sc = rr->sc;
427 int error;
428
429 error = xrep_ag_init(sc, pag, &sc->sa);
430 if (error)
431 return error;
432
433 error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrefc_walk_rmap, rr);
434 xchk_ag_free(sc, &sc->sa);
435 return error;
436 }
437
438 /* Iterate all the rmap records to generate reference count data. */
439 STATIC int
xrep_rtrefc_find_refcounts(struct xrep_rtrefc * rr)440 xrep_rtrefc_find_refcounts(
441 struct xrep_rtrefc *rr)
442 {
443 struct xfs_scrub *sc = rr->sc;
444 struct rcbag *rcstack;
445 struct xfs_perag *pag = NULL;
446 uint64_t old_stack_height;
447 xfs_rgblock_t sbno;
448 xfs_rgblock_t cbno;
449 xfs_rgblock_t nbno;
450 bool have;
451 int error;
452
453 /* Scan for old rtrefc btree blocks. */
454 while ((pag = xfs_perag_next(sc->mp, pag))) {
455 error = xrep_rtrefc_scan_ag(rr, pag);
456 if (error) {
457 xfs_perag_rele(pag);
458 return error;
459 }
460 }
461
462 xrep_rtgroup_btcur_init(sc, &sc->sr);
463
464 /*
465 * Set up a bag to store all the rmap records that we're tracking to
466 * generate a reference count record. If this exceeds
467 * XFS_REFC_REFCOUNT_MAX, we clamp rc_refcount.
468 */
469 error = rcbag_init(sc->mp, sc->xmbtp, &rcstack);
470 if (error)
471 goto out_cur;
472
473 /* Start the rtrmapbt cursor to the left of all records. */
474 error = xfs_btree_goto_left_edge(sc->sr.rmap_cur);
475 if (error)
476 goto out_bag;
477
478 /* Process reverse mappings into refcount data. */
479 while (xfs_btree_has_more_records(sc->sr.rmap_cur)) {
480 struct xfs_rmap_irec rmap;
481
482 /* Push all rmaps with pblk == sbno onto the stack */
483 error = xrep_rtrefc_walk_rmaps(rr, &rmap, &have);
484 if (error)
485 goto out_bag;
486 if (!have)
487 break;
488 sbno = cbno = rmap.rm_startblock;
489 error = xrep_rtrefc_push_rmaps_at(rr, rcstack, sbno, &rmap,
490 &have);
491 if (error)
492 goto out_bag;
493
494 /* Set nbno to the bno of the next refcount change */
495 error = rcbag_next_edge(rcstack, sc->tp, &rmap, have, &nbno);
496 if (error)
497 goto out_bag;
498
499 ASSERT(nbno > sbno);
500 old_stack_height = rcbag_count(rcstack);
501
502 /* While stack isn't empty... */
503 while (rcbag_count(rcstack) > 0) {
504 /* Pop all rmaps that end at nbno */
505 error = rcbag_remove_ending_at(rcstack, sc->tp, nbno);
506 if (error)
507 goto out_bag;
508
509 /* Push array items that start at nbno */
510 error = xrep_rtrefc_walk_rmaps(rr, &rmap, &have);
511 if (error)
512 goto out_bag;
513 if (have) {
514 error = xrep_rtrefc_push_rmaps_at(rr, rcstack,
515 nbno, &rmap, &have);
516 if (error)
517 goto out_bag;
518 }
519
520 /* Emit refcount if necessary */
521 ASSERT(nbno > cbno);
522 if (rcbag_count(rcstack) != old_stack_height) {
523 if (old_stack_height > 1) {
524 error = xrep_rtrefc_stash(rr,
525 XFS_REFC_DOMAIN_SHARED,
526 cbno, nbno - cbno,
527 old_stack_height);
528 if (error)
529 goto out_bag;
530 }
531 cbno = nbno;
532 }
533
534 /* Stack empty, go find the next rmap */
535 if (rcbag_count(rcstack) == 0)
536 break;
537 old_stack_height = rcbag_count(rcstack);
538 sbno = nbno;
539
540 /* Set nbno to the bno of the next refcount change */
541 error = rcbag_next_edge(rcstack, sc->tp, &rmap, have,
542 &nbno);
543 if (error)
544 goto out_bag;
545
546 ASSERT(nbno > sbno);
547 }
548 }
549
550 ASSERT(rcbag_count(rcstack) == 0);
551 out_bag:
552 rcbag_free(&rcstack);
553 out_cur:
554 xchk_rtgroup_btcur_free(&sc->sr);
555 return error;
556 }
557
558 /* Retrieve refcountbt data for bulk load. */
559 STATIC int
xrep_rtrefc_get_records(struct xfs_btree_cur * cur,unsigned int idx,struct xfs_btree_block * block,unsigned int nr_wanted,void * priv)560 xrep_rtrefc_get_records(
561 struct xfs_btree_cur *cur,
562 unsigned int idx,
563 struct xfs_btree_block *block,
564 unsigned int nr_wanted,
565 void *priv)
566 {
567 struct xrep_rtrefc *rr = priv;
568 union xfs_btree_rec *block_rec;
569 unsigned int loaded;
570 int error;
571
572 for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
573 error = xfarray_load(rr->refcount_records, rr->array_cur++,
574 &cur->bc_rec.rc);
575 if (error)
576 return error;
577
578 block_rec = xfs_btree_rec_addr(cur, idx, block);
579 cur->bc_ops->init_rec_from_cur(cur, block_rec);
580 }
581
582 return loaded;
583 }
584
585 /* Feed one of the new btree blocks to the bulk loader. */
586 STATIC int
xrep_rtrefc_claim_block(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,void * priv)587 xrep_rtrefc_claim_block(
588 struct xfs_btree_cur *cur,
589 union xfs_btree_ptr *ptr,
590 void *priv)
591 {
592 struct xrep_rtrefc *rr = priv;
593
594 return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
595 }
596
597 /* Figure out how much space we need to create the incore btree root block. */
598 STATIC size_t
xrep_rtrefc_iroot_size(struct xfs_btree_cur * cur,unsigned int level,unsigned int nr_this_level,void * priv)599 xrep_rtrefc_iroot_size(
600 struct xfs_btree_cur *cur,
601 unsigned int level,
602 unsigned int nr_this_level,
603 void *priv)
604 {
605 return xfs_rtrefcount_broot_space_calc(cur->bc_mp, level,
606 nr_this_level);
607 }
608
609 /*
610 * Use the collected refcount information to stage a new rt refcount btree. If
611 * this is successful we'll return with the new btree root information logged
612 * to the repair transaction but not yet committed.
613 */
614 STATIC int
xrep_rtrefc_build_new_tree(struct xrep_rtrefc * rr)615 xrep_rtrefc_build_new_tree(
616 struct xrep_rtrefc *rr)
617 {
618 struct xfs_scrub *sc = rr->sc;
619 struct xfs_rtgroup *rtg = sc->sr.rtg;
620 struct xfs_btree_cur *refc_cur;
621 int error;
622
623 error = xrep_rtrefc_sort_records(rr);
624 if (error)
625 return error;
626
627 /*
628 * Prepare to construct the new btree by reserving disk space for the
629 * new btree and setting up all the accounting information we'll need
630 * to root the new btree while it's under construction and before we
631 * attach it to the realtime refcount inode.
632 */
633 error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
634 if (error)
635 return error;
636
637 rr->new_btree.bload.get_records = xrep_rtrefc_get_records;
638 rr->new_btree.bload.claim_block = xrep_rtrefc_claim_block;
639 rr->new_btree.bload.iroot_size = xrep_rtrefc_iroot_size;
640
641 refc_cur = xfs_rtrefcountbt_init_cursor(NULL, rtg);
642 xfs_btree_stage_ifakeroot(refc_cur, &rr->new_btree.ifake);
643
644 /* Compute how many blocks we'll need. */
645 error = xfs_btree_bload_compute_geometry(refc_cur, &rr->new_btree.bload,
646 xfarray_length(rr->refcount_records));
647 if (error)
648 goto err_cur;
649
650 /* Last chance to abort before we start committing fixes. */
651 if (xchk_should_terminate(sc, &error))
652 goto err_cur;
653
654 /*
655 * Guess how many blocks we're going to need to rebuild an entire
656 * rtrefcountbt from the number of extents we found, and pump up our
657 * transaction to have sufficient block reservation. We're allowed
658 * to exceed quota to repair inconsistent metadata, though this is
659 * unlikely.
660 */
661 error = xfs_trans_reserve_more_inode(sc->tp, rtg_refcount(rtg),
662 rr->new_btree.bload.nr_blocks, 0, true);
663 if (error)
664 goto err_cur;
665
666 /* Reserve the space we'll need for the new btree. */
667 error = xrep_newbt_alloc_blocks(&rr->new_btree,
668 rr->new_btree.bload.nr_blocks);
669 if (error)
670 goto err_cur;
671
672 /* Add all observed refcount records. */
673 rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
674 rr->array_cur = XFARRAY_CURSOR_INIT;
675 error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr);
676 if (error)
677 goto err_cur;
678
679 /*
680 * Install the new rtrefc btree in the inode. After this point the old
681 * btree is no longer accessible, the new tree is live, and we can
682 * delete the cursor.
683 */
684 xfs_rtrefcountbt_commit_staged_btree(refc_cur, sc->tp);
685 xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
686 xfs_btree_del_cursor(refc_cur, 0);
687
688 /* Dispose of any unused blocks and the accounting information. */
689 error = xrep_newbt_commit(&rr->new_btree);
690 if (error)
691 return error;
692
693 return xrep_roll_trans(sc);
694 err_cur:
695 xfs_btree_del_cursor(refc_cur, error);
696 xrep_newbt_cancel(&rr->new_btree);
697 return error;
698 }
699
700 /* Rebuild the rt refcount btree. */
701 int
xrep_rtrefcountbt(struct xfs_scrub * sc)702 xrep_rtrefcountbt(
703 struct xfs_scrub *sc)
704 {
705 struct xrep_rtrefc *rr;
706 struct xfs_mount *mp = sc->mp;
707 char *descr;
708 int error;
709
710 /* We require the rmapbt to rebuild anything. */
711 if (!xfs_has_rtrmapbt(mp))
712 return -EOPNOTSUPP;
713
714 /* Make sure any problems with the fork are fixed. */
715 error = xrep_metadata_inode_forks(sc);
716 if (error)
717 return error;
718
719 rr = kzalloc(sizeof(struct xrep_rtrefc), XCHK_GFP_FLAGS);
720 if (!rr)
721 return -ENOMEM;
722 rr->sc = sc;
723
724 /* Set up enough storage to handle one refcount record per rt extent. */
725 descr = xchk_xfile_ag_descr(sc, "reference count records");
726 error = xfarray_create(descr, mp->m_sb.sb_rextents,
727 sizeof(struct xfs_refcount_irec),
728 &rr->refcount_records);
729 kfree(descr);
730 if (error)
731 goto out_rr;
732
733 /* Collect all reference counts. */
734 xfsb_bitmap_init(&rr->old_rtrefcountbt_blocks);
735 error = xrep_rtrefc_find_refcounts(rr);
736 if (error)
737 goto out_bitmap;
738
739 xfs_trans_ijoin(sc->tp, sc->ip, 0);
740
741 /* Rebuild the refcount information. */
742 error = xrep_rtrefc_build_new_tree(rr);
743 if (error)
744 goto out_bitmap;
745
746 /*
747 * Free all the extents that were allocated to the former rtrefcountbt
748 * and aren't cross-linked with something else.
749 */
750 error = xrep_reap_metadir_fsblocks(rr->sc,
751 &rr->old_rtrefcountbt_blocks);
752 if (error)
753 goto out_bitmap;
754
755 out_bitmap:
756 xfsb_bitmap_destroy(&rr->old_rtrefcountbt_blocks);
757 xfarray_destroy(rr->refcount_records);
758 out_rr:
759 kfree(rr);
760 return error;
761 }
762