1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
13 #include "xfs_log_format.h"
14 #include "xfs_trans.h"
15 #include "xfs_rtalloc.h"
16 #include "xfs_inode.h"
17 #include "xfs_bit.h"
18 #include "xfs_bmap.h"
19 #include "xfs_bmap_btree.h"
20 #include "xfs_rmap.h"
21 #include "xfs_rtrmap_btree.h"
22 #include "xfs_exchmaps.h"
23 #include "xfs_rtbitmap.h"
24 #include "xfs_rtgroup.h"
25 #include "xfs_extent_busy.h"
26 #include "xfs_refcount.h"
27 #include "scrub/scrub.h"
28 #include "scrub/common.h"
29 #include "scrub/trace.h"
30 #include "scrub/repair.h"
31 #include "scrub/xfile.h"
32 #include "scrub/tempfile.h"
33 #include "scrub/tempexch.h"
34 #include "scrub/reap.h"
35 #include "scrub/rtbitmap.h"
36
37 /* rt bitmap content repairs */
38
39 /* Set up to repair the realtime bitmap for this group. */
40 int
xrep_setup_rtbitmap(struct xfs_scrub * sc,struct xchk_rtbitmap * rtb)41 xrep_setup_rtbitmap(
42 struct xfs_scrub *sc,
43 struct xchk_rtbitmap *rtb)
44 {
45 struct xfs_mount *mp = sc->mp;
46 char *descr;
47 unsigned long long blocks = mp->m_sb.sb_rbmblocks;
48 int error;
49
50 error = xrep_tempfile_create(sc, S_IFREG);
51 if (error)
52 return error;
53
54 /* Create an xfile to hold our reconstructed bitmap. */
55 descr = xchk_xfile_rtgroup_descr(sc, "bitmap file");
56 error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile);
57 kfree(descr);
58 if (error)
59 return error;
60
61 /*
62 * Reserve enough blocks to write out a completely new bitmap file,
63 * plus twice as many blocks as we would need if we can only allocate
64 * one block per data fork mapping. This should cover the
65 * preallocation of the temporary file and exchanging the extent
66 * mappings.
67 *
68 * We cannot use xfs_exchmaps_estimate because we have not yet
69 * constructed the replacement bitmap and therefore do not know how
70 * many extents it will use. By the time we do, we will have a dirty
71 * transaction (which we cannot drop because we cannot drop the
72 * rtbitmap ILOCK) and cannot ask for more reservation.
73 */
74 blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
75 if (blocks > UINT_MAX)
76 return -EOPNOTSUPP;
77
78 rtb->resblks += blocks;
79 return 0;
80 }
81
82 static inline xrep_wordoff_t
rtx_to_wordoff(struct xfs_mount * mp,xfs_rtxnum_t rtx)83 rtx_to_wordoff(
84 struct xfs_mount *mp,
85 xfs_rtxnum_t rtx)
86 {
87 return rtx >> XFS_NBWORDLOG;
88 }
89
90 static inline xrep_wordcnt_t
rtxlen_to_wordcnt(xfs_rtxlen_t rtxlen)91 rtxlen_to_wordcnt(
92 xfs_rtxlen_t rtxlen)
93 {
94 return rtxlen >> XFS_NBWORDLOG;
95 }
96
97 /* Helper functions to record rtwords in an xfile. */
98
99 static inline int
xfbmp_load(struct xchk_rtbitmap * rtb,xrep_wordoff_t wordoff,xfs_rtword_t * word)100 xfbmp_load(
101 struct xchk_rtbitmap *rtb,
102 xrep_wordoff_t wordoff,
103 xfs_rtword_t *word)
104 {
105 union xfs_rtword_raw urk;
106 int error;
107
108 ASSERT(xfs_has_rtgroups(rtb->sc->mp));
109
110 error = xfile_load(rtb->sc->xfile, &urk,
111 sizeof(union xfs_rtword_raw),
112 wordoff << XFS_WORDLOG);
113 if (error)
114 return error;
115
116 *word = be32_to_cpu(urk.rtg);
117 return 0;
118 }
119
120 static inline int
xfbmp_store(struct xchk_rtbitmap * rtb,xrep_wordoff_t wordoff,const xfs_rtword_t word)121 xfbmp_store(
122 struct xchk_rtbitmap *rtb,
123 xrep_wordoff_t wordoff,
124 const xfs_rtword_t word)
125 {
126 union xfs_rtword_raw urk;
127
128 ASSERT(xfs_has_rtgroups(rtb->sc->mp));
129
130 urk.rtg = cpu_to_be32(word);
131 return xfile_store(rtb->sc->xfile, &urk,
132 sizeof(union xfs_rtword_raw),
133 wordoff << XFS_WORDLOG);
134 }
135
136 static inline int
xfbmp_copyin(struct xchk_rtbitmap * rtb,xrep_wordoff_t wordoff,const union xfs_rtword_raw * word,xrep_wordcnt_t nr_words)137 xfbmp_copyin(
138 struct xchk_rtbitmap *rtb,
139 xrep_wordoff_t wordoff,
140 const union xfs_rtword_raw *word,
141 xrep_wordcnt_t nr_words)
142 {
143 return xfile_store(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
144 wordoff << XFS_WORDLOG);
145 }
146
147 static inline int
xfbmp_copyout(struct xchk_rtbitmap * rtb,xrep_wordoff_t wordoff,union xfs_rtword_raw * word,xrep_wordcnt_t nr_words)148 xfbmp_copyout(
149 struct xchk_rtbitmap *rtb,
150 xrep_wordoff_t wordoff,
151 union xfs_rtword_raw *word,
152 xrep_wordcnt_t nr_words)
153 {
154 return xfile_load(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
155 wordoff << XFS_WORDLOG);
156 }
157
158 /* Perform a logical OR operation on an rtword in the incore bitmap. */
159 static int
xrep_rtbitmap_or(struct xchk_rtbitmap * rtb,xrep_wordoff_t wordoff,xfs_rtword_t mask)160 xrep_rtbitmap_or(
161 struct xchk_rtbitmap *rtb,
162 xrep_wordoff_t wordoff,
163 xfs_rtword_t mask)
164 {
165 xfs_rtword_t word;
166 int error;
167
168 error = xfbmp_load(rtb, wordoff, &word);
169 if (error)
170 return error;
171
172 trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, word);
173
174 return xfbmp_store(rtb, wordoff, word | mask);
175 }
176
177 /*
178 * Mark as free every rt extent between the next rt block we expected to see
179 * in the rtrmap records and the given rt block.
180 */
181 STATIC int
xrep_rtbitmap_mark_free(struct xchk_rtbitmap * rtb,xfs_rgblock_t rgbno)182 xrep_rtbitmap_mark_free(
183 struct xchk_rtbitmap *rtb,
184 xfs_rgblock_t rgbno)
185 {
186 struct xfs_mount *mp = rtb->sc->mp;
187 struct xchk_rt *sr = &rtb->sc->sr;
188 struct xfs_rtgroup *rtg = sr->rtg;
189 xfs_rtxnum_t startrtx;
190 xfs_rtxnum_t nextrtx;
191 xrep_wordoff_t wordoff, nextwordoff;
192 unsigned int bit;
193 unsigned int bufwsize;
194 xfs_extlen_t mod;
195 xfs_rtword_t mask;
196 enum xbtree_recpacking outcome;
197 int error;
198
199 if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
200 return -EFSCORRUPTED;
201
202 /*
203 * Convert rt blocks to rt extents The block range we find must be
204 * aligned to an rtextent boundary on both ends.
205 */
206 startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
207 mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno);
208 if (mod)
209 return -EFSCORRUPTED;
210
211 nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
212 mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1);
213 if (mod != mp->m_sb.sb_rextsize - 1)
214 return -EFSCORRUPTED;
215
216 /* Must not be shared or CoW staging. */
217 if (sr->refc_cur) {
218 error = xfs_refcount_has_records(sr->refc_cur,
219 XFS_REFC_DOMAIN_SHARED, rtb->next_rgbno,
220 rgbno - rtb->next_rgbno, &outcome);
221 if (error)
222 return error;
223 if (outcome != XBTREE_RECPACKING_EMPTY)
224 return -EFSCORRUPTED;
225
226 error = xfs_refcount_has_records(sr->refc_cur,
227 XFS_REFC_DOMAIN_COW, rtb->next_rgbno,
228 rgbno - rtb->next_rgbno, &outcome);
229 if (error)
230 return error;
231 if (outcome != XBTREE_RECPACKING_EMPTY)
232 return -EFSCORRUPTED;
233 }
234
235 trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);
236
237 /* Set bits as needed to round startrtx up to the nearest word. */
238 bit = startrtx & XREP_RTBMP_WORDMASK;
239 if (bit) {
240 xfs_rtblock_t len = nextrtx - startrtx;
241 unsigned int lastbit;
242
243 lastbit = min(bit + len, XFS_NBWORD);
244 mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
245
246 error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
247 mask);
248 if (error || lastbit - bit == len)
249 return error;
250 startrtx += XFS_NBWORD - bit;
251 }
252
253 /* Set bits as needed to round nextrtx down to the nearest word. */
254 bit = nextrtx & XREP_RTBMP_WORDMASK;
255 if (bit) {
256 mask = ((xfs_rtword_t)1 << bit) - 1;
257
258 error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
259 mask);
260 if (error || startrtx + bit == nextrtx)
261 return error;
262 nextrtx -= bit;
263 }
264
265 trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);
266
267 /* Set all the words in between, up to a whole fs block at once. */
268 wordoff = rtx_to_wordoff(mp, startrtx);
269 nextwordoff = rtx_to_wordoff(mp, nextrtx);
270 bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;
271
272 while (wordoff < nextwordoff) {
273 xrep_wordoff_t rem;
274 xrep_wordcnt_t wordcnt;
275
276 wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
277 bufwsize);
278
279 /*
280 * Try to keep us aligned to the rtwords buffer to reduce the
281 * number of xfile writes.
282 */
283 rem = wordoff & (bufwsize - 1);
284 if (rem)
285 wordcnt = min_t(xrep_wordcnt_t, wordcnt,
286 bufwsize - rem);
287
288 error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
289 if (error)
290 return error;
291
292 wordoff += wordcnt;
293 }
294
295 return 0;
296 }
297
298 /* Set free space in the rtbitmap based on rtrmapbt records. */
299 STATIC int
xrep_rtbitmap_walk_rtrmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)300 xrep_rtbitmap_walk_rtrmap(
301 struct xfs_btree_cur *cur,
302 const struct xfs_rmap_irec *rec,
303 void *priv)
304 {
305 struct xchk_rtbitmap *rtb = priv;
306 int error = 0;
307
308 if (xchk_should_terminate(rtb->sc, &error))
309 return error;
310
311 if (rtb->next_rgbno < rec->rm_startblock) {
312 error = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
313 if (error)
314 return error;
315 }
316
317 rtb->next_rgbno = max(rtb->next_rgbno,
318 rec->rm_startblock + rec->rm_blockcount);
319 return 0;
320 }
321
322 /*
323 * Walk the rtrmapbt to find all the gaps between records, and mark the gaps
324 * in the realtime bitmap that we're computing.
325 */
326 STATIC int
xrep_rtbitmap_find_freespace(struct xchk_rtbitmap * rtb)327 xrep_rtbitmap_find_freespace(
328 struct xchk_rtbitmap *rtb)
329 {
330 struct xfs_scrub *sc = rtb->sc;
331 struct xfs_mount *mp = sc->mp;
332 struct xfs_rtgroup *rtg = sc->sr.rtg;
333 uint64_t blockcount;
334 int error;
335
336 /* Prepare a buffer of ones so that we can accelerate bulk setting. */
337 memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);
338
339 xrep_rtgroup_btcur_init(sc, &sc->sr);
340 error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
341 rtb);
342 if (error)
343 goto out;
344
345 /*
346 * Mark as free every possible rt extent from the last one we saw to
347 * the end of the rt group.
348 */
349 blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize;
350 if (rtb->next_rgbno < blockcount) {
351 error = xrep_rtbitmap_mark_free(rtb, blockcount);
352 if (error)
353 goto out;
354 }
355
356 out:
357 xchk_rtgroup_btcur_free(&sc->sr);
358 return error;
359 }
360
361 static int
xrep_rtbitmap_prep_buf(struct xfs_scrub * sc,struct xfs_buf * bp,void * data)362 xrep_rtbitmap_prep_buf(
363 struct xfs_scrub *sc,
364 struct xfs_buf *bp,
365 void *data)
366 {
367 struct xchk_rtbitmap *rtb = data;
368 struct xfs_mount *mp = sc->mp;
369 union xfs_rtword_raw *ondisk;
370 int error;
371
372 rtb->args.mp = sc->mp;
373 rtb->args.tp = sc->tp;
374 rtb->args.rbmbp = bp;
375 ondisk = xfs_rbmblock_wordptr(&rtb->args, 0);
376 rtb->args.rbmbp = NULL;
377
378 error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk,
379 mp->m_blockwsize);
380 if (error)
381 return error;
382
383 if (xfs_has_rtgroups(sc->mp)) {
384 struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
385
386 hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
387 hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
388 hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
389 hdr->rt_lsn = 0;
390 uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
391 bp->b_ops = &xfs_rtbitmap_buf_ops;
392 } else {
393 bp->b_ops = &xfs_rtbuf_ops;
394 }
395
396 rtb->prep_wordoff += mp->m_blockwsize;
397 xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
398 return 0;
399 }
400
401 /*
402 * Make sure that the given range of the data fork of the realtime file is
403 * mapped to written blocks. The caller must ensure that the inode is joined
404 * to the transaction.
405 */
406 STATIC int
xrep_rtbitmap_data_mappings(struct xfs_scrub * sc,xfs_filblks_t len)407 xrep_rtbitmap_data_mappings(
408 struct xfs_scrub *sc,
409 xfs_filblks_t len)
410 {
411 struct xfs_bmbt_irec map;
412 xfs_fileoff_t off = 0;
413 int error;
414
415 ASSERT(sc->ip != NULL);
416
417 while (off < len) {
418 int nmaps = 1;
419
420 /*
421 * If we have a real extent mapping this block then we're
422 * in ok shape.
423 */
424 error = xfs_bmapi_read(sc->ip, off, len - off, &map, &nmaps,
425 XFS_DATA_FORK);
426 if (error)
427 return error;
428 if (nmaps == 0) {
429 ASSERT(nmaps != 0);
430 return -EFSCORRUPTED;
431 }
432
433 /*
434 * Written extents are ok. Holes are not filled because we
435 * do not know the freespace information.
436 */
437 if (xfs_bmap_is_written_extent(&map) ||
438 map.br_startblock == HOLESTARTBLOCK) {
439 off = map.br_startoff + map.br_blockcount;
440 continue;
441 }
442
443 /*
444 * If we find a delalloc reservation then something is very
445 * very wrong. Bail out.
446 */
447 if (map.br_startblock == DELAYSTARTBLOCK)
448 return -EFSCORRUPTED;
449
450 /* Make sure we're really converting an unwritten extent. */
451 if (map.br_state != XFS_EXT_UNWRITTEN) {
452 ASSERT(map.br_state == XFS_EXT_UNWRITTEN);
453 return -EFSCORRUPTED;
454 }
455
456 /* Make sure this block has a real zeroed extent mapped. */
457 nmaps = 1;
458 error = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff,
459 map.br_blockcount,
460 XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
461 0, &map, &nmaps);
462 if (error)
463 return error;
464
465 /* Commit new extent and all deferred work. */
466 error = xrep_defer_finish(sc);
467 if (error)
468 return error;
469
470 off = map.br_startoff + map.br_blockcount;
471 }
472
473 return 0;
474 }
475
476 /* Fix broken rt volume geometry. */
477 STATIC int
xrep_rtbitmap_geometry(struct xfs_scrub * sc,struct xchk_rtbitmap * rtb)478 xrep_rtbitmap_geometry(
479 struct xfs_scrub *sc,
480 struct xchk_rtbitmap *rtb)
481 {
482 struct xfs_mount *mp = sc->mp;
483 struct xfs_trans *tp = sc->tp;
484
485 /* Superblock fields */
486 if (mp->m_sb.sb_rextents != rtb->rextents)
487 xfs_trans_mod_sb(sc->tp, XFS_TRANS_SB_REXTENTS,
488 rtb->rextents - mp->m_sb.sb_rextents);
489
490 if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks)
491 xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
492 rtb->rbmblocks - mp->m_sb.sb_rbmblocks);
493
494 if (mp->m_sb.sb_rextslog != rtb->rextslog)
495 xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
496 rtb->rextslog - mp->m_sb.sb_rextslog);
497
498 /* Fix broken isize */
499 sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size,
500 mp->m_sb.sb_blocksize);
501
502 if (sc->ip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks))
503 sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks);
504
505 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
506 return xrep_roll_trans(sc);
507 }
508
509 /* Repair the realtime bitmap file metadata. */
510 int
xrep_rtbitmap(struct xfs_scrub * sc)511 xrep_rtbitmap(
512 struct xfs_scrub *sc)
513 {
514 struct xchk_rtbitmap *rtb = sc->buf;
515 struct xfs_mount *mp = sc->mp;
516 struct xfs_group *xg = rtg_group(sc->sr.rtg);
517 unsigned long long blocks = 0;
518 unsigned int busy_gen;
519 int error;
520
521 /* We require the realtime rmapbt to rebuild anything. */
522 if (!xfs_has_rtrmapbt(sc->mp))
523 return -EOPNOTSUPP;
524 /* We require atomic file exchange range to rebuild anything. */
525 if (!xfs_has_exchange_range(sc->mp))
526 return -EOPNOTSUPP;
527
528 /* Impossibly large rtbitmap means we can't touch the filesystem. */
529 if (rtb->rbmblocks > U32_MAX)
530 return 0;
531
532 /*
533 * If the size of the rt bitmap file is larger than what we reserved,
534 * figure out if we need to adjust the block reservation in the
535 * transaction.
536 */
537 blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks);
538 if (blocks > UINT_MAX)
539 return -EOPNOTSUPP;
540 if (blocks > rtb->resblks) {
541 error = xfs_trans_reserve_more(sc->tp, blocks, 0);
542 if (error)
543 return error;
544
545 rtb->resblks += blocks;
546 }
547
548 /* Fix inode core and forks. */
549 error = xrep_metadata_inode_forks(sc);
550 if (error)
551 return error;
552
553 xfs_trans_ijoin(sc->tp, sc->ip, 0);
554
555 /* Ensure no unwritten extents. */
556 error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks);
557 if (error)
558 return error;
559
560 /*
561 * Fix inconsistent bitmap geometry. This function returns with a
562 * clean scrub transaction.
563 */
564 error = xrep_rtbitmap_geometry(sc, rtb);
565 if (error)
566 return error;
567
568 /*
569 * Make sure the busy extent list is clear because we can't put extents
570 * on there twice.
571 */
572 if (!xfs_extent_busy_list_empty(xg, &busy_gen)) {
573 error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
574 if (error)
575 return error;
576 }
577
578 /*
579 * Generate the new rtbitmap data. We don't need the rtbmp information
580 * once this call is finished.
581 */
582 error = xrep_rtbitmap_find_freespace(rtb);
583 if (error)
584 return error;
585
586 /*
587 * Try to take ILOCK_EXCL of the temporary file. We had better be the
588 * only ones holding onto this inode, but we can't block while holding
589 * the rtbitmap file's ILOCK_EXCL.
590 */
591 while (!xrep_tempfile_ilock_nowait(sc)) {
592 if (xchk_should_terminate(sc, &error))
593 return error;
594 delay(1);
595 }
596
597 /*
598 * Make sure we have space allocated for the part of the bitmap
599 * file that corresponds to this group. We already joined sc->ip.
600 */
601 xfs_trans_ijoin(sc->tp, sc->tempip, 0);
602 error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
603 if (error)
604 return error;
605
606 /* Last chance to abort before we start committing fixes. */
607 if (xchk_should_terminate(sc, &error))
608 return error;
609
610 /* Copy the bitmap file that we generated. */
611 error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
612 xrep_rtbitmap_prep_buf, rtb);
613 if (error)
614 return error;
615 error = xrep_tempfile_set_isize(sc,
616 XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
617 if (error)
618 return error;
619
620 /*
621 * Now exchange the data fork contents. We're done with the temporary
622 * buffer, so we can reuse it for the tempfile exchmaps information.
623 */
624 error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
625 rtb->rbmblocks, &rtb->tempexch);
626 if (error)
627 return error;
628
629 error = xrep_tempexch_contents(sc, &rtb->tempexch);
630 if (error)
631 return error;
632
633 /* Free the old rtbitmap blocks if they're not in use. */
634 return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
635 }
636