xref: /linux/fs/xfs/scrub/rtbitmap_repair.c (revision 7f81907b7e3f93dfed2e903af52659baa4944341)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2020-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
13 #include "xfs_log_format.h"
14 #include "xfs_trans.h"
15 #include "xfs_rtalloc.h"
16 #include "xfs_inode.h"
17 #include "xfs_bit.h"
18 #include "xfs_bmap.h"
19 #include "xfs_bmap_btree.h"
20 #include "xfs_rmap.h"
21 #include "xfs_rtrmap_btree.h"
22 #include "xfs_exchmaps.h"
23 #include "xfs_rtbitmap.h"
24 #include "xfs_rtgroup.h"
25 #include "xfs_extent_busy.h"
26 #include "xfs_refcount.h"
27 #include "scrub/scrub.h"
28 #include "scrub/common.h"
29 #include "scrub/trace.h"
30 #include "scrub/repair.h"
31 #include "scrub/xfile.h"
32 #include "scrub/tempfile.h"
33 #include "scrub/tempexch.h"
34 #include "scrub/reap.h"
35 #include "scrub/rtbitmap.h"
36 
37 /* rt bitmap content repairs */
38 
39 /* Set up to repair the realtime bitmap for this group. */
40 int
41 xrep_setup_rtbitmap(
42 	struct xfs_scrub	*sc,
43 	struct xchk_rtbitmap	*rtb)
44 {
45 	struct xfs_mount	*mp = sc->mp;
46 	char			*descr;
47 	unsigned long long	blocks = mp->m_sb.sb_rbmblocks;
48 	int			error;
49 
50 	error = xrep_tempfile_create(sc, S_IFREG);
51 	if (error)
52 		return error;
53 
54 	/* Create an xfile to hold our reconstructed bitmap. */
55 	descr = xchk_xfile_rtgroup_descr(sc, "bitmap file");
56 	error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile);
57 	kfree(descr);
58 	if (error)
59 		return error;
60 
61 	/*
62 	 * Reserve enough blocks to write out a completely new bitmap file,
63 	 * plus twice as many blocks as we would need if we can only allocate
64 	 * one block per data fork mapping.  This should cover the
65 	 * preallocation of the temporary file and exchanging the extent
66 	 * mappings.
67 	 *
68 	 * We cannot use xfs_exchmaps_estimate because we have not yet
69 	 * constructed the replacement bitmap and therefore do not know how
70 	 * many extents it will use.  By the time we do, we will have a dirty
71 	 * transaction (which we cannot drop because we cannot drop the
72 	 * rtbitmap ILOCK) and cannot ask for more reservation.
73 	 */
74 	blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
75 	if (blocks > UINT_MAX)
76 		return -EOPNOTSUPP;
77 
78 	rtb->resblks += blocks;
79 	return 0;
80 }
81 
82 static inline xrep_wordoff_t
83 rtx_to_wordoff(
84 	struct xfs_mount	*mp,
85 	xfs_rtxnum_t		rtx)
86 {
87 	return rtx >> XFS_NBWORDLOG;
88 }
89 
90 static inline xrep_wordcnt_t
91 rtxlen_to_wordcnt(
92 	xfs_rtxlen_t	rtxlen)
93 {
94 	return rtxlen >> XFS_NBWORDLOG;
95 }
96 
97 /* Helper functions to record rtwords in an xfile. */
98 
99 static inline int
100 xfbmp_load(
101 	struct xchk_rtbitmap	*rtb,
102 	xrep_wordoff_t		wordoff,
103 	xfs_rtword_t		*word)
104 {
105 	union xfs_rtword_raw	urk;
106 	int			error;
107 
108 	ASSERT(xfs_has_rtgroups(rtb->sc->mp));
109 
110 	error = xfile_load(rtb->sc->xfile, &urk,
111 			sizeof(union xfs_rtword_raw),
112 			wordoff << XFS_WORDLOG);
113 	if (error)
114 		return error;
115 
116 	*word = be32_to_cpu(urk.rtg);
117 	return 0;
118 }
119 
120 static inline int
121 xfbmp_store(
122 	struct xchk_rtbitmap	*rtb,
123 	xrep_wordoff_t		wordoff,
124 	const xfs_rtword_t	word)
125 {
126 	union xfs_rtword_raw	urk;
127 
128 	ASSERT(xfs_has_rtgroups(rtb->sc->mp));
129 
130 	urk.rtg = cpu_to_be32(word);
131 	return xfile_store(rtb->sc->xfile, &urk,
132 			sizeof(union xfs_rtword_raw),
133 			wordoff << XFS_WORDLOG);
134 }
135 
136 static inline int
137 xfbmp_copyin(
138 	struct xchk_rtbitmap	*rtb,
139 	xrep_wordoff_t		wordoff,
140 	const union xfs_rtword_raw	*word,
141 	xrep_wordcnt_t		nr_words)
142 {
143 	return xfile_store(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
144 			wordoff << XFS_WORDLOG);
145 }
146 
147 static inline int
148 xfbmp_copyout(
149 	struct xchk_rtbitmap	*rtb,
150 	xrep_wordoff_t		wordoff,
151 	union xfs_rtword_raw	*word,
152 	xrep_wordcnt_t		nr_words)
153 {
154 	return xfile_load(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
155 			wordoff << XFS_WORDLOG);
156 }
157 
158 /* Perform a logical OR operation on an rtword in the incore bitmap. */
159 static int
160 xrep_rtbitmap_or(
161 	struct xchk_rtbitmap	*rtb,
162 	xrep_wordoff_t		wordoff,
163 	xfs_rtword_t		mask)
164 {
165 	xfs_rtword_t		word;
166 	int			error;
167 
168 	error = xfbmp_load(rtb, wordoff, &word);
169 	if (error)
170 		return error;
171 
172 	trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, word);
173 
174 	return xfbmp_store(rtb, wordoff, word | mask);
175 }
176 
177 /*
178  * Mark as free every rt extent between the next rt block we expected to see
179  * in the rtrmap records and the given rt block.
180  */
181 STATIC int
182 xrep_rtbitmap_mark_free(
183 	struct xchk_rtbitmap	*rtb,
184 	xfs_rgblock_t		rgbno)
185 {
186 	struct xfs_mount	*mp = rtb->sc->mp;
187 	struct xchk_rt		*sr = &rtb->sc->sr;
188 	struct xfs_rtgroup	*rtg = sr->rtg;
189 	xfs_rtxnum_t		startrtx;
190 	xfs_rtxnum_t		nextrtx;
191 	xrep_wordoff_t		wordoff, nextwordoff;
192 	unsigned int		bit;
193 	unsigned int		bufwsize;
194 	xfs_extlen_t		mod;
195 	xfs_rtword_t		mask;
196 	enum xbtree_recpacking	outcome;
197 	int			error;
198 
199 	if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
200 		return -EFSCORRUPTED;
201 
202 	/*
203 	 * Convert rt blocks to rt extents  The block range we find must be
204 	 * aligned to an rtextent boundary on both ends.
205 	 */
206 	startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
207 	mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno);
208 	if (mod)
209 		return -EFSCORRUPTED;
210 
211 	nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
212 	mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1);
213 	if (mod != mp->m_sb.sb_rextsize - 1)
214 		return -EFSCORRUPTED;
215 
216 	/* Must not be shared or CoW staging. */
217 	if (sr->refc_cur) {
218 		error = xfs_refcount_has_records(sr->refc_cur,
219 				XFS_REFC_DOMAIN_SHARED, rtb->next_rgbno,
220 				rgbno - rtb->next_rgbno, &outcome);
221 		if (error)
222 			return error;
223 		if (outcome != XBTREE_RECPACKING_EMPTY)
224 			return -EFSCORRUPTED;
225 
226 		error = xfs_refcount_has_records(sr->refc_cur,
227 				XFS_REFC_DOMAIN_COW, rtb->next_rgbno,
228 				rgbno - rtb->next_rgbno, &outcome);
229 		if (error)
230 			return error;
231 		if (outcome != XBTREE_RECPACKING_EMPTY)
232 			return -EFSCORRUPTED;
233 	}
234 
235 	trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);
236 
237 	/* Set bits as needed to round startrtx up to the nearest word. */
238 	bit = startrtx & XREP_RTBMP_WORDMASK;
239 	if (bit) {
240 		xfs_rtblock_t	len = nextrtx - startrtx;
241 		unsigned int	lastbit;
242 
243 		lastbit = min(bit + len, XFS_NBWORD);
244 		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
245 
246 		error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
247 				mask);
248 		if (error || lastbit - bit == len)
249 			return error;
250 		startrtx += XFS_NBWORD - bit;
251 	}
252 
253 	/* Set bits as needed to round nextrtx down to the nearest word. */
254 	bit = nextrtx & XREP_RTBMP_WORDMASK;
255 	if (bit) {
256 		mask = ((xfs_rtword_t)1 << bit) - 1;
257 
258 		error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
259 				mask);
260 		if (error || startrtx + bit == nextrtx)
261 			return error;
262 		nextrtx -= bit;
263 	}
264 
265 	trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);
266 
267 	/* Set all the words in between, up to a whole fs block at once. */
268 	wordoff = rtx_to_wordoff(mp, startrtx);
269 	nextwordoff = rtx_to_wordoff(mp, nextrtx);
270 	bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;
271 
272 	while (wordoff < nextwordoff) {
273 		xrep_wordoff_t	rem;
274 		xrep_wordcnt_t	wordcnt;
275 
276 		wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
277 				bufwsize);
278 
279 		/*
280 		 * Try to keep us aligned to the rtwords buffer to reduce the
281 		 * number of xfile writes.
282 		 */
283 		rem = wordoff & (bufwsize - 1);
284 		if (rem)
285 			wordcnt = min_t(xrep_wordcnt_t, wordcnt,
286 					bufwsize - rem);
287 
288 		error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
289 		if (error)
290 			return error;
291 
292 		wordoff += wordcnt;
293 	}
294 
295 	return 0;
296 }
297 
298 /* Set free space in the rtbitmap based on rtrmapbt records. */
299 STATIC int
300 xrep_rtbitmap_walk_rtrmap(
301 	struct xfs_btree_cur		*cur,
302 	const struct xfs_rmap_irec	*rec,
303 	void				*priv)
304 {
305 	struct xchk_rtbitmap		*rtb = priv;
306 	int				error = 0;
307 
308 	if (xchk_should_terminate(rtb->sc, &error))
309 		return error;
310 
311 	if (rtb->next_rgbno < rec->rm_startblock) {
312 		error = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
313 		if (error)
314 			return error;
315 	}
316 
317 	rtb->next_rgbno = max(rtb->next_rgbno,
318 			      rec->rm_startblock + rec->rm_blockcount);
319 	return 0;
320 }
321 
322 /*
323  * Walk the rtrmapbt to find all the gaps between records, and mark the gaps
324  * in the realtime bitmap that we're computing.
325  */
326 STATIC int
327 xrep_rtbitmap_find_freespace(
328 	struct xchk_rtbitmap	*rtb)
329 {
330 	struct xfs_scrub	*sc = rtb->sc;
331 	struct xfs_mount	*mp = sc->mp;
332 	struct xfs_rtgroup	*rtg = sc->sr.rtg;
333 	uint64_t		blockcount;
334 	int			error;
335 
336 	/* Prepare a buffer of ones so that we can accelerate bulk setting. */
337 	memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);
338 
339 	xrep_rtgroup_btcur_init(sc, &sc->sr);
340 	error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
341 			rtb);
342 	if (error)
343 		goto out;
344 
345 	/*
346 	 * Mark as free every possible rt extent from the last one we saw to
347 	 * the end of the rt group.
348 	 */
349 	blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize;
350 	if (rtb->next_rgbno < blockcount) {
351 		error = xrep_rtbitmap_mark_free(rtb, blockcount);
352 		if (error)
353 			goto out;
354 	}
355 
356 out:
357 	xchk_rtgroup_btcur_free(&sc->sr);
358 	return error;
359 }
360 
361 static int
362 xrep_rtbitmap_prep_buf(
363 	struct xfs_scrub	*sc,
364 	struct xfs_buf		*bp,
365 	void			*data)
366 {
367 	struct xchk_rtbitmap	*rtb = data;
368 	struct xfs_mount	*mp = sc->mp;
369 	union xfs_rtword_raw	*ondisk;
370 	int			error;
371 
372 	rtb->args.mp = sc->mp;
373 	rtb->args.tp = sc->tp;
374 	rtb->args.rbmbp = bp;
375 	ondisk = xfs_rbmblock_wordptr(&rtb->args, 0);
376 	rtb->args.rbmbp = NULL;
377 
378 	error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk,
379 			mp->m_blockwsize);
380 	if (error)
381 		return error;
382 
383 	if (xfs_has_rtgroups(sc->mp)) {
384 		struct xfs_rtbuf_blkinfo	*hdr = bp->b_addr;
385 
386 		hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
387 		hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
388 		hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
389 		hdr->rt_lsn = 0;
390 		uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
391 		bp->b_ops = &xfs_rtbitmap_buf_ops;
392 	} else {
393 		bp->b_ops = &xfs_rtbuf_ops;
394 	}
395 
396 	rtb->prep_wordoff += mp->m_blockwsize;
397 	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
398 	return 0;
399 }
400 
401 /*
402  * Make sure that the given range of the data fork of the realtime file is
403  * mapped to written blocks.  The caller must ensure that the inode is joined
404  * to the transaction.
405  */
406 STATIC int
407 xrep_rtbitmap_data_mappings(
408 	struct xfs_scrub	*sc,
409 	xfs_filblks_t		len)
410 {
411 	struct xfs_bmbt_irec	map;
412 	xfs_fileoff_t		off = 0;
413 	int			error;
414 
415 	ASSERT(sc->ip != NULL);
416 
417 	while (off < len) {
418 		int		nmaps = 1;
419 
420 		/*
421 		 * If we have a real extent mapping this block then we're
422 		 * in ok shape.
423 		 */
424 		error = xfs_bmapi_read(sc->ip, off, len - off, &map, &nmaps,
425 				XFS_DATA_FORK);
426 		if (error)
427 			return error;
428 		if (nmaps == 0) {
429 			ASSERT(nmaps != 0);
430 			return -EFSCORRUPTED;
431 		}
432 
433 		/*
434 		 * Written extents are ok.  Holes are not filled because we
435 		 * do not know the freespace information.
436 		 */
437 		if (xfs_bmap_is_written_extent(&map) ||
438 		    map.br_startblock == HOLESTARTBLOCK) {
439 			off = map.br_startoff + map.br_blockcount;
440 			continue;
441 		}
442 
443 		/*
444 		 * If we find a delalloc reservation then something is very
445 		 * very wrong.  Bail out.
446 		 */
447 		if (map.br_startblock == DELAYSTARTBLOCK)
448 			return -EFSCORRUPTED;
449 
450 		/* Make sure we're really converting an unwritten extent. */
451 		if (map.br_state != XFS_EXT_UNWRITTEN) {
452 			ASSERT(map.br_state == XFS_EXT_UNWRITTEN);
453 			return -EFSCORRUPTED;
454 		}
455 
456 		/* Make sure this block has a real zeroed extent mapped. */
457 		nmaps = 1;
458 		error = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff,
459 				map.br_blockcount,
460 				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
461 				0, &map, &nmaps);
462 		if (error)
463 			return error;
464 
465 		/* Commit new extent and all deferred work. */
466 		error = xrep_defer_finish(sc);
467 		if (error)
468 			return error;
469 
470 		off = map.br_startoff + map.br_blockcount;
471 	}
472 
473 	return 0;
474 }
475 
476 /* Fix broken rt volume geometry. */
477 STATIC int
478 xrep_rtbitmap_geometry(
479 	struct xfs_scrub	*sc,
480 	struct xchk_rtbitmap	*rtb)
481 {
482 	struct xfs_mount	*mp = sc->mp;
483 	struct xfs_trans	*tp = sc->tp;
484 
485 	/* Superblock fields */
486 	if (mp->m_sb.sb_rextents != rtb->rextents)
487 		xfs_trans_mod_sb(sc->tp, XFS_TRANS_SB_REXTENTS,
488 				rtb->rextents - mp->m_sb.sb_rextents);
489 
490 	if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks)
491 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
492 				rtb->rbmblocks - mp->m_sb.sb_rbmblocks);
493 
494 	if (mp->m_sb.sb_rextslog != rtb->rextslog)
495 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
496 				rtb->rextslog - mp->m_sb.sb_rextslog);
497 
498 	/* Fix broken isize */
499 	sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size,
500 					 mp->m_sb.sb_blocksize);
501 
502 	if (sc->ip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks))
503 		sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks);
504 
505 	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
506 	return xrep_roll_trans(sc);
507 }
508 
509 /* Repair the realtime bitmap file metadata. */
510 int
511 xrep_rtbitmap(
512 	struct xfs_scrub	*sc)
513 {
514 	struct xchk_rtbitmap	*rtb = sc->buf;
515 	struct xfs_mount	*mp = sc->mp;
516 	struct xfs_group	*xg = rtg_group(sc->sr.rtg);
517 	unsigned long long	blocks = 0;
518 	unsigned int		busy_gen;
519 	int			error;
520 
521 	/* We require the realtime rmapbt to rebuild anything. */
522 	if (!xfs_has_rtrmapbt(sc->mp))
523 		return -EOPNOTSUPP;
524 	/* We require atomic file exchange range to rebuild anything. */
525 	if (!xfs_has_exchange_range(sc->mp))
526 		return -EOPNOTSUPP;
527 
528 	/* Impossibly large rtbitmap means we can't touch the filesystem. */
529 	if (rtb->rbmblocks > U32_MAX)
530 		return 0;
531 
532 	/*
533 	 * If the size of the rt bitmap file is larger than what we reserved,
534 	 * figure out if we need to adjust the block reservation in the
535 	 * transaction.
536 	 */
537 	blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks);
538 	if (blocks > UINT_MAX)
539 		return -EOPNOTSUPP;
540 	if (blocks > rtb->resblks) {
541 		error = xfs_trans_reserve_more(sc->tp, blocks, 0);
542 		if (error)
543 			return error;
544 
545 		rtb->resblks += blocks;
546 	}
547 
548 	/* Fix inode core and forks. */
549 	error = xrep_metadata_inode_forks(sc);
550 	if (error)
551 		return error;
552 
553 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
554 
555 	/* Ensure no unwritten extents. */
556 	error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks);
557 	if (error)
558 		return error;
559 
560 	/*
561 	 * Fix inconsistent bitmap geometry.  This function returns with a
562 	 * clean scrub transaction.
563 	 */
564 	error = xrep_rtbitmap_geometry(sc, rtb);
565 	if (error)
566 		return error;
567 
568 	/*
569 	 * Make sure the busy extent list is clear because we can't put extents
570 	 * on there twice.
571 	 */
572 	if (!xfs_extent_busy_list_empty(xg, &busy_gen)) {
573 		error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
574 		if (error)
575 			return error;
576 	}
577 
578 	/*
579 	 * Generate the new rtbitmap data.  We don't need the rtbmp information
580 	 * once this call is finished.
581 	 */
582 	error = xrep_rtbitmap_find_freespace(rtb);
583 	if (error)
584 		return error;
585 
586 	/*
587 	 * Try to take ILOCK_EXCL of the temporary file.  We had better be the
588 	 * only ones holding onto this inode, but we can't block while holding
589 	 * the rtbitmap file's ILOCK_EXCL.
590 	 */
591 	while (!xrep_tempfile_ilock_nowait(sc)) {
592 		if (xchk_should_terminate(sc, &error))
593 			return error;
594 		delay(1);
595 	}
596 
597 	/*
598 	 * Make sure we have space allocated for the part of the bitmap
599 	 * file that corresponds to this group.  We already joined sc->ip.
600 	 */
601 	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
602 	error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
603 	if (error)
604 		return error;
605 
606 	/* Last chance to abort before we start committing fixes. */
607 	if (xchk_should_terminate(sc, &error))
608 		return error;
609 
610 	/* Copy the bitmap file that we generated. */
611 	error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
612 			xrep_rtbitmap_prep_buf, rtb);
613 	if (error)
614 		return error;
615 	error = xrep_tempfile_set_isize(sc,
616 			XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
617 	if (error)
618 		return error;
619 
620 	/*
621 	 * Now exchange the data fork contents.  We're done with the temporary
622 	 * buffer, so we can reuse it for the tempfile exchmaps information.
623 	 */
624 	error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
625 			rtb->rbmblocks, &rtb->tempexch);
626 	if (error)
627 		return error;
628 
629 	error = xrep_tempexch_contents(sc, &rtb->tempexch);
630 	if (error)
631 		return error;
632 
633 	/* Free the old rtbitmap blocks if they're not in use. */
634 	return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
635 }
636