xref: /linux/fs/xfs/scrub/rtbitmap_repair.c (revision 6f7e6393d1ce636bb7ec77a7fe7b77458fddf701)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2020-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
13 #include "xfs_log_format.h"
14 #include "xfs_trans.h"
15 #include "xfs_rtalloc.h"
16 #include "xfs_inode.h"
17 #include "xfs_bit.h"
18 #include "xfs_bmap.h"
19 #include "xfs_bmap_btree.h"
20 #include "xfs_rmap.h"
21 #include "xfs_rtrmap_btree.h"
22 #include "xfs_exchmaps.h"
23 #include "xfs_rtbitmap.h"
24 #include "xfs_rtgroup.h"
25 #include "xfs_extent_busy.h"
26 #include "xfs_refcount.h"
27 #include "scrub/scrub.h"
28 #include "scrub/common.h"
29 #include "scrub/trace.h"
30 #include "scrub/repair.h"
31 #include "scrub/xfile.h"
32 #include "scrub/tempfile.h"
33 #include "scrub/tempexch.h"
34 #include "scrub/reap.h"
35 #include "scrub/rtbitmap.h"
36 
37 /* rt bitmap content repairs */
38 
39 /* Set up to repair the realtime bitmap for this group. */
40 int
41 xrep_setup_rtbitmap(
42 	struct xfs_scrub	*sc,
43 	struct xchk_rtbitmap	*rtb)
44 {
45 	struct xfs_mount	*mp = sc->mp;
46 	unsigned long long	blocks = mp->m_sb.sb_rbmblocks;
47 	int			error;
48 
49 	error = xrep_tempfile_create(sc, S_IFREG);
50 	if (error)
51 		return error;
52 
53 	/* Create an xfile to hold our reconstructed bitmap. */
54 	error = xfile_create("realtime bitmap file",
55 			blocks * mp->m_sb.sb_blocksize, &sc->xfile);
56 	if (error)
57 		return error;
58 
59 	/*
60 	 * Reserve enough blocks to write out a completely new bitmap file,
61 	 * plus twice as many blocks as we would need if we can only allocate
62 	 * one block per data fork mapping.  This should cover the
63 	 * preallocation of the temporary file and exchanging the extent
64 	 * mappings.
65 	 *
66 	 * We cannot use xfs_exchmaps_estimate because we have not yet
67 	 * constructed the replacement bitmap and therefore do not know how
68 	 * many extents it will use.  By the time we do, we will have a dirty
69 	 * transaction (which we cannot drop because we cannot drop the
70 	 * rtbitmap ILOCK) and cannot ask for more reservation.
71 	 */
72 	blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
73 	if (blocks > UINT_MAX)
74 		return -EOPNOTSUPP;
75 
76 	rtb->resblks += blocks;
77 	return 0;
78 }
79 
80 static inline xrep_wordoff_t
81 rtx_to_wordoff(
82 	struct xfs_mount	*mp,
83 	xfs_rtxnum_t		rtx)
84 {
85 	return rtx >> XFS_NBWORDLOG;
86 }
87 
88 static inline xrep_wordcnt_t
89 rtxlen_to_wordcnt(
90 	xfs_rtxlen_t	rtxlen)
91 {
92 	return rtxlen >> XFS_NBWORDLOG;
93 }
94 
95 /* Helper functions to record rtwords in an xfile. */
96 
97 static inline int
98 xfbmp_load(
99 	struct xchk_rtbitmap	*rtb,
100 	xrep_wordoff_t		wordoff,
101 	xfs_rtword_t		*word)
102 {
103 	union xfs_rtword_raw	urk;
104 	int			error;
105 
106 	ASSERT(xfs_has_rtgroups(rtb->sc->mp));
107 
108 	error = xfile_load(rtb->sc->xfile, &urk,
109 			sizeof(union xfs_rtword_raw),
110 			wordoff << XFS_WORDLOG);
111 	if (error)
112 		return error;
113 
114 	*word = be32_to_cpu(urk.rtg);
115 	return 0;
116 }
117 
118 static inline int
119 xfbmp_store(
120 	struct xchk_rtbitmap	*rtb,
121 	xrep_wordoff_t		wordoff,
122 	const xfs_rtword_t	word)
123 {
124 	union xfs_rtword_raw	urk;
125 
126 	ASSERT(xfs_has_rtgroups(rtb->sc->mp));
127 
128 	urk.rtg = cpu_to_be32(word);
129 	return xfile_store(rtb->sc->xfile, &urk,
130 			sizeof(union xfs_rtword_raw),
131 			wordoff << XFS_WORDLOG);
132 }
133 
134 static inline int
135 xfbmp_copyin(
136 	struct xchk_rtbitmap	*rtb,
137 	xrep_wordoff_t		wordoff,
138 	const union xfs_rtword_raw	*word,
139 	xrep_wordcnt_t		nr_words)
140 {
141 	return xfile_store(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
142 			wordoff << XFS_WORDLOG);
143 }
144 
145 static inline int
146 xfbmp_copyout(
147 	struct xchk_rtbitmap	*rtb,
148 	xrep_wordoff_t		wordoff,
149 	union xfs_rtword_raw	*word,
150 	xrep_wordcnt_t		nr_words)
151 {
152 	return xfile_load(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
153 			wordoff << XFS_WORDLOG);
154 }
155 
156 /* Perform a logical OR operation on an rtword in the incore bitmap. */
157 static int
158 xrep_rtbitmap_or(
159 	struct xchk_rtbitmap	*rtb,
160 	xrep_wordoff_t		wordoff,
161 	xfs_rtword_t		mask)
162 {
163 	xfs_rtword_t		word;
164 	int			error;
165 
166 	error = xfbmp_load(rtb, wordoff, &word);
167 	if (error)
168 		return error;
169 
170 	trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, word);
171 
172 	return xfbmp_store(rtb, wordoff, word | mask);
173 }
174 
175 /*
176  * Mark as free every rt extent between the next rt block we expected to see
177  * in the rtrmap records and the given rt block.
178  */
179 STATIC int
180 xrep_rtbitmap_mark_free(
181 	struct xchk_rtbitmap	*rtb,
182 	xfs_rgblock_t		rgbno)
183 {
184 	struct xfs_mount	*mp = rtb->sc->mp;
185 	struct xchk_rt		*sr = &rtb->sc->sr;
186 	struct xfs_rtgroup	*rtg = sr->rtg;
187 	xfs_rtxnum_t		startrtx;
188 	xfs_rtxnum_t		nextrtx;
189 	xrep_wordoff_t		wordoff, nextwordoff;
190 	unsigned int		bit;
191 	unsigned int		bufwsize;
192 	xfs_extlen_t		mod;
193 	xfs_rtword_t		mask;
194 	enum xbtree_recpacking	outcome;
195 	int			error;
196 
197 	if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
198 		return -EFSCORRUPTED;
199 
200 	/*
201 	 * Convert rt blocks to rt extents  The block range we find must be
202 	 * aligned to an rtextent boundary on both ends.
203 	 */
204 	startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
205 	mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno);
206 	if (mod)
207 		return -EFSCORRUPTED;
208 
209 	nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
210 	mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1);
211 	if (mod != mp->m_sb.sb_rextsize - 1)
212 		return -EFSCORRUPTED;
213 
214 	/* Must not be shared or CoW staging. */
215 	if (sr->refc_cur) {
216 		error = xfs_refcount_has_records(sr->refc_cur,
217 				XFS_REFC_DOMAIN_SHARED, rtb->next_rgbno,
218 				rgbno - rtb->next_rgbno, &outcome);
219 		if (error)
220 			return error;
221 		if (outcome != XBTREE_RECPACKING_EMPTY)
222 			return -EFSCORRUPTED;
223 
224 		error = xfs_refcount_has_records(sr->refc_cur,
225 				XFS_REFC_DOMAIN_COW, rtb->next_rgbno,
226 				rgbno - rtb->next_rgbno, &outcome);
227 		if (error)
228 			return error;
229 		if (outcome != XBTREE_RECPACKING_EMPTY)
230 			return -EFSCORRUPTED;
231 	}
232 
233 	trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);
234 
235 	/* Set bits as needed to round startrtx up to the nearest word. */
236 	bit = startrtx & XREP_RTBMP_WORDMASK;
237 	if (bit) {
238 		xfs_rtblock_t	len = nextrtx - startrtx;
239 		unsigned int	lastbit;
240 
241 		lastbit = min(bit + len, XFS_NBWORD);
242 		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
243 
244 		error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
245 				mask);
246 		if (error || lastbit - bit == len)
247 			return error;
248 		startrtx += XFS_NBWORD - bit;
249 	}
250 
251 	/* Set bits as needed to round nextrtx down to the nearest word. */
252 	bit = nextrtx & XREP_RTBMP_WORDMASK;
253 	if (bit) {
254 		mask = ((xfs_rtword_t)1 << bit) - 1;
255 
256 		error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
257 				mask);
258 		if (error || startrtx + bit == nextrtx)
259 			return error;
260 		nextrtx -= bit;
261 	}
262 
263 	trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);
264 
265 	/* Set all the words in between, up to a whole fs block at once. */
266 	wordoff = rtx_to_wordoff(mp, startrtx);
267 	nextwordoff = rtx_to_wordoff(mp, nextrtx);
268 	bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;
269 
270 	while (wordoff < nextwordoff) {
271 		xrep_wordoff_t	rem;
272 		xrep_wordcnt_t	wordcnt;
273 
274 		wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
275 				bufwsize);
276 
277 		/*
278 		 * Try to keep us aligned to the rtwords buffer to reduce the
279 		 * number of xfile writes.
280 		 */
281 		rem = wordoff & (bufwsize - 1);
282 		if (rem)
283 			wordcnt = min_t(xrep_wordcnt_t, wordcnt,
284 					bufwsize - rem);
285 
286 		error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
287 		if (error)
288 			return error;
289 
290 		wordoff += wordcnt;
291 	}
292 
293 	return 0;
294 }
295 
296 /* Set free space in the rtbitmap based on rtrmapbt records. */
297 STATIC int
298 xrep_rtbitmap_walk_rtrmap(
299 	struct xfs_btree_cur		*cur,
300 	const struct xfs_rmap_irec	*rec,
301 	void				*priv)
302 {
303 	struct xchk_rtbitmap		*rtb = priv;
304 	int				error = 0;
305 
306 	if (xchk_should_terminate(rtb->sc, &error))
307 		return error;
308 
309 	if (rtb->next_rgbno < rec->rm_startblock) {
310 		error = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
311 		if (error)
312 			return error;
313 	}
314 
315 	rtb->next_rgbno = max(rtb->next_rgbno,
316 			      rec->rm_startblock + rec->rm_blockcount);
317 	return 0;
318 }
319 
320 /*
321  * Walk the rtrmapbt to find all the gaps between records, and mark the gaps
322  * in the realtime bitmap that we're computing.
323  */
324 STATIC int
325 xrep_rtbitmap_find_freespace(
326 	struct xchk_rtbitmap	*rtb)
327 {
328 	struct xfs_scrub	*sc = rtb->sc;
329 	struct xfs_mount	*mp = sc->mp;
330 	struct xfs_rtgroup	*rtg = sc->sr.rtg;
331 	uint64_t		blockcount;
332 	int			error;
333 
334 	/* Prepare a buffer of ones so that we can accelerate bulk setting. */
335 	memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);
336 
337 	xrep_rtgroup_btcur_init(sc, &sc->sr);
338 	error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
339 			rtb);
340 	if (error)
341 		goto out;
342 
343 	/*
344 	 * Mark as free every possible rt extent from the last one we saw to
345 	 * the end of the rt group.
346 	 */
347 	blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize;
348 	if (rtb->next_rgbno < blockcount) {
349 		error = xrep_rtbitmap_mark_free(rtb, blockcount);
350 		if (error)
351 			goto out;
352 	}
353 
354 out:
355 	xchk_rtgroup_btcur_free(&sc->sr);
356 	return error;
357 }
358 
359 static int
360 xrep_rtbitmap_prep_buf(
361 	struct xfs_scrub	*sc,
362 	struct xfs_buf		*bp,
363 	void			*data)
364 {
365 	struct xchk_rtbitmap	*rtb = data;
366 	struct xfs_mount	*mp = sc->mp;
367 	union xfs_rtword_raw	*ondisk;
368 	int			error;
369 
370 	rtb->args.mp = sc->mp;
371 	rtb->args.tp = sc->tp;
372 	rtb->args.rbmbp = bp;
373 	ondisk = xfs_rbmblock_wordptr(&rtb->args, 0);
374 	rtb->args.rbmbp = NULL;
375 
376 	error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk,
377 			mp->m_blockwsize);
378 	if (error)
379 		return error;
380 
381 	if (xfs_has_rtgroups(sc->mp)) {
382 		struct xfs_rtbuf_blkinfo	*hdr = bp->b_addr;
383 
384 		hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
385 		hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
386 		hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
387 		hdr->rt_lsn = 0;
388 		uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
389 		bp->b_ops = &xfs_rtbitmap_buf_ops;
390 	} else {
391 		bp->b_ops = &xfs_rtbuf_ops;
392 	}
393 
394 	rtb->prep_wordoff += mp->m_blockwsize;
395 	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
396 	return 0;
397 }
398 
399 /*
400  * Make sure that the given range of the data fork of the realtime file is
401  * mapped to written blocks.  The caller must ensure that the inode is joined
402  * to the transaction.
403  */
404 STATIC int
405 xrep_rtbitmap_data_mappings(
406 	struct xfs_scrub	*sc,
407 	xfs_filblks_t		len)
408 {
409 	struct xfs_bmbt_irec	map;
410 	xfs_fileoff_t		off = 0;
411 	int			error;
412 
413 	ASSERT(sc->ip != NULL);
414 
415 	while (off < len) {
416 		int		nmaps = 1;
417 
418 		/*
419 		 * If we have a real extent mapping this block then we're
420 		 * in ok shape.
421 		 */
422 		error = xfs_bmapi_read(sc->ip, off, len - off, &map, &nmaps,
423 				XFS_DATA_FORK);
424 		if (error)
425 			return error;
426 		if (nmaps == 0) {
427 			ASSERT(nmaps != 0);
428 			return -EFSCORRUPTED;
429 		}
430 
431 		/*
432 		 * Written extents are ok.  Holes are not filled because we
433 		 * do not know the freespace information.
434 		 */
435 		if (xfs_bmap_is_written_extent(&map) ||
436 		    map.br_startblock == HOLESTARTBLOCK) {
437 			off = map.br_startoff + map.br_blockcount;
438 			continue;
439 		}
440 
441 		/*
442 		 * If we find a delalloc reservation then something is very
443 		 * very wrong.  Bail out.
444 		 */
445 		if (map.br_startblock == DELAYSTARTBLOCK)
446 			return -EFSCORRUPTED;
447 
448 		/* Make sure we're really converting an unwritten extent. */
449 		if (map.br_state != XFS_EXT_UNWRITTEN) {
450 			ASSERT(map.br_state == XFS_EXT_UNWRITTEN);
451 			return -EFSCORRUPTED;
452 		}
453 
454 		/* Make sure this block has a real zeroed extent mapped. */
455 		nmaps = 1;
456 		error = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff,
457 				map.br_blockcount,
458 				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
459 				0, &map, &nmaps);
460 		if (error)
461 			return error;
462 
463 		/* Commit new extent and all deferred work. */
464 		error = xrep_defer_finish(sc);
465 		if (error)
466 			return error;
467 
468 		off = map.br_startoff + map.br_blockcount;
469 	}
470 
471 	return 0;
472 }
473 
474 /* Fix broken rt volume geometry. */
475 STATIC int
476 xrep_rtbitmap_geometry(
477 	struct xfs_scrub	*sc,
478 	struct xchk_rtbitmap	*rtb)
479 {
480 	struct xfs_mount	*mp = sc->mp;
481 	struct xfs_trans	*tp = sc->tp;
482 
483 	/* Superblock fields */
484 	if (mp->m_sb.sb_rextents != rtb->rextents)
485 		xfs_trans_mod_sb(sc->tp, XFS_TRANS_SB_REXTENTS,
486 				rtb->rextents - mp->m_sb.sb_rextents);
487 
488 	if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks)
489 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
490 				rtb->rbmblocks - mp->m_sb.sb_rbmblocks);
491 
492 	if (mp->m_sb.sb_rextslog != rtb->rextslog)
493 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
494 				rtb->rextslog - mp->m_sb.sb_rextslog);
495 
496 	/* Fix broken isize */
497 	sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size,
498 					 mp->m_sb.sb_blocksize);
499 
500 	if (sc->ip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks))
501 		sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks);
502 
503 	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
504 	return xrep_roll_trans(sc);
505 }
506 
507 /* Repair the realtime bitmap file metadata. */
508 int
509 xrep_rtbitmap(
510 	struct xfs_scrub	*sc)
511 {
512 	struct xchk_rtbitmap	*rtb = sc->buf;
513 	struct xfs_mount	*mp = sc->mp;
514 	struct xfs_group	*xg = rtg_group(sc->sr.rtg);
515 	unsigned long long	blocks = 0;
516 	unsigned int		busy_gen;
517 	int			error;
518 
519 	/* We require the realtime rmapbt to rebuild anything. */
520 	if (!xfs_has_rtrmapbt(sc->mp))
521 		return -EOPNOTSUPP;
522 	/* We require atomic file exchange range to rebuild anything. */
523 	if (!xfs_has_exchange_range(sc->mp))
524 		return -EOPNOTSUPP;
525 
526 	/* Impossibly large rtbitmap means we can't touch the filesystem. */
527 	if (rtb->rbmblocks > U32_MAX)
528 		return 0;
529 
530 	/*
531 	 * If the size of the rt bitmap file is larger than what we reserved,
532 	 * figure out if we need to adjust the block reservation in the
533 	 * transaction.
534 	 */
535 	blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks);
536 	if (blocks > UINT_MAX)
537 		return -EOPNOTSUPP;
538 	if (blocks > rtb->resblks) {
539 		error = xfs_trans_reserve_more(sc->tp, blocks, 0);
540 		if (error)
541 			return error;
542 
543 		rtb->resblks += blocks;
544 	}
545 
546 	/* Fix inode core and forks. */
547 	error = xrep_metadata_inode_forks(sc);
548 	if (error)
549 		return error;
550 
551 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
552 
553 	/* Ensure no unwritten extents. */
554 	error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks);
555 	if (error)
556 		return error;
557 
558 	/*
559 	 * Fix inconsistent bitmap geometry.  This function returns with a
560 	 * clean scrub transaction.
561 	 */
562 	error = xrep_rtbitmap_geometry(sc, rtb);
563 	if (error)
564 		return error;
565 
566 	/*
567 	 * Make sure the busy extent list is clear because we can't put extents
568 	 * on there twice.
569 	 */
570 	if (!xfs_extent_busy_list_empty(xg, &busy_gen)) {
571 		error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
572 		if (error)
573 			return error;
574 	}
575 
576 	/*
577 	 * Generate the new rtbitmap data.  We don't need the rtbmp information
578 	 * once this call is finished.
579 	 */
580 	error = xrep_rtbitmap_find_freespace(rtb);
581 	if (error)
582 		return error;
583 
584 	/*
585 	 * Try to take ILOCK_EXCL of the temporary file.  We had better be the
586 	 * only ones holding onto this inode, but we can't block while holding
587 	 * the rtbitmap file's ILOCK_EXCL.
588 	 */
589 	while (!xrep_tempfile_ilock_nowait(sc)) {
590 		if (xchk_should_terminate(sc, &error))
591 			return error;
592 		delay(1);
593 	}
594 
595 	/*
596 	 * Make sure we have space allocated for the part of the bitmap
597 	 * file that corresponds to this group.  We already joined sc->ip.
598 	 */
599 	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
600 	error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
601 	if (error)
602 		return error;
603 
604 	/* Last chance to abort before we start committing fixes. */
605 	if (xchk_should_terminate(sc, &error))
606 		return error;
607 
608 	/* Copy the bitmap file that we generated. */
609 	error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
610 			xrep_rtbitmap_prep_buf, rtb);
611 	if (error)
612 		return error;
613 	error = xrep_tempfile_set_isize(sc,
614 			XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
615 	if (error)
616 		return error;
617 
618 	/*
619 	 * Now exchange the data fork contents.  We're done with the temporary
620 	 * buffer, so we can reuse it for the tempfile exchmaps information.
621 	 */
622 	error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
623 			rtb->rbmblocks, &rtb->tempexch);
624 	if (error)
625 		return error;
626 
627 	error = xrep_tempexch_contents(sc, &rtb->tempexch);
628 	if (error)
629 		return error;
630 
631 	/* Free the old rtbitmap blocks if they're not in use. */
632 	return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
633 }
634