xref: /linux/fs/xfs/xfs_verify_media.c (revision 8457669db968c98edb781892d73fa559e1efcbd4)
1b8accfd6SDarrick J. Wong // SPDX-License-Identifier: GPL-2.0-or-later
2b8accfd6SDarrick J. Wong /*
3b8accfd6SDarrick J. Wong  * Copyright (c) 2026 Oracle.  All Rights Reserved.
4b8accfd6SDarrick J. Wong  * Author: Darrick J. Wong <djwong@kernel.org>
5b8accfd6SDarrick J. Wong  */
604a65666SCarlos Maiolino #include "xfs_platform.h"
7b8accfd6SDarrick J. Wong #include "xfs_shared.h"
8b8accfd6SDarrick J. Wong #include "xfs_format.h"
9b8accfd6SDarrick J. Wong #include "xfs_log_format.h"
10b8accfd6SDarrick J. Wong #include "xfs_trans_resv.h"
11b8accfd6SDarrick J. Wong #include "xfs_mount.h"
12b8accfd6SDarrick J. Wong #include "xfs_bit.h"
13b8accfd6SDarrick J. Wong #include "xfs_btree.h"
14b8accfd6SDarrick J. Wong #include "xfs_inode.h"
15b8accfd6SDarrick J. Wong #include "xfs_icache.h"
16b8accfd6SDarrick J. Wong #include "xfs_trans.h"
17b8accfd6SDarrick J. Wong #include "xfs_alloc.h"
18b8accfd6SDarrick J. Wong #include "xfs_ag.h"
19b8accfd6SDarrick J. Wong #include "xfs_rmap.h"
20b8accfd6SDarrick J. Wong #include "xfs_rmap_btree.h"
21b8accfd6SDarrick J. Wong #include "xfs_rtgroup.h"
22b8accfd6SDarrick J. Wong #include "xfs_rtrmap_btree.h"
23b8accfd6SDarrick J. Wong #include "xfs_health.h"
24b8accfd6SDarrick J. Wong #include "xfs_healthmon.h"
25b8accfd6SDarrick J. Wong #include "xfs_trace.h"
26b8accfd6SDarrick J. Wong #include "xfs_verify_media.h"
27b8accfd6SDarrick J. Wong 
28b8accfd6SDarrick J. Wong #include <linux/fserror.h>
29b8accfd6SDarrick J. Wong 
30b8accfd6SDarrick J. Wong struct xfs_group_data_lost {
31b8accfd6SDarrick J. Wong 	xfs_agblock_t		startblock;
32b8accfd6SDarrick J. Wong 	xfs_extlen_t		blockcount;
33b8accfd6SDarrick J. Wong };
34b8accfd6SDarrick J. Wong 
35b8accfd6SDarrick J. Wong /* Report lost file data from rmap records */
36b8accfd6SDarrick J. Wong static int
xfs_verify_report_data_lost(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * data)37b8accfd6SDarrick J. Wong xfs_verify_report_data_lost(
38b8accfd6SDarrick J. Wong 	struct xfs_btree_cur		*cur,
39b8accfd6SDarrick J. Wong 	const struct xfs_rmap_irec	*rec,
40b8accfd6SDarrick J. Wong 	void				*data)
41b8accfd6SDarrick J. Wong {
42b8accfd6SDarrick J. Wong 	struct xfs_mount		*mp = cur->bc_mp;
43b8accfd6SDarrick J. Wong 	struct xfs_inode		*ip;
44b8accfd6SDarrick J. Wong 	struct xfs_group_data_lost	*lost = data;
45b8accfd6SDarrick J. Wong 	xfs_fileoff_t			fileoff = rec->rm_offset;
46b8accfd6SDarrick J. Wong 	xfs_extlen_t			blocks = rec->rm_blockcount;
47b8accfd6SDarrick J. Wong 	const bool			is_attr =
48b8accfd6SDarrick J. Wong 			(rec->rm_flags & XFS_RMAP_ATTR_FORK);
49b8accfd6SDarrick J. Wong 	const xfs_agblock_t		lost_end =
50b8accfd6SDarrick J. Wong 			lost->startblock + lost->blockcount;
51b8accfd6SDarrick J. Wong 	const xfs_agblock_t		rmap_end =
52b8accfd6SDarrick J. Wong 			rec->rm_startblock + rec->rm_blockcount;
53b8accfd6SDarrick J. Wong 	int				error = 0;
54b8accfd6SDarrick J. Wong 
55b8accfd6SDarrick J. Wong 	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner))
56b8accfd6SDarrick J. Wong 	       return 0;
57b8accfd6SDarrick J. Wong 
58b8accfd6SDarrick J. Wong 	error = xfs_iget(mp, cur->bc_tp, rec->rm_owner, 0, 0, &ip);
59b8accfd6SDarrick J. Wong 	if (error)
60b8accfd6SDarrick J. Wong 		return 0;
61b8accfd6SDarrick J. Wong 
62b8accfd6SDarrick J. Wong 	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
63b8accfd6SDarrick J. Wong 		xfs_bmap_mark_sick(ip, is_attr ? XFS_ATTR_FORK : XFS_DATA_FORK);
64b8accfd6SDarrick J. Wong 		goto out_rele;
65b8accfd6SDarrick J. Wong 	}
66b8accfd6SDarrick J. Wong 
67b8accfd6SDarrick J. Wong 	if (is_attr) {
68b8accfd6SDarrick J. Wong 		xfs_inode_mark_sick(ip, XFS_SICK_INO_XATTR);
69b8accfd6SDarrick J. Wong 		goto out_rele;
70b8accfd6SDarrick J. Wong 	}
71b8accfd6SDarrick J. Wong 
72b8accfd6SDarrick J. Wong 	if (lost->startblock > rec->rm_startblock) {
73b8accfd6SDarrick J. Wong 		fileoff += lost->startblock - rec->rm_startblock;
74b8accfd6SDarrick J. Wong 		blocks -= lost->startblock - rec->rm_startblock;
75b8accfd6SDarrick J. Wong 	}
76b8accfd6SDarrick J. Wong 	if (rmap_end > lost_end)
77b8accfd6SDarrick J. Wong 		blocks -= rmap_end - lost_end;
78b8accfd6SDarrick J. Wong 
79b8accfd6SDarrick J. Wong 	fserror_report_data_lost(VFS_I(ip), XFS_FSB_TO_B(mp, fileoff),
80b8accfd6SDarrick J. Wong 			XFS_FSB_TO_B(mp, blocks), GFP_NOFS);
81b8accfd6SDarrick J. Wong 
82b8accfd6SDarrick J. Wong out_rele:
83b8accfd6SDarrick J. Wong 	xfs_irele(ip);
84b8accfd6SDarrick J. Wong 	return 0;
85b8accfd6SDarrick J. Wong }
86b8accfd6SDarrick J. Wong 
87b8accfd6SDarrick J. Wong /* Walk reverse mappings to look for all file data loss */
88b8accfd6SDarrick J. Wong static int
xfs_verify_report_losses(struct xfs_mount * mp,enum xfs_group_type type,xfs_daddr_t daddr,u64 bblen)89b8accfd6SDarrick J. Wong xfs_verify_report_losses(
90b8accfd6SDarrick J. Wong 	struct xfs_mount	*mp,
91b8accfd6SDarrick J. Wong 	enum xfs_group_type	type,
92b8accfd6SDarrick J. Wong 	xfs_daddr_t		daddr,
93b8accfd6SDarrick J. Wong 	u64			bblen)
94b8accfd6SDarrick J. Wong {
95b8accfd6SDarrick J. Wong 	struct xfs_group	*xg = NULL;
96b8accfd6SDarrick J. Wong 	struct xfs_trans	*tp;
97b8accfd6SDarrick J. Wong 	xfs_fsblock_t		start_bno, end_bno;
98b8accfd6SDarrick J. Wong 	uint32_t		start_gno, end_gno;
99b8accfd6SDarrick J. Wong 	int			error;
100b8accfd6SDarrick J. Wong 
101b8accfd6SDarrick J. Wong 	if (type == XG_TYPE_RTG) {
102b8accfd6SDarrick J. Wong 		start_bno = xfs_daddr_to_rtb(mp, daddr);
103b8accfd6SDarrick J. Wong 		end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1);
104b8accfd6SDarrick J. Wong 	} else {
105b8accfd6SDarrick J. Wong 		start_bno = XFS_DADDR_TO_FSB(mp, daddr);
106b8accfd6SDarrick J. Wong 		end_bno = XFS_DADDR_TO_FSB(mp, daddr + bblen - 1);
107b8accfd6SDarrick J. Wong 	}
108b8accfd6SDarrick J. Wong 
109b8accfd6SDarrick J. Wong 	tp = xfs_trans_alloc_empty(mp);
110b8accfd6SDarrick J. Wong 	start_gno = xfs_fsb_to_gno(mp, start_bno, type);
111b8accfd6SDarrick J. Wong 	end_gno = xfs_fsb_to_gno(mp, end_bno, type);
112b8accfd6SDarrick J. Wong 	while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) {
113b8accfd6SDarrick J. Wong 		struct xfs_buf		*agf_bp = NULL;
114b8accfd6SDarrick J. Wong 		struct xfs_rtgroup	*rtg = NULL;
115b8accfd6SDarrick J. Wong 		struct xfs_btree_cur	*cur;
116b8accfd6SDarrick J. Wong 		struct xfs_rmap_irec	ri_low = { };
117b8accfd6SDarrick J. Wong 		struct xfs_rmap_irec	ri_high;
118b8accfd6SDarrick J. Wong 		struct xfs_group_data_lost lost;
119b8accfd6SDarrick J. Wong 
120b8accfd6SDarrick J. Wong 		if (type == XG_TYPE_AG) {
121b8accfd6SDarrick J. Wong 			struct xfs_perag	*pag = to_perag(xg);
122b8accfd6SDarrick J. Wong 
123b8accfd6SDarrick J. Wong 			error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
124b8accfd6SDarrick J. Wong 			if (error) {
125*16145698SDarrick J. Wong 				xfs_perag_rele(pag);
126b8accfd6SDarrick J. Wong 				break;
127b8accfd6SDarrick J. Wong 			}
128b8accfd6SDarrick J. Wong 
129b8accfd6SDarrick J. Wong 			cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
130b8accfd6SDarrick J. Wong 		} else {
131b8accfd6SDarrick J. Wong 			rtg = to_rtg(xg);
132b8accfd6SDarrick J. Wong 			xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
133b8accfd6SDarrick J. Wong 			cur = xfs_rtrmapbt_init_cursor(tp, rtg);
134b8accfd6SDarrick J. Wong 		}
135b8accfd6SDarrick J. Wong 
136b8accfd6SDarrick J. Wong 		/*
137b8accfd6SDarrick J. Wong 		 * Set the rmap range from ri_low to ri_high, which represents
138b8accfd6SDarrick J. Wong 		 * a [start, end] where we looking for the files or metadata.
139b8accfd6SDarrick J. Wong 		 */
140b8accfd6SDarrick J. Wong 		memset(&ri_high, 0xFF, sizeof(ri_high));
141b8accfd6SDarrick J. Wong 		if (xg->xg_gno == start_gno)
142b8accfd6SDarrick J. Wong 			ri_low.rm_startblock =
143b8accfd6SDarrick J. Wong 				xfs_fsb_to_gbno(mp, start_bno, type);
144b8accfd6SDarrick J. Wong 		if (xg->xg_gno == end_gno)
145b8accfd6SDarrick J. Wong 			ri_high.rm_startblock =
146b8accfd6SDarrick J. Wong 				xfs_fsb_to_gbno(mp, end_bno, type);
147b8accfd6SDarrick J. Wong 
148b8accfd6SDarrick J. Wong 		lost.startblock = ri_low.rm_startblock;
149b8accfd6SDarrick J. Wong 		lost.blockcount = min(xg->xg_block_count,
150b8accfd6SDarrick J. Wong 				      ri_high.rm_startblock + 1) -
151b8accfd6SDarrick J. Wong 							ri_low.rm_startblock;
152b8accfd6SDarrick J. Wong 
153b8accfd6SDarrick J. Wong 		error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
154b8accfd6SDarrick J. Wong 				xfs_verify_report_data_lost, &lost);
155b8accfd6SDarrick J. Wong 		xfs_btree_del_cursor(cur, error);
156b8accfd6SDarrick J. Wong 		if (agf_bp)
157b8accfd6SDarrick J. Wong 			xfs_trans_brelse(tp, agf_bp);
158b8accfd6SDarrick J. Wong 		if (rtg)
159b8accfd6SDarrick J. Wong 			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
160b8accfd6SDarrick J. Wong 		if (error) {
161*16145698SDarrick J. Wong 			xfs_group_rele(xg);
162b8accfd6SDarrick J. Wong 			break;
163b8accfd6SDarrick J. Wong 		}
164b8accfd6SDarrick J. Wong 	}
165b8accfd6SDarrick J. Wong 
166b8accfd6SDarrick J. Wong 	xfs_trans_cancel(tp);
167b8accfd6SDarrick J. Wong 	return 0;
168b8accfd6SDarrick J. Wong }
169b8accfd6SDarrick J. Wong 
170b8accfd6SDarrick J. Wong /*
171b8accfd6SDarrick J. Wong  * Compute the desired verify IO size.
172b8accfd6SDarrick J. Wong  *
173b8accfd6SDarrick J. Wong  * To minimize command overhead, we'd like to create bios that are 1MB, though
174b8accfd6SDarrick J. Wong  * we allow the user to ask for a smaller size.
175b8accfd6SDarrick J. Wong  */
176b8accfd6SDarrick J. Wong static unsigned int
xfs_verify_iosize(const struct xfs_verify_media * me,struct xfs_buftarg * btp,uint64_t bbcount)177b8accfd6SDarrick J. Wong xfs_verify_iosize(
178b8accfd6SDarrick J. Wong 	const struct xfs_verify_media	*me,
179b8accfd6SDarrick J. Wong 	struct xfs_buftarg		*btp,
180b8accfd6SDarrick J. Wong 	uint64_t			bbcount)
181b8accfd6SDarrick J. Wong {
182b8accfd6SDarrick J. Wong 	unsigned int			iosize =
183b8accfd6SDarrick J. Wong 			min_not_zero(SZ_1M, me->me_max_io_size);
184b8accfd6SDarrick J. Wong 
185b8accfd6SDarrick J. Wong 	BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT);
186b8accfd6SDarrick J. Wong 	ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev));
187b8accfd6SDarrick J. Wong 
188b8accfd6SDarrick J. Wong 	return clamp(iosize, bdev_logical_block_size(btp->bt_bdev),
189b8accfd6SDarrick J. Wong 			BBTOB(bbcount));
190b8accfd6SDarrick J. Wong }
191b8accfd6SDarrick J. Wong 
192b8accfd6SDarrick J. Wong /* Allocate as much memory as we can get for verification buffer. */
193b8accfd6SDarrick J. Wong static struct folio *
xfs_verify_alloc_folio(const unsigned int iosize)194b8accfd6SDarrick J. Wong xfs_verify_alloc_folio(
195b8accfd6SDarrick J. Wong 	const unsigned int	iosize)
196b8accfd6SDarrick J. Wong {
197b8accfd6SDarrick J. Wong 	unsigned int		order = get_order(iosize);
198b8accfd6SDarrick J. Wong 
199b8accfd6SDarrick J. Wong 	while (order > 0) {
200b8accfd6SDarrick J. Wong 		struct folio	*folio =
201b8accfd6SDarrick J. Wong 			folio_alloc(GFP_KERNEL | __GFP_NORETRY, order);
202b8accfd6SDarrick J. Wong 
203b8accfd6SDarrick J. Wong 		if (folio)
204b8accfd6SDarrick J. Wong 			return folio;
205b8accfd6SDarrick J. Wong 		order--;
206b8accfd6SDarrick J. Wong 	}
207b8accfd6SDarrick J. Wong 
208b8accfd6SDarrick J. Wong 	return folio_alloc(GFP_KERNEL, 0);
209b8accfd6SDarrick J. Wong }
210b8accfd6SDarrick J. Wong 
211b8accfd6SDarrick J. Wong /* Report any kind of problem verifying media */
212b8accfd6SDarrick J. Wong static void
xfs_verify_media_error(struct xfs_mount * mp,struct xfs_verify_media * me,struct xfs_buftarg * btp,xfs_daddr_t daddr,unsigned int bio_bbcount,blk_status_t bio_status)213b8accfd6SDarrick J. Wong xfs_verify_media_error(
214b8accfd6SDarrick J. Wong 	struct xfs_mount	*mp,
215b8accfd6SDarrick J. Wong 	struct xfs_verify_media	*me,
216b8accfd6SDarrick J. Wong 	struct xfs_buftarg	*btp,
217b8accfd6SDarrick J. Wong 	xfs_daddr_t		daddr,
218b8accfd6SDarrick J. Wong 	unsigned int		bio_bbcount,
219b8accfd6SDarrick J. Wong 	blk_status_t		bio_status)
220b8accfd6SDarrick J. Wong {
221b8accfd6SDarrick J. Wong 	trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr,
222b8accfd6SDarrick J. Wong 			bio_bbcount, bio_status);
223b8accfd6SDarrick J. Wong 
224b8accfd6SDarrick J. Wong 	/*
225b8accfd6SDarrick J. Wong 	 * Pass any error, I/O or otherwise, up to the caller if we didn't
226b8accfd6SDarrick J. Wong 	 * successfully verify any bytes at all.
227b8accfd6SDarrick J. Wong 	 */
228b8accfd6SDarrick J. Wong 	if (me->me_start_daddr == daddr)
229b8accfd6SDarrick J. Wong 		me->me_ioerror = -blk_status_to_errno(bio_status);
230b8accfd6SDarrick J. Wong 
231b8accfd6SDarrick J. Wong 	/*
232b8accfd6SDarrick J. Wong 	 * PI validation failures, medium errors, or general IO errors are
233b8accfd6SDarrick J. Wong 	 * treated as indicators of data loss.  Everything else are (hopefully)
234b8accfd6SDarrick J. Wong 	 * transient errors and are not reported to healthmon or fsnotify.
235b8accfd6SDarrick J. Wong 	 */
236b8accfd6SDarrick J. Wong 	switch (bio_status) {
237b8accfd6SDarrick J. Wong 	case BLK_STS_PROTECTION:
238b8accfd6SDarrick J. Wong 	case BLK_STS_IOERR:
239b8accfd6SDarrick J. Wong 	case BLK_STS_MEDIUM:
240b8accfd6SDarrick J. Wong 		break;
241b8accfd6SDarrick J. Wong 	default:
242b8accfd6SDarrick J. Wong 		return;
243b8accfd6SDarrick J. Wong 	}
244b8accfd6SDarrick J. Wong 
245b8accfd6SDarrick J. Wong 	if (!(me->me_flags & XFS_VERIFY_MEDIA_REPORT))
246b8accfd6SDarrick J. Wong 		return;
247b8accfd6SDarrick J. Wong 
248b8accfd6SDarrick J. Wong 	xfs_healthmon_report_media(mp, me->me_dev, daddr, bio_bbcount);
249b8accfd6SDarrick J. Wong 
250b8accfd6SDarrick J. Wong 	if (!xfs_has_rmapbt(mp))
251b8accfd6SDarrick J. Wong 		return;
252b8accfd6SDarrick J. Wong 
253b8accfd6SDarrick J. Wong 	switch (me->me_dev) {
254b8accfd6SDarrick J. Wong 	case XFS_DEV_DATA:
255b8accfd6SDarrick J. Wong 		xfs_verify_report_losses(mp, XG_TYPE_AG, daddr, bio_bbcount);
256b8accfd6SDarrick J. Wong 		break;
257b8accfd6SDarrick J. Wong 	case XFS_DEV_RT:
258b8accfd6SDarrick J. Wong 		xfs_verify_report_losses(mp, XG_TYPE_RTG, daddr, bio_bbcount);
259b8accfd6SDarrick J. Wong 		break;
260b8accfd6SDarrick J. Wong 	}
261b8accfd6SDarrick J. Wong }
262b8accfd6SDarrick J. Wong 
263b8accfd6SDarrick J. Wong /* Verify the media of an xfs device by submitting read requests to the disk. */
264b8accfd6SDarrick J. Wong static int
xfs_verify_media(struct xfs_mount * mp,struct xfs_verify_media * me)265b8accfd6SDarrick J. Wong xfs_verify_media(
266b8accfd6SDarrick J. Wong 	struct xfs_mount	*mp,
267b8accfd6SDarrick J. Wong 	struct xfs_verify_media	*me)
268b8accfd6SDarrick J. Wong {
269b8accfd6SDarrick J. Wong 	struct xfs_buftarg	*btp = NULL;
270b8accfd6SDarrick J. Wong 	struct bio		*bio;
271b8accfd6SDarrick J. Wong 	struct folio		*folio;
272b8accfd6SDarrick J. Wong 	xfs_daddr_t		daddr;
273b8accfd6SDarrick J. Wong 	uint64_t		bbcount;
274b8accfd6SDarrick J. Wong 	int			error = 0;
275b8accfd6SDarrick J. Wong 
276b8accfd6SDarrick J. Wong 	me->me_ioerror = 0;
277b8accfd6SDarrick J. Wong 
278b8accfd6SDarrick J. Wong 	switch (me->me_dev) {
279b8accfd6SDarrick J. Wong 	case XFS_DEV_DATA:
280b8accfd6SDarrick J. Wong 		btp = mp->m_ddev_targp;
281b8accfd6SDarrick J. Wong 		break;
282b8accfd6SDarrick J. Wong 	case XFS_DEV_LOG:
283b8accfd6SDarrick J. Wong 		if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev)
284b8accfd6SDarrick J. Wong 			btp = mp->m_logdev_targp;
285b8accfd6SDarrick J. Wong 		break;
286b8accfd6SDarrick J. Wong 	case XFS_DEV_RT:
287b8accfd6SDarrick J. Wong 		btp = mp->m_rtdev_targp;
288b8accfd6SDarrick J. Wong 		break;
289b8accfd6SDarrick J. Wong 	}
290b8accfd6SDarrick J. Wong 	if (!btp)
291b8accfd6SDarrick J. Wong 		return -ENODEV;
292b8accfd6SDarrick J. Wong 
293b8accfd6SDarrick J. Wong 	/*
294b8accfd6SDarrick J. Wong 	 * If the caller told us to verify beyond the end of the disk, tell the
295b8accfd6SDarrick J. Wong 	 * user exactly where that was.
296b8accfd6SDarrick J. Wong 	 */
297b8accfd6SDarrick J. Wong 	if (me->me_end_daddr > btp->bt_nr_sectors)
298b8accfd6SDarrick J. Wong 		me->me_end_daddr = btp->bt_nr_sectors;
299b8accfd6SDarrick J. Wong 
300b8accfd6SDarrick J. Wong 	/* start and end have to be aligned to the lba size */
301b8accfd6SDarrick J. Wong 	if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr),
302b8accfd6SDarrick J. Wong 			bdev_logical_block_size(btp->bt_bdev)))
303b8accfd6SDarrick J. Wong 		return -EINVAL;
304b8accfd6SDarrick J. Wong 
305b8accfd6SDarrick J. Wong 	/*
306b8accfd6SDarrick J. Wong 	 * end_daddr is the exclusive end of the range, so if start_daddr
307b8accfd6SDarrick J. Wong 	 * reaches there (or beyond), there's no work to be done.
308b8accfd6SDarrick J. Wong 	 */
309b8accfd6SDarrick J. Wong 	if (me->me_start_daddr >= me->me_end_daddr)
310b8accfd6SDarrick J. Wong 		return 0;
311b8accfd6SDarrick J. Wong 
312b8accfd6SDarrick J. Wong 	/*
313b8accfd6SDarrick J. Wong 	 * There are three ranges involved here:
314b8accfd6SDarrick J. Wong 	 *
315b8accfd6SDarrick J. Wong 	 *  - [me->me_start_daddr, me->me_end_daddr) is the range that the
316b8accfd6SDarrick J. Wong 	 *    user wants to verify.  end_daddr can be beyond the end of the
317b8accfd6SDarrick J. Wong 	 *    disk; we'll constrain it to the end if necessary.
318b8accfd6SDarrick J. Wong 	 *
319b8accfd6SDarrick J. Wong 	 *  - [daddr, me->me_end_daddr) is the range that we have not yet
320b8accfd6SDarrick J. Wong 	 *    verified.  We update daddr after each successful read.
321b8accfd6SDarrick J. Wong 	 *    me->me_start_daddr is set to daddr before returning.
322b8accfd6SDarrick J. Wong 	 *
323b8accfd6SDarrick J. Wong 	 *  - [daddr, daddr + bio_bbcount) is the range that we're currently
324b8accfd6SDarrick J. Wong 	 *    verifying.
325b8accfd6SDarrick J. Wong 	 */
326b8accfd6SDarrick J. Wong 	daddr = me->me_start_daddr;
327b8accfd6SDarrick J. Wong 	bbcount = min_t(sector_t, me->me_end_daddr, btp->bt_nr_sectors) -
328b8accfd6SDarrick J. Wong 			  me->me_start_daddr;
329b8accfd6SDarrick J. Wong 
330b8accfd6SDarrick J. Wong 	folio = xfs_verify_alloc_folio(xfs_verify_iosize(me, btp, bbcount));
331b8accfd6SDarrick J. Wong 	if (!folio)
332b8accfd6SDarrick J. Wong 		return -ENOMEM;
333b8accfd6SDarrick J. Wong 
334b8accfd6SDarrick J. Wong 	trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount,
335b8accfd6SDarrick J. Wong 			folio);
336b8accfd6SDarrick J. Wong 
337b8accfd6SDarrick J. Wong 	bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL);
338b8accfd6SDarrick J. Wong 	if (!bio) {
339b8accfd6SDarrick J. Wong 		error = -ENOMEM;
340b8accfd6SDarrick J. Wong 		goto out_folio;
341b8accfd6SDarrick J. Wong 	}
342b8accfd6SDarrick J. Wong 
343b8accfd6SDarrick J. Wong 	while (bbcount > 0) {
344b8accfd6SDarrick J. Wong 		unsigned int	bio_bbcount;
345b8accfd6SDarrick J. Wong 		blk_status_t	bio_status;
346b8accfd6SDarrick J. Wong 
347b8accfd6SDarrick J. Wong 		bio_reset(bio, btp->bt_bdev, REQ_OP_READ);
348b8accfd6SDarrick J. Wong 		bio->bi_iter.bi_sector = daddr;
349b8accfd6SDarrick J. Wong 		bio_add_folio_nofail(bio, folio,
350b8accfd6SDarrick J. Wong 				min(bbcount << SECTOR_SHIFT, folio_size(folio)),
351b8accfd6SDarrick J. Wong 				0);
352b8accfd6SDarrick J. Wong 
353b8accfd6SDarrick J. Wong 		/*
354b8accfd6SDarrick J. Wong 		 * Save the length of the bio before we submit it, because we
355b8accfd6SDarrick J. Wong 		 * need the original daddr and length for reporting IO errors
356b8accfd6SDarrick J. Wong 		 * if the bio fails.
357b8accfd6SDarrick J. Wong 		 */
358b8accfd6SDarrick J. Wong 		bio_bbcount = bio->bi_iter.bi_size >> SECTOR_SHIFT;
359b8accfd6SDarrick J. Wong 		submit_bio_wait(bio);
360b8accfd6SDarrick J. Wong 		bio_status = bio->bi_status;
361b8accfd6SDarrick J. Wong 		if (bio_status != BLK_STS_OK) {
362b8accfd6SDarrick J. Wong 			xfs_verify_media_error(mp, me, btp, daddr, bio_bbcount,
363b8accfd6SDarrick J. Wong 					bio_status);
364b8accfd6SDarrick J. Wong 			error = 0;
365b8accfd6SDarrick J. Wong 			break;
366b8accfd6SDarrick J. Wong 		}
367b8accfd6SDarrick J. Wong 
368b8accfd6SDarrick J. Wong 		daddr += bio_bbcount;
369b8accfd6SDarrick J. Wong 		bbcount -= bio_bbcount;
370b8accfd6SDarrick J. Wong 
371b8accfd6SDarrick J. Wong 		if (bbcount == 0)
372b8accfd6SDarrick J. Wong 			break;
373b8accfd6SDarrick J. Wong 
374b8accfd6SDarrick J. Wong 		if (me->me_rest_us) {
375b8accfd6SDarrick J. Wong 			ktime_t	expires;
376b8accfd6SDarrick J. Wong 
377b8accfd6SDarrick J. Wong 			expires = ktime_add_ns(ktime_get(),
378b8accfd6SDarrick J. Wong 					me->me_rest_us * 1000);
379b8accfd6SDarrick J. Wong 			set_current_state(TASK_KILLABLE);
380b8accfd6SDarrick J. Wong 			schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
381b8accfd6SDarrick J. Wong 		}
382b8accfd6SDarrick J. Wong 
383b8accfd6SDarrick J. Wong 		if (fatal_signal_pending(current)) {
384b8accfd6SDarrick J. Wong 			error = -EINTR;
385b8accfd6SDarrick J. Wong 			break;
386b8accfd6SDarrick J. Wong 		}
387b8accfd6SDarrick J. Wong 
388b8accfd6SDarrick J. Wong 		cond_resched();
389b8accfd6SDarrick J. Wong 	}
390b8accfd6SDarrick J. Wong 
391b8accfd6SDarrick J. Wong 	bio_put(bio);
392b8accfd6SDarrick J. Wong out_folio:
393b8accfd6SDarrick J. Wong 	folio_put(folio);
394b8accfd6SDarrick J. Wong 
395b8accfd6SDarrick J. Wong 	if (error)
396b8accfd6SDarrick J. Wong 		return error;
397b8accfd6SDarrick J. Wong 
398b8accfd6SDarrick J. Wong 	/*
399b8accfd6SDarrick J. Wong 	 * Advance start_daddr to the end of what we verified if there wasn't
400b8accfd6SDarrick J. Wong 	 * an operational error.
401b8accfd6SDarrick J. Wong 	 */
402b8accfd6SDarrick J. Wong 	me->me_start_daddr = daddr;
403b8accfd6SDarrick J. Wong 	trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev);
404b8accfd6SDarrick J. Wong 	return 0;
405b8accfd6SDarrick J. Wong }
406b8accfd6SDarrick J. Wong 
407b8accfd6SDarrick J. Wong int
xfs_ioc_verify_media(struct file * file,struct xfs_verify_media __user * arg)408b8accfd6SDarrick J. Wong xfs_ioc_verify_media(
409b8accfd6SDarrick J. Wong 	struct file			*file,
410b8accfd6SDarrick J. Wong 	struct xfs_verify_media __user	*arg)
411b8accfd6SDarrick J. Wong {
412b8accfd6SDarrick J. Wong 	struct xfs_verify_media		me;
413b8accfd6SDarrick J. Wong 	struct xfs_inode		*ip = XFS_I(file_inode(file));
414b8accfd6SDarrick J. Wong 	struct xfs_mount		*mp = ip->i_mount;
415b8accfd6SDarrick J. Wong 	int				error;
416b8accfd6SDarrick J. Wong 
417b8accfd6SDarrick J. Wong 	if (!capable(CAP_SYS_ADMIN))
418b8accfd6SDarrick J. Wong 		return -EPERM;
419b8accfd6SDarrick J. Wong 
420b8accfd6SDarrick J. Wong 	if (copy_from_user(&me, arg, sizeof(me)))
421b8accfd6SDarrick J. Wong 		return -EFAULT;
422b8accfd6SDarrick J. Wong 
423b8accfd6SDarrick J. Wong 	if (me.me_pad)
424b8accfd6SDarrick J. Wong 		return -EINVAL;
425b8accfd6SDarrick J. Wong 	if (me.me_flags & ~XFS_VERIFY_MEDIA_FLAGS)
426b8accfd6SDarrick J. Wong 		return -EINVAL;
427b8accfd6SDarrick J. Wong 
428b8accfd6SDarrick J. Wong 	switch (me.me_dev) {
429b8accfd6SDarrick J. Wong 	case XFS_DEV_DATA:
430b8accfd6SDarrick J. Wong 	case XFS_DEV_LOG:
431b8accfd6SDarrick J. Wong 	case XFS_DEV_RT:
432b8accfd6SDarrick J. Wong 		break;
433b8accfd6SDarrick J. Wong 	default:
434b8accfd6SDarrick J. Wong 		return -EINVAL;
435b8accfd6SDarrick J. Wong 	}
436b8accfd6SDarrick J. Wong 
437b8accfd6SDarrick J. Wong 	error = xfs_verify_media(mp, &me);
438b8accfd6SDarrick J. Wong 	if (error)
439b8accfd6SDarrick J. Wong 		return error;
440b8accfd6SDarrick J. Wong 
441b8accfd6SDarrick J. Wong 	if (copy_to_user(arg, &me, sizeof(me)))
442b8accfd6SDarrick J. Wong 		return -EFAULT;
443b8accfd6SDarrick J. Wong 
444b8accfd6SDarrick J. Wong 	return 0;
445b8accfd6SDarrick J. Wong }
446