xref: /linux/fs/xfs/scrub/readdir.c (revision 4f05e82003d1c20da29fa593420b8d92e2c8d4e6)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_dir2.h"
15 #include "xfs_dir2_priv.h"
16 #include "xfs_trace.h"
17 #include "xfs_bmap.h"
18 #include "xfs_trans.h"
19 #include "xfs_error.h"
20 #include "scrub/scrub.h"
21 #include "scrub/common.h"
22 #include "scrub/readdir.h"
23 
24 /* Call a function for every entry in a shortform directory. */
25 STATIC int
26 xchk_dir_walk_sf(
27 	struct xfs_scrub	*sc,
28 	struct xfs_inode	*dp,
29 	xchk_dirent_fn		dirent_fn,
30 	void			*priv)
31 {
32 	struct xfs_name		name = {
33 		.name		= ".",
34 		.len		= 1,
35 		.type		= XFS_DIR3_FT_DIR,
36 	};
37 	struct xfs_mount	*mp = dp->i_mount;
38 	struct xfs_da_geometry	*geo = mp->m_dir_geo;
39 	struct xfs_dir2_sf_entry *sfep;
40 	struct xfs_dir2_sf_hdr	*sfp = dp->i_df.if_data;
41 	xfs_ino_t		ino;
42 	xfs_dir2_dataptr_t	dapos;
43 	unsigned int		i;
44 	int			error;
45 
46 	ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
47 	ASSERT(sfp != NULL);
48 
49 	/* dot entry */
50 	dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
51 			geo->data_entry_offset);
52 
53 	error = dirent_fn(sc, dp, dapos, &name, dp->i_ino, priv);
54 	if (error)
55 		return error;
56 
57 	/* dotdot entry */
58 	dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
59 			geo->data_entry_offset +
60 			xfs_dir2_data_entsize(mp, sizeof(".") - 1));
61 	ino = xfs_dir2_sf_get_parent_ino(sfp);
62 	name.name = "..";
63 	name.len = 2;
64 
65 	error = dirent_fn(sc, dp, dapos, &name, ino, priv);
66 	if (error)
67 		return error;
68 
69 	/* iterate everything else */
70 	sfep = xfs_dir2_sf_firstentry(sfp);
71 	for (i = 0; i < sfp->count; i++) {
72 		dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
73 				xfs_dir2_sf_get_offset(sfep));
74 		ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
75 		name.name = sfep->name;
76 		name.len = sfep->namelen;
77 		name.type = xfs_dir2_sf_get_ftype(mp, sfep);
78 
79 		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
80 		if (error)
81 			return error;
82 
83 		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
84 	}
85 
86 	return 0;
87 }
88 
89 /* Call a function for every entry in a block directory. */
90 STATIC int
91 xchk_dir_walk_block(
92 	struct xfs_scrub	*sc,
93 	struct xfs_inode	*dp,
94 	xchk_dirent_fn		dirent_fn,
95 	void			*priv)
96 {
97 	struct xfs_mount	*mp = dp->i_mount;
98 	struct xfs_da_geometry	*geo = mp->m_dir_geo;
99 	struct xfs_buf		*bp;
100 	unsigned int		off, next_off, end;
101 	int			error;
102 
103 	error = xfs_dir3_block_read(sc->tp, dp, dp->i_ino, &bp);
104 	if (error)
105 		return error;
106 
107 	/* Walk each directory entry. */
108 	end = xfs_dir3_data_end_offset(geo, bp->b_addr);
109 	for (off = geo->data_entry_offset; off < end; off = next_off) {
110 		struct xfs_name			name = { };
111 		struct xfs_dir2_data_unused	*dup = bp->b_addr + off;
112 		struct xfs_dir2_data_entry	*dep = bp->b_addr + off;
113 		xfs_ino_t			ino;
114 		xfs_dir2_dataptr_t		dapos;
115 
116 		/* Skip an empty entry. */
117 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
118 			next_off = off + be16_to_cpu(dup->length);
119 			continue;
120 		}
121 
122 		/* Otherwise, find the next entry and report it. */
123 		next_off = off + xfs_dir2_data_entsize(mp, dep->namelen);
124 		if (next_off > end)
125 			break;
126 
127 		dapos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, off);
128 		ino = be64_to_cpu(dep->inumber);
129 		name.name = dep->name;
130 		name.len = dep->namelen;
131 		name.type = xfs_dir2_data_get_ftype(mp, dep);
132 
133 		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
134 		if (error)
135 			break;
136 	}
137 
138 	xfs_trans_brelse(sc->tp, bp);
139 	return error;
140 }
141 
142 /* Read a leaf-format directory buffer. */
143 STATIC int
144 xchk_read_leaf_dir_buf(
145 	struct xfs_trans	*tp,
146 	struct xfs_inode	*dp,
147 	struct xfs_da_geometry	*geo,
148 	xfs_dir2_off_t		*curoff,
149 	struct xfs_buf		**bpp)
150 {
151 	struct xfs_iext_cursor	icur;
152 	struct xfs_bmbt_irec	map;
153 	struct xfs_ifork	*ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
154 	xfs_dablk_t		last_da;
155 	xfs_dablk_t		map_off;
156 	xfs_dir2_off_t		new_off;
157 
158 	*bpp = NULL;
159 
160 	/*
161 	 * Look for mapped directory blocks at or above the current offset.
162 	 * Truncate down to the nearest directory block to start the scanning
163 	 * operation.
164 	 */
165 	last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
166 	map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff));
167 
168 	if (!xfs_iext_lookup_extent(dp, ifp, map_off, &icur, &map))
169 		return 0;
170 	if (map.br_startoff >= last_da)
171 		return 0;
172 	xfs_trim_extent(&map, map_off, last_da - map_off);
173 
174 	/* Read the directory block of that first mapping. */
175 	new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
176 	if (new_off > *curoff)
177 		*curoff = new_off;
178 
179 	return xfs_dir3_data_read(tp, dp, dp->i_ino, map.br_startoff, 0, bpp);
180 }
181 
182 /* Call a function for every entry in a leaf directory. */
183 STATIC int
184 xchk_dir_walk_leaf(
185 	struct xfs_scrub	*sc,
186 	struct xfs_inode	*dp,
187 	xchk_dirent_fn		dirent_fn,
188 	void			*priv)
189 {
190 	struct xfs_mount	*mp = dp->i_mount;
191 	struct xfs_da_geometry	*geo = mp->m_dir_geo;
192 	struct xfs_buf		*bp = NULL;
193 	xfs_dir2_off_t		curoff = 0;
194 	unsigned int		offset = 0;
195 	int			error;
196 
197 	/* Iterate every directory offset in this directory. */
198 	while (curoff < XFS_DIR2_LEAF_OFFSET) {
199 		struct xfs_name			name = { };
200 		struct xfs_dir2_data_unused	*dup;
201 		struct xfs_dir2_data_entry	*dep;
202 		xfs_ino_t			ino;
203 		unsigned int			length;
204 		xfs_dir2_dataptr_t		dapos;
205 
206 		/*
207 		 * If we have no buffer, or we're off the end of the
208 		 * current buffer, need to get another one.
209 		 */
210 		if (!bp || offset >= geo->blksize) {
211 			if (bp) {
212 				xfs_trans_brelse(sc->tp, bp);
213 				bp = NULL;
214 			}
215 
216 			error = xchk_read_leaf_dir_buf(sc->tp, dp, geo, &curoff,
217 					&bp);
218 			if (error || !bp)
219 				break;
220 
221 			/*
222 			 * Find our position in the block.
223 			 */
224 			offset = geo->data_entry_offset;
225 			curoff += geo->data_entry_offset;
226 		}
227 
228 		/* Skip an empty entry. */
229 		dup = bp->b_addr + offset;
230 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
231 			length = be16_to_cpu(dup->length);
232 			offset += length;
233 			curoff += length;
234 			continue;
235 		}
236 
237 		/* Otherwise, find the next entry and report it. */
238 		dep = bp->b_addr + offset;
239 		length = xfs_dir2_data_entsize(mp, dep->namelen);
240 
241 		dapos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
242 		ino = be64_to_cpu(dep->inumber);
243 		name.name = dep->name;
244 		name.len = dep->namelen;
245 		name.type = xfs_dir2_data_get_ftype(mp, dep);
246 
247 		error = dirent_fn(sc, dp, dapos, &name, ino, priv);
248 		if (error)
249 			break;
250 
251 		/* Advance to the next entry. */
252 		offset += length;
253 		curoff += length;
254 	}
255 
256 	if (bp)
257 		xfs_trans_brelse(sc->tp, bp);
258 	return error;
259 }
260 
261 /*
262  * Call a function for every entry in a directory.
263  *
264  * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.
265  */
266 int
267 xchk_dir_walk(
268 	struct xfs_scrub	*sc,
269 	struct xfs_inode	*dp,
270 	xchk_dirent_fn		dirent_fn,
271 	void			*priv)
272 {
273 	struct xfs_da_args	args = {
274 		.dp		= dp,
275 		.geo		= dp->i_mount->m_dir_geo,
276 		.trans		= sc->tp,
277 		.owner		= dp->i_ino,
278 	};
279 	int			error;
280 
281 	if (xfs_is_shutdown(dp->i_mount))
282 		return -EIO;
283 
284 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
285 	xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
286 
287 	switch (xfs_dir2_format(&args, &error)) {
288 	case XFS_DIR2_FMT_SF:
289 		return xchk_dir_walk_sf(sc, dp, dirent_fn, priv);
290 	case XFS_DIR2_FMT_BLOCK:
291 		return xchk_dir_walk_block(sc, dp, dirent_fn, priv);
292 	case XFS_DIR2_FMT_LEAF:
293 	case XFS_DIR2_FMT_NODE:
294 		return xchk_dir_walk_leaf(sc, dp, dirent_fn, priv);
295 	default:
296 		return error;
297 	}
298 }
299 
300 /*
301  * Look up the inode number for an exact name in a directory.
302  *
303  * Callers must hold the ILOCK.  File types are XFS_DIR3_FT_*.  Names are not
304  * checked for correctness.
305  */
306 int
307 xchk_dir_lookup(
308 	struct xfs_scrub	*sc,
309 	struct xfs_inode	*dp,
310 	const struct xfs_name	*name,
311 	xfs_ino_t		*ino)
312 {
313 	struct xfs_da_args	args = {
314 		.dp		= dp,
315 		.geo		= dp->i_mount->m_dir_geo,
316 		.trans		= sc->tp,
317 		.name		= name->name,
318 		.namelen	= name->len,
319 		.filetype	= name->type,
320 		.hashval	= xfs_dir2_hashname(dp->i_mount, name),
321 		.whichfork	= XFS_DATA_FORK,
322 		.op_flags	= XFS_DA_OP_OKNOENT,
323 		.owner		= dp->i_ino,
324 	};
325 	int			error;
326 
327 	if (xfs_is_shutdown(dp->i_mount))
328 		return -EIO;
329 
330 	/*
331 	 * A temporary directory's block headers are written with the owner
332 	 * set to sc->ip, so we must switch the owner here for the lookup.
333 	 */
334 	if (dp == sc->tempip)
335 		args.owner = sc->ip->i_ino;
336 
337 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
338 	xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
339 
340 	error = xfs_dir_lookup_args(&args);
341 	if (!error)
342 		*ino = args.inumber;
343 	return error;
344 }
345 
346 /*
347  * Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
348  * state.  The caller may have a transaction, so we must use trylock for both
349  * IOLOCKs.
350  */
351 static inline unsigned int
352 xchk_dir_trylock_both(
353 	struct xfs_scrub	*sc,
354 	struct xfs_inode	*ip)
355 {
356 	if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
357 		return 0;
358 
359 	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
360 		goto parent_iolock;
361 
362 	xchk_ilock(sc, XFS_ILOCK_EXCL);
363 	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
364 		goto parent_ilock;
365 
366 	return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
367 
368 parent_ilock:
369 	xchk_iunlock(sc, XFS_ILOCK_EXCL);
370 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
371 parent_iolock:
372 	xchk_iunlock(sc, XFS_IOLOCK_EXCL);
373 	return 0;
374 }
375 
376 /*
377  * Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
378  * (@sc->ip) and the inode at the other end (@ip) of a directory or parent
379  * pointer link so that we can check that link.
380  *
381  * We do not know ahead of time that the directory tree is /not/ corrupt, so we
382  * cannot use the "lock two inode" functions because we do not know that there
383  * is not a racing thread trying to take the locks in opposite order.  First
384  * take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
385  * of @ip to synchronize with the VFS.  Next, take ILOCK_EXCL of the scrub
386  * target and @ip to synchronize with XFS.
387  *
388  * If the trylocks succeed, *lockmode will be set to the locks held for @ip;
389  * @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
390  * be returned.  If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
391  * XCHK_TRY_HARDER was set.  Returns -EINTR if the process has been killed.
392  */
393 int
394 xchk_dir_trylock_for_pptrs(
395 	struct xfs_scrub	*sc,
396 	struct xfs_inode	*ip,
397 	unsigned int		*lockmode)
398 {
399 	unsigned int		nr;
400 	int			error = 0;
401 
402 	ASSERT(sc->ilock_flags == 0);
403 
404 	for (nr = 0; nr < HZ; nr++) {
405 		*lockmode = xchk_dir_trylock_both(sc, ip);
406 		if (*lockmode)
407 			return 0;
408 
409 		if (xchk_should_terminate(sc, &error))
410 			return error;
411 
412 		delay(1);
413 	}
414 
415 	if (sc->flags & XCHK_TRY_HARDER) {
416 		xchk_set_incomplete(sc);
417 		return -ETIMEDOUT;
418 	}
419 
420 	return -EDEADLOCK;
421 }
422