xref: /linux/fs/xfs/scrub/parent.c (revision 87c9c16317882dd6dbbc07e349bc3223e14f3244)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2017 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_inode.h"
14 #include "xfs_icache.h"
15 #include "xfs_dir2.h"
16 #include "xfs_dir2_priv.h"
17 #include "scrub/scrub.h"
18 #include "scrub/common.h"
19 
/*
 * Prepare the scrub context for a parent pointer check.  All of the work
 * is delegated to the generic inode-contents setup helper; 0 is passed as
 * its second argument (presumably "no extra block reservation" -- confirm
 * against xchk_setup_inode_contents).
 */
int
xchk_setup_parent(
	struct xfs_scrub	*sc)
{
	int			error;

	error = xchk_setup_inode_contents(sc, 0);
	return error;
}
27 
28 /* Parent pointers */
29 
30 /* Look for an entry in a parent pointing to this inode. */
31 
/*
 * State shared between xchk_parent_count_parent_dentries() and the readdir
 * actor it installs.  The actor is only handed the dir_context, so it
 * recovers this structure with container_of() on the embedded @dc.
 */
struct xchk_parent_ctx {
	/* Directory iteration context; carries the actor callback and position. */
	struct dir_context	dc;
	/* Scrub context, consulted by the actor to detect termination. */
	struct xfs_scrub	*sc;
	/* Inode number of the file being scrubbed (the dentry target we want). */
	xfs_ino_t		ino;
	/* Number of directory entries seen so far whose inode matches @ino. */
	xfs_nlink_t		nlink;
	/* Set by the actor when xchk_should_terminate() asked us to stop. */
	bool			cancelled;
};
39 
40 /* Look for a single entry in a directory pointing to an inode. */
41 STATIC int
42 xchk_parent_actor(
43 	struct dir_context	*dc,
44 	const char		*name,
45 	int			namelen,
46 	loff_t			pos,
47 	u64			ino,
48 	unsigned		type)
49 {
50 	struct xchk_parent_ctx	*spc;
51 	int			error = 0;
52 
53 	spc = container_of(dc, struct xchk_parent_ctx, dc);
54 	if (spc->ino == ino)
55 		spc->nlink++;
56 
57 	/*
58 	 * If we're facing a fatal signal, bail out.  Store the cancellation
59 	 * status separately because the VFS readdir code squashes error codes
60 	 * into short directory reads.
61 	 */
62 	if (xchk_should_terminate(spc->sc, &error))
63 		spc->cancelled = true;
64 
65 	return error;
66 }
67 
68 /* Count the number of dentries in the parent dir that point to this inode. */
69 STATIC int
70 xchk_parent_count_parent_dentries(
71 	struct xfs_scrub	*sc,
72 	struct xfs_inode	*parent,
73 	xfs_nlink_t		*nlink)
74 {
75 	struct xchk_parent_ctx	spc = {
76 		.dc.actor	= xchk_parent_actor,
77 		.ino		= sc->ip->i_ino,
78 		.sc		= sc,
79 	};
80 	size_t			bufsize;
81 	loff_t			oldpos;
82 	uint			lock_mode;
83 	int			error = 0;
84 
85 	/*
86 	 * If there are any blocks, read-ahead block 0 as we're almost
87 	 * certain to have the next operation be a read there.  This is
88 	 * how we guarantee that the parent's extent map has been loaded,
89 	 * if there is one.
90 	 */
91 	lock_mode = xfs_ilock_data_map_shared(parent);
92 	if (parent->i_df.if_nextents > 0)
93 		error = xfs_dir3_data_readahead(parent, 0, 0);
94 	xfs_iunlock(parent, lock_mode);
95 	if (error)
96 		return error;
97 
98 	/*
99 	 * Iterate the parent dir to confirm that there is
100 	 * exactly one entry pointing back to the inode being
101 	 * scanned.
102 	 */
103 	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
104 			parent->i_disk_size);
105 	oldpos = 0;
106 	while (true) {
107 		error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize);
108 		if (error)
109 			goto out;
110 		if (spc.cancelled) {
111 			error = -EAGAIN;
112 			goto out;
113 		}
114 		if (oldpos == spc.dc.pos)
115 			break;
116 		oldpos = spc.dc.pos;
117 	}
118 	*nlink = spc.nlink;
119 out:
120 	return error;
121 }
122 
/*
 * Given the inode number of the alleged parent of the inode being
 * scrubbed, try to validate that the parent has exactly one directory
 * entry pointing back to the inode being scrubbed.
 *
 * @sc:		scrub context; sc->ip is the directory being scrubbed.
 * @dnum:	inode number that '..' pointed to when the caller looked it up.
 *		This is passed by value: the re-lookup of '..' further down
 *		only updates the local copy, not the caller's variable.
 * @try_again:	set to false on entry; set to true (with a return value of 0)
 *		when '..' was found to have changed while sc->ip was unlocked.
 *
 * Returns 0 or a negative errno.  Corruption and cross-referencing problems
 * are reported through the xchk_fblock_* helpers rather than the return
 * value.
 */
STATIC int
xchk_parent_validate(
	struct xfs_scrub	*sc,
	xfs_ino_t		dnum,
	bool			*try_again)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*dp = NULL;
	xfs_nlink_t		expected_nlink;
	xfs_nlink_t		nlink;
	int			error = 0;

	*try_again = false;

	/* Nothing more to learn if corruption has already been flagged. */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	/* '..' must not point to ourselves. */
	if (sc->ip->i_ino == dnum) {
		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
		goto out;
	}

	/*
	 * If we're an unlinked directory, the parent /won't/ have a link
	 * to us.  Otherwise, it should have one link.
	 */
	expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;

	/*
	 * Grab this parent inode.  We release the inode before we
	 * cancel the scrub transaction.  Since we don't know a
	 * priori that releasing the inode won't trigger eofblocks
	 * cleanup (which allocates what would be a nested transaction)
	 * if the parent pointer erroneously points to a file, we
	 * can't use DONTCACHE here because DONTCACHE inodes can trigger
	 * immediate inactive cleanup of the inode.
	 *
	 * If _iget returns -EINVAL or -ENOENT then the parent inode number is
	 * garbage and the directory is corrupt.  If the _iget returns
	 * -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a
	 * cross referencing error.  Any other error is an operational error.
	 */
	error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp);
	if (error == -EINVAL || error == -ENOENT) {
		error = -EFSCORRUPTED;
		xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
		goto out;
	}
	if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
		goto out;
	/* The parent must be some other inode, and it must be a directory. */
	if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
		goto out_rele;
	}

	/*
	 * We prefer to keep the inode locked while we lock and search
	 * its alleged parent for a forward reference.  If we can grab
	 * the iolock, validate the pointers and we're done.  We must
	 * use nowait here to avoid an ABBA deadlock on the parent and
	 * the child inodes.
	 */
	if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
		error = xchk_parent_count_parent_dentries(sc, dp, &nlink);
		if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
				&error))
			goto out_unlock;
		if (nlink != expected_nlink)
			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
		goto out_unlock;
	}

	/*
	 * The game changes if we get here.  We failed to lock the parent,
	 * so we're going to try to verify both pointers while only holding
	 * one lock so as to avoid deadlocking with something that's actually
	 * trying to traverse down the directory tree.
	 */
	xfs_iunlock(sc->ip, sc->ilock_flags);
	/* Record that we no longer hold any locks on sc->ip. */
	sc->ilock_flags = 0;
	error = xchk_ilock_inverted(dp, XFS_IOLOCK_SHARED);
	if (error)
		goto out_rele;

	/* Go looking for our dentry. */
	error = xchk_parent_count_parent_dentries(sc, dp, &nlink);
	if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
		goto out_unlock;

	/* Drop the parent lock, relock this inode. */
	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
	error = xchk_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL);
	if (error)
		goto out_rele;
	/* From here on sc->ip is held with IOLOCK_EXCL only. */
	sc->ilock_flags = XFS_IOLOCK_EXCL;

	/*
	 * If we're an unlinked directory, the parent /won't/ have a link
	 * to us.  Otherwise, it should have one link.  We have to re-set
	 * it here because we dropped the lock on sc->ip.
	 */
	expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;

	/*
	 * Look up '..' to see if the inode changed.  Note that this
	 * overwrites only our by-value copy of dnum.
	 */
	error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
		goto out_rele;

	/* Drat, parent changed.  Try again! */
	if (dnum != dp->i_ino) {
		xfs_irele(dp);
		*try_again = true;
		return 0;
	}
	xfs_irele(dp);

	/*
	 * '..' didn't change, so check that there was only one entry
	 * for us in the parent.
	 */
	if (nlink != expected_nlink)
		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
	return error;

	/* Error/early-exit unwinding: drop the parent's iolock, then the inode. */
out_unlock:
	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
out_rele:
	xfs_irele(dp);
out:
	return error;
}
260 
261 /* Scrub a parent pointer. */
262 int
263 xchk_parent(
264 	struct xfs_scrub	*sc)
265 {
266 	struct xfs_mount	*mp = sc->mp;
267 	xfs_ino_t		dnum;
268 	bool			try_again;
269 	int			tries = 0;
270 	int			error = 0;
271 
272 	/*
273 	 * If we're a directory, check that the '..' link points up to
274 	 * a directory that has one entry pointing to us.
275 	 */
276 	if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
277 		return -ENOENT;
278 
279 	/* We're not a special inode, are we? */
280 	if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) {
281 		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
282 		goto out;
283 	}
284 
285 	/*
286 	 * The VFS grabs a read or write lock via i_rwsem before it reads
287 	 * or writes to a directory.  If we've gotten this far we've
288 	 * already obtained IOLOCK_EXCL, which (since 4.10) is the same as
289 	 * getting a write lock on i_rwsem.  Therefore, it is safe for us
290 	 * to drop the ILOCK here in order to do directory lookups.
291 	 */
292 	sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
293 	xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
294 
295 	/* Look up '..' */
296 	error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
297 	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
298 		goto out;
299 	if (!xfs_verify_dir_ino(mp, dnum)) {
300 		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
301 		goto out;
302 	}
303 
304 	/* Is this the root dir?  Then '..' must point to itself. */
305 	if (sc->ip == mp->m_rootip) {
306 		if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
307 		    sc->ip->i_ino != dnum)
308 			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
309 		goto out;
310 	}
311 
312 	do {
313 		error = xchk_parent_validate(sc, dnum, &try_again);
314 		if (error)
315 			goto out;
316 	} while (try_again && ++tries < 20);
317 
318 	/*
319 	 * We gave it our best shot but failed, so mark this scrub
320 	 * incomplete.  Userspace can decide if it wants to try again.
321 	 */
322 	if (try_again && tries == 20)
323 		xchk_set_incomplete(sc);
324 out:
325 	/*
326 	 * If we failed to lock the parent inode even after a retry, just mark
327 	 * this scrub incomplete and return.
328 	 */
329 	if ((sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
330 		error = 0;
331 		xchk_set_incomplete(sc);
332 	}
333 	return error;
334 }
335