xref: /linux/fs/xfs/scrub/nlinks.c (revision f3f5edc5e41e038cf66d124a4cbacf6ff0983513)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_icache.h"
16 #include "xfs_iwalk.h"
17 #include "xfs_ialloc.h"
18 #include "xfs_dir2.h"
19 #include "xfs_dir2_priv.h"
20 #include "xfs_ag.h"
21 #include "xfs_parent.h"
22 #include "scrub/scrub.h"
23 #include "scrub/common.h"
24 #include "scrub/repair.h"
25 #include "scrub/xfile.h"
26 #include "scrub/xfarray.h"
27 #include "scrub/iscan.h"
28 #include "scrub/orphanage.h"
29 #include "scrub/nlinks.h"
30 #include "scrub/trace.h"
31 #include "scrub/readdir.h"
32 #include "scrub/tempfile.h"
33 #include "scrub/listxattr.h"
34 
/*
 * Live Inode Link Count Checking
 * ==============================
 *
 * Inode link counts are "summary" metadata, in the sense that they are
 * computed as the number of directory entries referencing each file on the
 * filesystem.  Therefore, we compute the correct link counts by creating a
 * shadow link count structure and walking every inode.
 */
44 
45 /* Set us up to scrub inode link counts. */
46 int
xchk_setup_nlinks(struct xfs_scrub * sc)47 xchk_setup_nlinks(
48 	struct xfs_scrub	*sc)
49 {
50 	struct xchk_nlink_ctrs	*xnc;
51 	int			error;
52 
53 	xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
54 
55 	if (xchk_could_repair(sc)) {
56 		error = xrep_setup_nlinks(sc);
57 		if (error)
58 			return error;
59 	}
60 
61 	xnc = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS);
62 	if (!xnc)
63 		return -ENOMEM;
64 	xnc->xname.name = xnc->namebuf;
65 	xnc->sc = sc;
66 	sc->buf = xnc;
67 
68 	return xchk_setup_fs(sc);
69 }
70 
/*
 * Part 1: Collecting file link counts.  For each file, we create a shadow link
 * counting structure, then walk the entire directory tree, incrementing parent
 * and child link counts for each directory entry seen.
 *
 * To avoid false corruption reports in part 2, any failure in this part must
 * set the INCOMPLETE flag even when a negative errno is returned.  This care
 * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
 * ECANCELED) that are absorbed into a scrub state flag update by
 * xchk_*_process_error.  Scrub and repair share the same incore data
 * structures, so the INCOMPLETE flag is critical to prevent a repair based on
 * insufficient information.
 *
 * Because we are scanning a live filesystem, it's possible that another thread
 * will try to update the link counts for an inode that we've already scanned.
 * This will cause our counts to be incorrect.  Therefore, we hook all
 * directory entry updates because that is when link count updates occur.  By
 * shadowing transaction updates in this manner, live nlink check can ensure by
 * locking the inode and the shadow structure that its own copies are not out
 * of date.  Because the hook code runs in a different process context from the
 * scrub code and the scrub state flags are not accessed atomically, failures
 * in the hook code must abort the iscan and the scrubber must notice the
 * aborted scan and set the incomplete flag.
 *
 * Note that we use jump labels and srcu notifier hooks to minimize the
 * overhead when live nlinks is /not/ running.  Locking order for nlink
 * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
 */
99 
100 /*
101  * Add a delta to an nlink counter, clamping the value to U32_MAX.  Because
102  * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results
103  * even if we lose some precision.
104  */
105 static inline void
careful_add(xfs_nlink_t * nlinkp,int delta)106 careful_add(
107 	xfs_nlink_t	*nlinkp,
108 	int		delta)
109 {
110 	uint64_t	new_value = (uint64_t)(*nlinkp) + delta;
111 
112 	BUILD_BUG_ON(XFS_MAXLINK > U32_MAX);
113 	*nlinkp = min_t(uint64_t, new_value, U32_MAX);
114 }
115 
116 /* Update incore link count information.  Caller must hold the nlinks lock. */
117 STATIC int
xchk_nlinks_update_incore(struct xchk_nlink_ctrs * xnc,xfs_ino_t ino,int parents_delta,int backrefs_delta,int children_delta)118 xchk_nlinks_update_incore(
119 	struct xchk_nlink_ctrs	*xnc,
120 	xfs_ino_t		ino,
121 	int			parents_delta,
122 	int			backrefs_delta,
123 	int			children_delta)
124 {
125 	struct xchk_nlink	nl;
126 	int			error;
127 
128 	if (!xnc->nlinks)
129 		return 0;
130 
131 	error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
132 	if (error)
133 		return error;
134 
135 	trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta,
136 			backrefs_delta, children_delta);
137 
138 	careful_add(&nl.parents, parents_delta);
139 	careful_add(&nl.backrefs, backrefs_delta);
140 	careful_add(&nl.children, children_delta);
141 
142 	nl.flags |= XCHK_NLINK_WRITTEN;
143 	error = xfarray_store(xnc->nlinks, ino, &nl);
144 	if (error == -EFBIG) {
145 		/*
146 		 * EFBIG means we tried to store data at too high a byte offset
147 		 * in the sparse array.  IOWs, we cannot complete the check and
148 		 * must notify userspace that the check was incomplete.
149 		 */
150 		error = -ECANCELED;
151 	}
152 	return error;
153 }
154 
155 /*
156  * Apply a link count change from the regular filesystem into our shadow link
157  * count structure based on a directory update in progress.
158  */
159 STATIC int
xchk_nlinks_live_update(struct notifier_block * nb,unsigned long action,void * data)160 xchk_nlinks_live_update(
161 	struct notifier_block		*nb,
162 	unsigned long			action,
163 	void				*data)
164 {
165 	struct xfs_dir_update_params	*p = data;
166 	struct xchk_nlink_ctrs		*xnc;
167 	int				error;
168 
169 	xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb);
170 
171 	/*
172 	 * Ignore temporary directories being used to stage dir repairs, since
173 	 * we don't bump the link counts of the children.
174 	 */
175 	if (xrep_is_tempfile(p->dp))
176 		return NOTIFY_DONE;
177 
178 	trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino,
179 			p->delta, p->name->name, p->name->len);
180 
181 	/*
182 	 * If we've already scanned @dp, update the number of parents that link
183 	 * to @ip.  If @ip is a subdirectory, update the number of child links
184 	 * going out of @dp.
185 	 */
186 	if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) {
187 		mutex_lock(&xnc->lock);
188 		error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta,
189 				0, 0);
190 		if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode))
191 			error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
192 					0, p->delta);
193 		mutex_unlock(&xnc->lock);
194 		if (error)
195 			goto out_abort;
196 	}
197 
198 	/*
199 	 * If @ip is a subdirectory and we've already scanned it, update the
200 	 * number of backrefs pointing to @dp.
201 	 */
202 	if (S_ISDIR(VFS_IC(p->ip)->i_mode) &&
203 	    xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) {
204 		mutex_lock(&xnc->lock);
205 		error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
206 				p->delta, 0);
207 		mutex_unlock(&xnc->lock);
208 		if (error)
209 			goto out_abort;
210 	}
211 
212 	return NOTIFY_DONE;
213 
214 out_abort:
215 	xchk_iscan_abort(&xnc->collect_iscan);
216 	return NOTIFY_DONE;
217 }
218 
219 /* Bump the observed link count for the inode referenced by this entry. */
220 STATIC int
xchk_nlinks_collect_dirent(struct xfs_scrub * sc,struct xfs_inode * dp,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,xfs_ino_t ino,void * priv)221 xchk_nlinks_collect_dirent(
222 	struct xfs_scrub	*sc,
223 	struct xfs_inode	*dp,
224 	xfs_dir2_dataptr_t	dapos,
225 	const struct xfs_name	*name,
226 	xfs_ino_t		ino,
227 	void			*priv)
228 {
229 	struct xchk_nlink_ctrs	*xnc = priv;
230 	bool			dot = false, dotdot = false;
231 	int			error;
232 
233 	/* Does this name make sense? */
234 	if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) {
235 		error = -ECANCELED;
236 		goto out_abort;
237 	}
238 
239 	if (name->len == 1 && name->name[0] == '.')
240 		dot = true;
241 	else if (name->len == 2 && name->name[0] == '.' &&
242 				   name->name[1] == '.')
243 		dotdot = true;
244 
245 	/* Don't accept a '.' entry that points somewhere else. */
246 	if (dot && ino != dp->i_ino) {
247 		error = -ECANCELED;
248 		goto out_abort;
249 	}
250 
251 	/* Don't accept an invalid inode number. */
252 	if (!xfs_verify_dir_ino(sc->mp, ino)) {
253 		error = -ECANCELED;
254 		goto out_abort;
255 	}
256 
257 	/* Update the shadow link counts if we haven't already failed. */
258 
259 	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
260 		error = -ECANCELED;
261 		goto out_incomplete;
262 	}
263 
264 	trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name);
265 
266 	mutex_lock(&xnc->lock);
267 
268 	/*
269 	 * If this is a dotdot entry, it is a back link from dp to ino.  How
270 	 * we handle this depends on whether or not dp is the root directory.
271 	 *
272 	 * The root directory is its own parent, so we pretend the dotdot entry
273 	 * establishes the "parent" of the root directory.  Increment the
274 	 * number of parents of the root directory.
275 	 *
276 	 * Otherwise, increment the number of backrefs pointing back to ino.
277 	 *
278 	 * If the filesystem has parent pointers, we walk the pptrs to
279 	 * determine the backref count.
280 	 */
281 	if (dotdot) {
282 		if (xchk_inode_is_dirtree_root(dp))
283 			error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
284 		else if (!xfs_has_parent(sc->mp))
285 			error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
286 		else
287 			error = 0;
288 		if (error)
289 			goto out_unlock;
290 	}
291 
292 	/*
293 	 * If this dirent is a forward link from dp to ino, increment the
294 	 * number of parents linking into ino.
295 	 */
296 	if (!dot && !dotdot) {
297 		error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
298 		if (error)
299 			goto out_unlock;
300 	}
301 
302 	/*
303 	 * If this dirent is a forward link to a subdirectory, increment the
304 	 * number of child links of dp.
305 	 */
306 	if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) {
307 		error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1);
308 		if (error)
309 			goto out_unlock;
310 	}
311 
312 	mutex_unlock(&xnc->lock);
313 	return 0;
314 
315 out_unlock:
316 	mutex_unlock(&xnc->lock);
317 out_abort:
318 	xchk_iscan_abort(&xnc->collect_iscan);
319 out_incomplete:
320 	xchk_set_incomplete(sc);
321 	return error;
322 }
323 
324 /* Bump the backref count for the inode referenced by this parent pointer. */
325 STATIC int
xchk_nlinks_collect_pptr(struct xfs_scrub * sc,struct xfs_inode * ip,unsigned int attr_flags,const unsigned char * name,unsigned int namelen,const void * value,unsigned int valuelen,void * priv)326 xchk_nlinks_collect_pptr(
327 	struct xfs_scrub		*sc,
328 	struct xfs_inode		*ip,
329 	unsigned int			attr_flags,
330 	const unsigned char		*name,
331 	unsigned int			namelen,
332 	const void			*value,
333 	unsigned int			valuelen,
334 	void				*priv)
335 {
336 	struct xfs_name			xname = {
337 		.name			= name,
338 		.len			= namelen,
339 	};
340 	struct xchk_nlink_ctrs		*xnc = priv;
341 	const struct xfs_parent_rec	*pptr_rec = value;
342 	xfs_ino_t			parent_ino;
343 	int				error;
344 
345 	/* Update the shadow link counts if we haven't already failed. */
346 
347 	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
348 		error = -ECANCELED;
349 		goto out_incomplete;
350 	}
351 
352 	if (!(attr_flags & XFS_ATTR_PARENT))
353 		return 0;
354 
355 	error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
356 			valuelen, &parent_ino, NULL);
357 	if (error)
358 		return error;
359 
360 	trace_xchk_nlinks_collect_pptr(sc->mp, ip, &xname, pptr_rec);
361 
362 	mutex_lock(&xnc->lock);
363 
364 	error = xchk_nlinks_update_incore(xnc, parent_ino, 0, 1, 0);
365 	if (error)
366 		goto out_unlock;
367 
368 	mutex_unlock(&xnc->lock);
369 	return 0;
370 
371 out_unlock:
372 	mutex_unlock(&xnc->lock);
373 	xchk_iscan_abort(&xnc->collect_iscan);
374 out_incomplete:
375 	xchk_set_incomplete(sc);
376 	return error;
377 }
378 
379 /* Walk a directory to bump the observed link counts of the children. */
380 STATIC int
xchk_nlinks_collect_dir(struct xchk_nlink_ctrs * xnc,struct xfs_inode * dp)381 xchk_nlinks_collect_dir(
382 	struct xchk_nlink_ctrs	*xnc,
383 	struct xfs_inode	*dp)
384 {
385 	struct xfs_scrub	*sc = xnc->sc;
386 	unsigned int		lock_mode;
387 	int			error = 0;
388 
389 	/*
390 	 * Ignore temporary directories being used to stage dir repairs, since
391 	 * we don't bump the link counts of the children.
392 	 */
393 	if (xrep_is_tempfile(dp))
394 		return 0;
395 
396 	/* Prevent anyone from changing this directory while we walk it. */
397 	xfs_ilock(dp, XFS_IOLOCK_SHARED);
398 	lock_mode = xfs_ilock_data_map_shared(dp);
399 
400 	/*
401 	 * The dotdot entry of an unlinked directory still points to the last
402 	 * parent, but the parent no longer links to this directory.  Skip the
403 	 * directory to avoid overcounting.
404 	 */
405 	if (VFS_I(dp)->i_nlink == 0)
406 		goto out_unlock;
407 
408 	/*
409 	 * We cannot count file links if the directory looks as though it has
410 	 * been zapped by the inode record repair code.
411 	 */
412 	if (xchk_dir_looks_zapped(dp)) {
413 		error = -EBUSY;
414 		goto out_abort;
415 	}
416 
417 	error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc);
418 	if (error == -ECANCELED) {
419 		error = 0;
420 		goto out_unlock;
421 	}
422 	if (error)
423 		goto out_abort;
424 
425 	/* Walk the parent pointers to get real backref counts. */
426 	if (xfs_has_parent(sc->mp)) {
427 		/*
428 		 * If the extended attributes look as though they has been
429 		 * zapped by the inode record repair code, we cannot scan for
430 		 * parent pointers.
431 		 */
432 		if (xchk_pptr_looks_zapped(dp)) {
433 			error = -EBUSY;
434 			goto out_unlock;
435 		}
436 
437 		error = xchk_xattr_walk(sc, dp, xchk_nlinks_collect_pptr, NULL,
438 				xnc);
439 		if (error == -ECANCELED) {
440 			error = 0;
441 			goto out_unlock;
442 		}
443 		if (error)
444 			goto out_abort;
445 	}
446 
447 	xchk_iscan_mark_visited(&xnc->collect_iscan, dp);
448 	goto out_unlock;
449 
450 out_abort:
451 	xchk_set_incomplete(sc);
452 	xchk_iscan_abort(&xnc->collect_iscan);
453 out_unlock:
454 	xfs_iunlock(dp, lock_mode);
455 	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
456 	return error;
457 }
458 
459 /* If this looks like a valid pointer, count it. */
460 static inline int
xchk_nlinks_collect_metafile(struct xchk_nlink_ctrs * xnc,xfs_ino_t ino)461 xchk_nlinks_collect_metafile(
462 	struct xchk_nlink_ctrs	*xnc,
463 	xfs_ino_t		ino)
464 {
465 	if (!xfs_verify_ino(xnc->sc->mp, ino))
466 		return 0;
467 
468 	trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino);
469 	return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
470 }
471 
472 /* Bump the link counts of metadata files rooted in the superblock. */
473 STATIC int
xchk_nlinks_collect_metafiles(struct xchk_nlink_ctrs * xnc)474 xchk_nlinks_collect_metafiles(
475 	struct xchk_nlink_ctrs	*xnc)
476 {
477 	struct xfs_mount	*mp = xnc->sc->mp;
478 	int			error = -ECANCELED;
479 
480 
481 	if (xchk_iscan_aborted(&xnc->collect_iscan))
482 		goto out_incomplete;
483 
484 	mutex_lock(&xnc->lock);
485 	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino);
486 	if (error)
487 		goto out_abort;
488 
489 	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino);
490 	if (error)
491 		goto out_abort;
492 
493 	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino);
494 	if (error)
495 		goto out_abort;
496 
497 	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino);
498 	if (error)
499 		goto out_abort;
500 
501 	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino);
502 	if (error)
503 		goto out_abort;
504 	mutex_unlock(&xnc->lock);
505 
506 	return 0;
507 
508 out_abort:
509 	mutex_unlock(&xnc->lock);
510 	xchk_iscan_abort(&xnc->collect_iscan);
511 out_incomplete:
512 	xchk_set_incomplete(xnc->sc);
513 	return error;
514 }
515 
516 /* Advance the collection scan cursor for this non-directory file. */
517 static inline int
xchk_nlinks_collect_file(struct xchk_nlink_ctrs * xnc,struct xfs_inode * ip)518 xchk_nlinks_collect_file(
519 	struct xchk_nlink_ctrs	*xnc,
520 	struct xfs_inode	*ip)
521 {
522 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
523 	xchk_iscan_mark_visited(&xnc->collect_iscan, ip);
524 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
525 	return 0;
526 }
527 
528 /* Walk all directories and count inode links. */
529 STATIC int
xchk_nlinks_collect(struct xchk_nlink_ctrs * xnc)530 xchk_nlinks_collect(
531 	struct xchk_nlink_ctrs	*xnc)
532 {
533 	struct xfs_scrub	*sc = xnc->sc;
534 	struct xfs_inode	*ip;
535 	int			error;
536 
537 	/* Count the rt and quota files that are rooted in the superblock. */
538 	error = xchk_nlinks_collect_metafiles(xnc);
539 	if (error)
540 		return error;
541 
542 	/*
543 	 * Set up for a potentially lengthy filesystem scan by reducing our
544 	 * transaction resource usage for the duration.  Specifically:
545 	 *
546 	 * Cancel the transaction to release the log grant space while we scan
547 	 * the filesystem.
548 	 *
549 	 * Create a new empty transaction to eliminate the possibility of the
550 	 * inode scan deadlocking on cyclical metadata.
551 	 *
552 	 * We pass the empty transaction to the file scanning function to avoid
553 	 * repeatedly cycling empty transactions.  This can be done even though
554 	 * we take the IOLOCK to quiesce the file because empty transactions
555 	 * do not take sb_internal.
556 	 */
557 	xchk_trans_cancel(sc);
558 	xchk_trans_alloc_empty(sc);
559 
560 	while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
561 		if (S_ISDIR(VFS_I(ip)->i_mode))
562 			error = xchk_nlinks_collect_dir(xnc, ip);
563 		else
564 			error = xchk_nlinks_collect_file(xnc, ip);
565 		xchk_irele(sc, ip);
566 		if (error)
567 			break;
568 
569 		if (xchk_should_terminate(sc, &error))
570 			break;
571 	}
572 	xchk_iscan_iter_finish(&xnc->collect_iscan);
573 	if (error) {
574 		xchk_set_incomplete(sc);
575 		/*
576 		 * If we couldn't grab an inode that was busy with a state
577 		 * change, change the error code so that we exit to userspace
578 		 * as quickly as possible.
579 		 */
580 		if (error == -EBUSY)
581 			return -ECANCELED;
582 		return error;
583 	}
584 
585 	/*
586 	 * Switch out for a real transaction in preparation for building a new
587 	 * tree.
588 	 */
589 	xchk_trans_cancel(sc);
590 	return xchk_setup_fs(sc);
591 }
592 
/*
 * Part 2: Comparing file link counters.  Walk each inode and compare the link
 * counts against our shadow information; and then walk each shadow link count
 * structure (that wasn't covered in the first part), comparing it against the
 * file.
 */
599 
600 /* Read the observed link count for comparison with the actual inode. */
601 STATIC int
xchk_nlinks_comparison_read(struct xchk_nlink_ctrs * xnc,xfs_ino_t ino,struct xchk_nlink * obs)602 xchk_nlinks_comparison_read(
603 	struct xchk_nlink_ctrs	*xnc,
604 	xfs_ino_t		ino,
605 	struct xchk_nlink	*obs)
606 {
607 	struct xchk_nlink	nl;
608 	int			error;
609 
610 	error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
611 	if (error)
612 		return error;
613 
614 	nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN);
615 
616 	error = xfarray_store(xnc->nlinks, ino, &nl);
617 	if (error == -EFBIG) {
618 		/*
619 		 * EFBIG means we tried to store data at too high a byte offset
620 		 * in the sparse array.  IOWs, we cannot complete the check and
621 		 * must notify userspace that the check was incomplete.  This
622 		 * shouldn't really happen outside of the collection phase.
623 		 */
624 		xchk_set_incomplete(xnc->sc);
625 		return -ECANCELED;
626 	}
627 	if (error)
628 		return error;
629 
630 	/* Copy the counters, but do not expose the internal state. */
631 	obs->parents = nl.parents;
632 	obs->backrefs = nl.backrefs;
633 	obs->children = nl.children;
634 	obs->flags = 0;
635 	return 0;
636 }
637 
638 /* Check our link count against an inode. */
639 STATIC int
xchk_nlinks_compare_inode(struct xchk_nlink_ctrs * xnc,struct xfs_inode * ip)640 xchk_nlinks_compare_inode(
641 	struct xchk_nlink_ctrs	*xnc,
642 	struct xfs_inode	*ip)
643 {
644 	struct xchk_nlink	obs;
645 	struct xfs_scrub	*sc = xnc->sc;
646 	uint64_t		total_links;
647 	unsigned int		actual_nlink;
648 	int			error;
649 
650 	/*
651 	 * Ignore temporary files being used to stage repairs, since we assume
652 	 * they're correct for non-directories, and the directory repair code
653 	 * doesn't bump the link counts for the children.
654 	 */
655 	if (xrep_is_tempfile(ip))
656 		return 0;
657 
658 	xfs_ilock(ip, XFS_ILOCK_SHARED);
659 	mutex_lock(&xnc->lock);
660 
661 	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
662 		xchk_set_incomplete(xnc->sc);
663 		error = -ECANCELED;
664 		goto out_scanlock;
665 	}
666 
667 	error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs);
668 	if (error)
669 		goto out_scanlock;
670 
671 	/*
672 	 * If we don't have ftype to get an accurate count of the subdirectory
673 	 * entries in this directory, take advantage of the fact that on a
674 	 * consistent ftype=0 filesystem, the number of subdirectory
675 	 * backreferences (dotdot entries) pointing towards this directory
676 	 * should be equal to the number of subdirectory entries in the
677 	 * directory.
678 	 */
679 	if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode))
680 		obs.children = obs.backrefs;
681 
682 	total_links = xchk_nlink_total(ip, &obs);
683 	actual_nlink = VFS_I(ip)->i_nlink;
684 
685 	trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs);
686 
687 	/*
688 	 * If we found so many parents that we'd overflow i_nlink, we must flag
689 	 * this as a corruption.  The VFS won't let users increase the link
690 	 * count, but it will let them decrease it.
691 	 */
692 	if (total_links > XFS_NLINK_PINNED) {
693 		xchk_ino_set_corrupt(sc, ip->i_ino);
694 		goto out_corrupt;
695 	} else if (total_links > XFS_MAXLINK) {
696 		xchk_ino_set_warning(sc, ip->i_ino);
697 	}
698 
699 	/* Link counts should match. */
700 	if (total_links != actual_nlink) {
701 		xchk_ino_set_corrupt(sc, ip->i_ino);
702 		goto out_corrupt;
703 	}
704 
705 	if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) {
706 		/*
707 		 * The collection phase ignores directories with zero link
708 		 * count, so we ignore them here too.
709 		 *
710 		 * The number of subdirectory backreferences (dotdot entries)
711 		 * pointing towards this directory should be equal to the
712 		 * number of subdirectory entries in the directory.
713 		 */
714 		if (obs.children != obs.backrefs)
715 			xchk_ino_xref_set_corrupt(sc, ip->i_ino);
716 	} else {
717 		/*
718 		 * Non-directories and unlinked directories should not have
719 		 * back references.
720 		 */
721 		if (obs.backrefs != 0) {
722 			xchk_ino_set_corrupt(sc, ip->i_ino);
723 			goto out_corrupt;
724 		}
725 
726 		/*
727 		 * Non-directories and unlinked directories should not have
728 		 * children.
729 		 */
730 		if (obs.children != 0) {
731 			xchk_ino_set_corrupt(sc, ip->i_ino);
732 			goto out_corrupt;
733 		}
734 	}
735 
736 	if (xchk_inode_is_dirtree_root(ip)) {
737 		/*
738 		 * For the root of a directory tree, both the '.' and '..'
739 		 * entries should point to the root directory.  The dotdot
740 		 * entry is counted as a parent of the root /and/ a backref of
741 		 * the root directory.
742 		 */
743 		if (obs.parents != 1) {
744 			xchk_ino_set_corrupt(sc, ip->i_ino);
745 			goto out_corrupt;
746 		}
747 	} else if (actual_nlink > 0) {
748 		/*
749 		 * Linked files that are not the root directory should have at
750 		 * least one parent.
751 		 */
752 		if (obs.parents == 0) {
753 			xchk_ino_set_corrupt(sc, ip->i_ino);
754 			goto out_corrupt;
755 		}
756 	}
757 
758 out_corrupt:
759 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
760 		error = -ECANCELED;
761 out_scanlock:
762 	mutex_unlock(&xnc->lock);
763 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
764 	return error;
765 }
766 
767 /*
768  * Check our link count against an inode that wasn't checked previously.  This
769  * is intended to catch directories with dangling links, though we could be
770  * racing with inode allocation in other threads.
771  */
772 STATIC int
xchk_nlinks_compare_inum(struct xchk_nlink_ctrs * xnc,xfs_ino_t ino)773 xchk_nlinks_compare_inum(
774 	struct xchk_nlink_ctrs	*xnc,
775 	xfs_ino_t		ino)
776 {
777 	struct xchk_nlink	obs;
778 	struct xfs_mount	*mp = xnc->sc->mp;
779 	struct xfs_trans	*tp = xnc->sc->tp;
780 	struct xfs_buf		*agi_bp;
781 	struct xfs_inode	*ip;
782 	int			error;
783 
784 	/*
785 	 * The first iget failed, so try again with the variant that returns
786 	 * either an incore inode or the AGI buffer.  If the function returns
787 	 * EINVAL/ENOENT, it should have passed us the AGI buffer so that we
788 	 * can guarantee that the inode won't be allocated while we check for
789 	 * a zero link count in the observed link count data.
790 	 */
791 	error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip);
792 	if (!error) {
793 		/* Actually got an inode, so use the inode compare. */
794 		error = xchk_nlinks_compare_inode(xnc, ip);
795 		xchk_irele(xnc->sc, ip);
796 		return error;
797 	}
798 	if (error == -ENOENT || error == -EINVAL) {
799 		/* No inode was found.  Check for zero link count below. */
800 		error = 0;
801 	}
802 	if (error)
803 		goto out_agi;
804 
805 	/* Ensure that we have protected against inode allocation/freeing. */
806 	if (agi_bp == NULL) {
807 		ASSERT(agi_bp != NULL);
808 		xchk_set_incomplete(xnc->sc);
809 		return -ECANCELED;
810 	}
811 
812 	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
813 		xchk_set_incomplete(xnc->sc);
814 		error = -ECANCELED;
815 		goto out_agi;
816 	}
817 
818 	mutex_lock(&xnc->lock);
819 	error = xchk_nlinks_comparison_read(xnc, ino, &obs);
820 	if (error)
821 		goto out_scanlock;
822 
823 	trace_xchk_nlinks_check_zero(mp, ino, &obs);
824 
825 	/*
826 	 * If we can't grab the inode, the link count had better be zero.  We
827 	 * still hold the AGI to prevent inode allocation/freeing.
828 	 */
829 	if (xchk_nlink_total(NULL, &obs) != 0) {
830 		xchk_ino_set_corrupt(xnc->sc, ino);
831 		error = -ECANCELED;
832 	}
833 
834 out_scanlock:
835 	mutex_unlock(&xnc->lock);
836 out_agi:
837 	if (agi_bp)
838 		xfs_trans_brelse(tp, agi_bp);
839 	return error;
840 }
841 
842 /*
843  * Try to visit every inode in the filesystem to compare the link count.  Move
844  * on if we can't grab an inode, since we'll revisit unchecked nlink records in
845  * the second part.
846  */
847 static int
xchk_nlinks_compare_iter(struct xchk_nlink_ctrs * xnc,struct xfs_inode ** ipp)848 xchk_nlinks_compare_iter(
849 	struct xchk_nlink_ctrs	*xnc,
850 	struct xfs_inode	**ipp)
851 {
852 	int			error;
853 
854 	do {
855 		error = xchk_iscan_iter(&xnc->compare_iscan, ipp);
856 	} while (error == -EBUSY);
857 
858 	return error;
859 }
860 
861 /* Compare the link counts we observed against the live information. */
862 STATIC int
xchk_nlinks_compare(struct xchk_nlink_ctrs * xnc)863 xchk_nlinks_compare(
864 	struct xchk_nlink_ctrs	*xnc)
865 {
866 	struct xchk_nlink	nl;
867 	struct xfs_scrub	*sc = xnc->sc;
868 	struct xfs_inode	*ip;
869 	xfarray_idx_t		cur = XFARRAY_CURSOR_INIT;
870 	int			error;
871 
872 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
873 		return 0;
874 
875 	/*
876 	 * Create a new empty transaction so that we can advance the iscan
877 	 * cursor without deadlocking if the inobt has a cycle and push on the
878 	 * inactivation workqueue.
879 	 */
880 	xchk_trans_cancel(sc);
881 	xchk_trans_alloc_empty(sc);
882 
883 	/*
884 	 * Use the inobt to walk all allocated inodes to compare the link
885 	 * counts.  Inodes skipped by _compare_iter will be tried again in the
886 	 * next phase of the scan.
887 	 */
888 	xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan);
889 	while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) {
890 		error = xchk_nlinks_compare_inode(xnc, ip);
891 		xchk_iscan_mark_visited(&xnc->compare_iscan, ip);
892 		xchk_irele(sc, ip);
893 		if (error)
894 			break;
895 
896 		if (xchk_should_terminate(sc, &error))
897 			break;
898 	}
899 	xchk_iscan_iter_finish(&xnc->compare_iscan);
900 	xchk_iscan_teardown(&xnc->compare_iscan);
901 	if (error)
902 		return error;
903 
904 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
905 		return 0;
906 
907 	/*
908 	 * Walk all the non-null nlink observations that weren't checked in the
909 	 * previous step.
910 	 */
911 	mutex_lock(&xnc->lock);
912 	while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) {
913 		xfs_ino_t	ino = cur - 1;
914 
915 		if (nl.flags & XCHK_NLINK_COMPARE_SCANNED)
916 			continue;
917 
918 		mutex_unlock(&xnc->lock);
919 
920 		error = xchk_nlinks_compare_inum(xnc, ino);
921 		if (error)
922 			return error;
923 
924 		if (xchk_should_terminate(xnc->sc, &error))
925 			return error;
926 
927 		mutex_lock(&xnc->lock);
928 	}
929 	mutex_unlock(&xnc->lock);
930 
931 	return error;
932 }
933 
934 /* Tear down everything associated with a nlinks check. */
935 static void
xchk_nlinks_teardown_scan(void * priv)936 xchk_nlinks_teardown_scan(
937 	void			*priv)
938 {
939 	struct xchk_nlink_ctrs	*xnc = priv;
940 
941 	/* Discourage any hook functions that might be running. */
942 	xchk_iscan_abort(&xnc->collect_iscan);
943 
944 	xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook);
945 
946 	xfarray_destroy(xnc->nlinks);
947 	xnc->nlinks = NULL;
948 
949 	xchk_iscan_teardown(&xnc->collect_iscan);
950 	mutex_destroy(&xnc->lock);
951 	xnc->sc = NULL;
952 }
953 
954 /*
955  * Scan all inodes in the entire filesystem to generate link count data.  If
956  * the scan is successful, the counts will be left alive for a repair.  If any
957  * error occurs, we'll tear everything down.
958  */
959 STATIC int
xchk_nlinks_setup_scan(struct xfs_scrub * sc,struct xchk_nlink_ctrs * xnc)960 xchk_nlinks_setup_scan(
961 	struct xfs_scrub	*sc,
962 	struct xchk_nlink_ctrs	*xnc)
963 {
964 	struct xfs_mount	*mp = sc->mp;
965 	char			*descr;
966 	unsigned long long	max_inos;
967 	xfs_agnumber_t		last_agno = mp->m_sb.sb_agcount - 1;
968 	xfs_agino_t		first_agino, last_agino;
969 	int			error;
970 
971 	mutex_init(&xnc->lock);
972 
973 	/* Retry iget every tenth of a second for up to 30 seconds. */
974 	xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan);
975 
976 	/*
977 	 * Set up enough space to store an nlink record for the highest
978 	 * possible inode number in this system.
979 	 */
980 	xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
981 	max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
982 	descr = xchk_xfile_descr(sc, "file link counts");
983 	error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
984 			sizeof(struct xchk_nlink), &xnc->nlinks);
985 	kfree(descr);
986 	if (error)
987 		goto out_teardown;
988 
989 	/*
990 	 * Hook into the directory entry code so that we can capture updates to
991 	 * file link counts.  The hook only triggers for inodes that were
992 	 * already scanned, and the scanner thread takes each inode's ILOCK,
993 	 * which means that any in-progress inode updates will finish before we
994 	 * can scan the inode.
995 	 */
996 	ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
997 	xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update);
998 	error = xfs_dir_hook_add(mp, &xnc->dhook);
999 	if (error)
1000 		goto out_teardown;
1001 
1002 	/* Use deferred cleanup to pass the inode link count data to repair. */
1003 	sc->buf_cleanup = xchk_nlinks_teardown_scan;
1004 	return 0;
1005 
1006 out_teardown:
1007 	xchk_nlinks_teardown_scan(xnc);
1008 	return error;
1009 }
1010 
1011 /* Scrub the link count of all inodes on the filesystem. */
1012 int
xchk_nlinks(struct xfs_scrub * sc)1013 xchk_nlinks(
1014 	struct xfs_scrub	*sc)
1015 {
1016 	struct xchk_nlink_ctrs	*xnc = sc->buf;
1017 	int			error = 0;
1018 
1019 	/* Set ourselves up to check link counts on the live filesystem. */
1020 	error = xchk_nlinks_setup_scan(sc, xnc);
1021 	if (error)
1022 		return error;
1023 
1024 	/* Walk all inodes, picking up link count information. */
1025 	error = xchk_nlinks_collect(xnc);
1026 	if (!xchk_xref_process_error(sc, 0, 0, &error))
1027 		return error;
1028 
1029 	/* Fail fast if we're not playing with a full dataset. */
1030 	if (xchk_iscan_aborted(&xnc->collect_iscan))
1031 		xchk_set_incomplete(sc);
1032 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
1033 		return 0;
1034 
1035 	/* Compare link counts. */
1036 	error = xchk_nlinks_compare(xnc);
1037 	if (!xchk_xref_process_error(sc, 0, 0, &error))
1038 		return error;
1039 
1040 	/* Check one last time for an incomplete dataset. */
1041 	if (xchk_iscan_aborted(&xnc->collect_iscan))
1042 		xchk_set_incomplete(sc);
1043 
1044 	return 0;
1045 }
1046