xref: /linux/fs/xfs/scrub/nlinks.c (revision e6c9e75fbe792e1fb3bc7e7efce5c6bb015023c5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_icache.h"
16 #include "xfs_iwalk.h"
17 #include "xfs_ialloc.h"
18 #include "xfs_dir2.h"
19 #include "xfs_dir2_priv.h"
20 #include "xfs_ag.h"
21 #include "scrub/scrub.h"
22 #include "scrub/common.h"
23 #include "scrub/repair.h"
24 #include "scrub/xfile.h"
25 #include "scrub/xfarray.h"
26 #include "scrub/iscan.h"
27 #include "scrub/orphanage.h"
28 #include "scrub/nlinks.h"
29 #include "scrub/trace.h"
30 #include "scrub/readdir.h"
31 #include "scrub/tempfile.h"
32 
33 /*
34  * Live Inode Link Count Checking
35  * ==============================
36  *
37  * Inode link counts are "summary" metadata, in the sense that they are
38  * computed as the number of directory entries referencing each file on the
39  * filesystem.  Therefore, we compute the correct link counts by creating a
40  * shadow link count structure and walking every inode.
41  */
42 
43 /* Set us up to scrub inode link counts. */
44 int
45 xchk_setup_nlinks(
46 	struct xfs_scrub	*sc)
47 {
48 	struct xchk_nlink_ctrs	*xnc;
49 	int			error;
50 
51 	xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
52 
53 	if (xchk_could_repair(sc)) {
54 		error = xrep_setup_nlinks(sc);
55 		if (error)
56 			return error;
57 	}
58 
59 	xnc = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS);
60 	if (!xnc)
61 		return -ENOMEM;
62 	xnc->xname.name = xnc->namebuf;
63 	xnc->sc = sc;
64 	sc->buf = xnc;
65 
66 	return xchk_setup_fs(sc);
67 }
68 
69 /*
70  * Part 1: Collecting file link counts.  For each file, we create a shadow link
71  * counting structure, then walk the entire directory tree, incrementing parent
72  * and child link counts for each directory entry seen.
73  *
74  * To avoid false corruption reports in part 2, any failure in this part must
75  * set the INCOMPLETE flag even when a negative errno is returned.  This care
76  * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
77  * ECANCELED) that are absorbed into a scrub state flag update by
78  * xchk_*_process_error.  Scrub and repair share the same incore data
79  * structures, so the INCOMPLETE flag is critical to prevent a repair based on
80  * insufficient information.
81  *
82  * Because we are scanning a live filesystem, it's possible that another thread
83  * will try to update the link counts for an inode that we've already scanned.
84  * This will cause our counts to be incorrect.  Therefore, we hook all
85  * directory entry updates because that is when link count updates occur.  By
86  * shadowing transaction updates in this manner, live nlink check can ensure by
87  * locking the inode and the shadow structure that its own copies are not out
88  * of date.  Because the hook code runs in a different process context from the
89  * scrub code and the scrub state flags are not accessed atomically, failures
90  * in the hook code must abort the iscan and the scrubber must notice the
91  * aborted scan and set the incomplete flag.
92  *
93  * Note that we use jump labels and srcu notifier hooks to minimize the
94  * overhead when live nlinks is /not/ running.  Locking order for nlink
95  * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
96  */
97 
98 /*
99  * Add a delta to an nlink counter, clamping the value to U32_MAX.  Because
100  * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results
101  * even if we lose some precision.
102  */
103 static inline void
104 careful_add(
105 	xfs_nlink_t	*nlinkp,
106 	int		delta)
107 {
108 	uint64_t	new_value = (uint64_t)(*nlinkp) + delta;
109 
110 	BUILD_BUG_ON(XFS_MAXLINK > U32_MAX);
111 	*nlinkp = min_t(uint64_t, new_value, U32_MAX);
112 }
113 
114 /* Update incore link count information.  Caller must hold the nlinks lock. */
115 STATIC int
116 xchk_nlinks_update_incore(
117 	struct xchk_nlink_ctrs	*xnc,
118 	xfs_ino_t		ino,
119 	int			parents_delta,
120 	int			backrefs_delta,
121 	int			children_delta)
122 {
123 	struct xchk_nlink	nl;
124 	int			error;
125 
126 	if (!xnc->nlinks)
127 		return 0;
128 
129 	error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
130 	if (error)
131 		return error;
132 
133 	trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta,
134 			backrefs_delta, children_delta);
135 
136 	careful_add(&nl.parents, parents_delta);
137 	careful_add(&nl.backrefs, backrefs_delta);
138 	careful_add(&nl.children, children_delta);
139 
140 	nl.flags |= XCHK_NLINK_WRITTEN;
141 	error = xfarray_store(xnc->nlinks, ino, &nl);
142 	if (error == -EFBIG) {
143 		/*
144 		 * EFBIG means we tried to store data at too high a byte offset
145 		 * in the sparse array.  IOWs, we cannot complete the check and
146 		 * must notify userspace that the check was incomplete.
147 		 */
148 		error = -ECANCELED;
149 	}
150 	return error;
151 }
152 
153 /*
154  * Apply a link count change from the regular filesystem into our shadow link
155  * count structure based on a directory update in progress.
156  */
157 STATIC int
158 xchk_nlinks_live_update(
159 	struct notifier_block		*nb,
160 	unsigned long			action,
161 	void				*data)
162 {
163 	struct xfs_dir_update_params	*p = data;
164 	struct xchk_nlink_ctrs		*xnc;
165 	int				error;
166 
167 	xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb);
168 
169 	/*
170 	 * Ignore temporary directories being used to stage dir repairs, since
171 	 * we don't bump the link counts of the children.
172 	 */
173 	if (xrep_is_tempfile(p->dp))
174 		return NOTIFY_DONE;
175 
176 	trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino,
177 			p->delta, p->name->name, p->name->len);
178 
179 	/*
180 	 * If we've already scanned @dp, update the number of parents that link
181 	 * to @ip.  If @ip is a subdirectory, update the number of child links
182 	 * going out of @dp.
183 	 */
184 	if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) {
185 		mutex_lock(&xnc->lock);
186 		error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta,
187 				0, 0);
188 		if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode))
189 			error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
190 					0, p->delta);
191 		mutex_unlock(&xnc->lock);
192 		if (error)
193 			goto out_abort;
194 	}
195 
196 	/*
197 	 * If @ip is a subdirectory and we've already scanned it, update the
198 	 * number of backrefs pointing to @dp.
199 	 */
200 	if (S_ISDIR(VFS_IC(p->ip)->i_mode) &&
201 	    xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) {
202 		mutex_lock(&xnc->lock);
203 		error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
204 				p->delta, 0);
205 		mutex_unlock(&xnc->lock);
206 		if (error)
207 			goto out_abort;
208 	}
209 
210 	return NOTIFY_DONE;
211 
212 out_abort:
213 	xchk_iscan_abort(&xnc->collect_iscan);
214 	return NOTIFY_DONE;
215 }
216 
217 /* Bump the observed link count for the inode referenced by this entry. */
218 STATIC int
219 xchk_nlinks_collect_dirent(
220 	struct xfs_scrub	*sc,
221 	struct xfs_inode	*dp,
222 	xfs_dir2_dataptr_t	dapos,
223 	const struct xfs_name	*name,
224 	xfs_ino_t		ino,
225 	void			*priv)
226 {
227 	struct xchk_nlink_ctrs	*xnc = priv;
228 	bool			dot = false, dotdot = false;
229 	int			error;
230 
231 	/* Does this name make sense? */
232 	if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) {
233 		error = -ECANCELED;
234 		goto out_abort;
235 	}
236 
237 	if (name->len == 1 && name->name[0] == '.')
238 		dot = true;
239 	else if (name->len == 2 && name->name[0] == '.' &&
240 				   name->name[1] == '.')
241 		dotdot = true;
242 
243 	/* Don't accept a '.' entry that points somewhere else. */
244 	if (dot && ino != dp->i_ino) {
245 		error = -ECANCELED;
246 		goto out_abort;
247 	}
248 
249 	/* Don't accept an invalid inode number. */
250 	if (!xfs_verify_dir_ino(sc->mp, ino)) {
251 		error = -ECANCELED;
252 		goto out_abort;
253 	}
254 
255 	/* Update the shadow link counts if we haven't already failed. */
256 
257 	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
258 		error = -ECANCELED;
259 		goto out_incomplete;
260 	}
261 
262 	trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name);
263 
264 	mutex_lock(&xnc->lock);
265 
266 	/*
267 	 * If this is a dotdot entry, it is a back link from dp to ino.  How
268 	 * we handle this depends on whether or not dp is the root directory.
269 	 *
270 	 * The root directory is its own parent, so we pretend the dotdot entry
271 	 * establishes the "parent" of the root directory.  Increment the
272 	 * number of parents of the root directory.
273 	 *
274 	 * Otherwise, increment the number of backrefs pointing back to ino.
275 	 */
276 	if (dotdot) {
277 		if (dp == sc->mp->m_rootip)
278 			error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
279 		else
280 			error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
281 		if (error)
282 			goto out_unlock;
283 	}
284 
285 	/*
286 	 * If this dirent is a forward link from dp to ino, increment the
287 	 * number of parents linking into ino.
288 	 */
289 	if (!dot && !dotdot) {
290 		error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
291 		if (error)
292 			goto out_unlock;
293 	}
294 
295 	/*
296 	 * If this dirent is a forward link to a subdirectory, increment the
297 	 * number of child links of dp.
298 	 */
299 	if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) {
300 		error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1);
301 		if (error)
302 			goto out_unlock;
303 	}
304 
305 	mutex_unlock(&xnc->lock);
306 	return 0;
307 
308 out_unlock:
309 	mutex_unlock(&xnc->lock);
310 out_abort:
311 	xchk_iscan_abort(&xnc->collect_iscan);
312 out_incomplete:
313 	xchk_set_incomplete(sc);
314 	return error;
315 }
316 
317 /* Walk a directory to bump the observed link counts of the children. */
318 STATIC int
319 xchk_nlinks_collect_dir(
320 	struct xchk_nlink_ctrs	*xnc,
321 	struct xfs_inode	*dp)
322 {
323 	struct xfs_scrub	*sc = xnc->sc;
324 	unsigned int		lock_mode;
325 	int			error = 0;
326 
327 	/*
328 	 * Ignore temporary directories being used to stage dir repairs, since
329 	 * we don't bump the link counts of the children.
330 	 */
331 	if (xrep_is_tempfile(dp))
332 		return 0;
333 
334 	/* Prevent anyone from changing this directory while we walk it. */
335 	xfs_ilock(dp, XFS_IOLOCK_SHARED);
336 	lock_mode = xfs_ilock_data_map_shared(dp);
337 
338 	/*
339 	 * The dotdot entry of an unlinked directory still points to the last
340 	 * parent, but the parent no longer links to this directory.  Skip the
341 	 * directory to avoid overcounting.
342 	 */
343 	if (VFS_I(dp)->i_nlink == 0)
344 		goto out_unlock;
345 
346 	/*
347 	 * We cannot count file links if the directory looks as though it has
348 	 * been zapped by the inode record repair code.
349 	 */
350 	if (xchk_dir_looks_zapped(dp)) {
351 		error = -EBUSY;
352 		goto out_abort;
353 	}
354 
355 	error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc);
356 	if (error == -ECANCELED) {
357 		error = 0;
358 		goto out_unlock;
359 	}
360 	if (error)
361 		goto out_abort;
362 
363 	xchk_iscan_mark_visited(&xnc->collect_iscan, dp);
364 	goto out_unlock;
365 
366 out_abort:
367 	xchk_set_incomplete(sc);
368 	xchk_iscan_abort(&xnc->collect_iscan);
369 out_unlock:
370 	xfs_iunlock(dp, lock_mode);
371 	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
372 	return error;
373 }
374 
375 /* If this looks like a valid pointer, count it. */
376 static inline int
377 xchk_nlinks_collect_metafile(
378 	struct xchk_nlink_ctrs	*xnc,
379 	xfs_ino_t		ino)
380 {
381 	if (!xfs_verify_ino(xnc->sc->mp, ino))
382 		return 0;
383 
384 	trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino);
385 	return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
386 }
387 
388 /* Bump the link counts of metadata files rooted in the superblock. */
389 STATIC int
390 xchk_nlinks_collect_metafiles(
391 	struct xchk_nlink_ctrs	*xnc)
392 {
393 	struct xfs_mount	*mp = xnc->sc->mp;
394 	int			error = -ECANCELED;
395 
396 
397 	if (xchk_iscan_aborted(&xnc->collect_iscan))
398 		goto out_incomplete;
399 
400 	mutex_lock(&xnc->lock);
401 	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino);
402 	if (error)
403 		goto out_abort;
404 
405 	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino);
406 	if (error)
407 		goto out_abort;
408 
409 	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino);
410 	if (error)
411 		goto out_abort;
412 
413 	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino);
414 	if (error)
415 		goto out_abort;
416 
417 	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino);
418 	if (error)
419 		goto out_abort;
420 	mutex_unlock(&xnc->lock);
421 
422 	return 0;
423 
424 out_abort:
425 	mutex_unlock(&xnc->lock);
426 	xchk_iscan_abort(&xnc->collect_iscan);
427 out_incomplete:
428 	xchk_set_incomplete(xnc->sc);
429 	return error;
430 }
431 
432 /* Advance the collection scan cursor for this non-directory file. */
433 static inline int
434 xchk_nlinks_collect_file(
435 	struct xchk_nlink_ctrs	*xnc,
436 	struct xfs_inode	*ip)
437 {
438 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
439 	xchk_iscan_mark_visited(&xnc->collect_iscan, ip);
440 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
441 	return 0;
442 }
443 
444 /* Walk all directories and count inode links. */
445 STATIC int
446 xchk_nlinks_collect(
447 	struct xchk_nlink_ctrs	*xnc)
448 {
449 	struct xfs_scrub	*sc = xnc->sc;
450 	struct xfs_inode	*ip;
451 	int			error;
452 
453 	/* Count the rt and quota files that are rooted in the superblock. */
454 	error = xchk_nlinks_collect_metafiles(xnc);
455 	if (error)
456 		return error;
457 
458 	/*
459 	 * Set up for a potentially lengthy filesystem scan by reducing our
460 	 * transaction resource usage for the duration.  Specifically:
461 	 *
462 	 * Cancel the transaction to release the log grant space while we scan
463 	 * the filesystem.
464 	 *
465 	 * Create a new empty transaction to eliminate the possibility of the
466 	 * inode scan deadlocking on cyclical metadata.
467 	 *
468 	 * We pass the empty transaction to the file scanning function to avoid
469 	 * repeatedly cycling empty transactions.  This can be done even though
470 	 * we take the IOLOCK to quiesce the file because empty transactions
471 	 * do not take sb_internal.
472 	 */
473 	xchk_trans_cancel(sc);
474 	error = xchk_trans_alloc_empty(sc);
475 	if (error)
476 		return error;
477 
478 	while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
479 		if (S_ISDIR(VFS_I(ip)->i_mode))
480 			error = xchk_nlinks_collect_dir(xnc, ip);
481 		else
482 			error = xchk_nlinks_collect_file(xnc, ip);
483 		xchk_irele(sc, ip);
484 		if (error)
485 			break;
486 
487 		if (xchk_should_terminate(sc, &error))
488 			break;
489 	}
490 	xchk_iscan_iter_finish(&xnc->collect_iscan);
491 	if (error) {
492 		xchk_set_incomplete(sc);
493 		/*
494 		 * If we couldn't grab an inode that was busy with a state
495 		 * change, change the error code so that we exit to userspace
496 		 * as quickly as possible.
497 		 */
498 		if (error == -EBUSY)
499 			return -ECANCELED;
500 		return error;
501 	}
502 
503 	/*
504 	 * Switch out for a real transaction in preparation for building a new
505 	 * tree.
506 	 */
507 	xchk_trans_cancel(sc);
508 	return xchk_setup_fs(sc);
509 }
510 
511 /*
512  * Part 2: Comparing file link counters.  Walk each inode and compare the link
513  * counts against our shadow information; and then walk each shadow link count
514  * structure (that wasn't covered in the first part), comparing it against the
515  * file.
516  */
517 
518 /* Read the observed link count for comparison with the actual inode. */
519 STATIC int
520 xchk_nlinks_comparison_read(
521 	struct xchk_nlink_ctrs	*xnc,
522 	xfs_ino_t		ino,
523 	struct xchk_nlink	*obs)
524 {
525 	struct xchk_nlink	nl;
526 	int			error;
527 
528 	error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
529 	if (error)
530 		return error;
531 
532 	nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN);
533 
534 	error = xfarray_store(xnc->nlinks, ino, &nl);
535 	if (error == -EFBIG) {
536 		/*
537 		 * EFBIG means we tried to store data at too high a byte offset
538 		 * in the sparse array.  IOWs, we cannot complete the check and
539 		 * must notify userspace that the check was incomplete.  This
540 		 * shouldn't really happen outside of the collection phase.
541 		 */
542 		xchk_set_incomplete(xnc->sc);
543 		return -ECANCELED;
544 	}
545 	if (error)
546 		return error;
547 
548 	/* Copy the counters, but do not expose the internal state. */
549 	obs->parents = nl.parents;
550 	obs->backrefs = nl.backrefs;
551 	obs->children = nl.children;
552 	obs->flags = 0;
553 	return 0;
554 }
555 
/*
 * Check our link count against an inode.
 *
 * Compares the shadow counters recorded for @ip against the inode's actual
 * link count, under the inode's shared ILOCK and the nlinks lock.
 *
 * Returns 0 if nothing hard-corrupt was found (or @ip is a repair tempfile),
 * -ECANCELED if the collection scan was aborted (the scrub is then marked
 * incomplete) or if the CORRUPT output flag is set once the checks finish,
 * or a negative errno from reading the shadow record.
 */
STATIC int
xchk_nlinks_compare_inode(
	struct xchk_nlink_ctrs	*xnc,
	struct xfs_inode	*ip)
{
	struct xchk_nlink	obs;
	struct xfs_scrub	*sc = xnc->sc;
	uint64_t		total_links;
	unsigned int		actual_nlink;
	int			error;

	/*
	 * Ignore temporary files being used to stage repairs, since we assume
	 * they're correct for non-directories, and the directory repair code
	 * doesn't bump the link counts for the children.
	 */
	if (xrep_is_tempfile(ip))
		return 0;

	/* Lock order: inode ILOCK first, then the nlinks lock. */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	mutex_lock(&xnc->lock);

	/* An aborted collection scan means the shadow data can't be trusted. */
	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
		xchk_set_incomplete(xnc->sc);
		error = -ECANCELED;
		goto out_scanlock;
	}

	/* On success, error is 0 from here until the out_corrupt label. */
	error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs);
	if (error)
		goto out_scanlock;

	/*
	 * If we don't have ftype to get an accurate count of the subdirectory
	 * entries in this directory, take advantage of the fact that on a
	 * consistent ftype=0 filesystem, the number of subdirectory
	 * backreferences (dotdot entries) pointing towards this directory
	 * should be equal to the number of subdirectory entries in the
	 * directory.
	 */
	if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode))
		obs.children = obs.backrefs;

	total_links = xchk_nlink_total(ip, &obs);
	actual_nlink = VFS_I(ip)->i_nlink;

	trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs);

	/*
	 * If we found so many parents that we'd overflow i_nlink, we must flag
	 * this as a corruption.  The VFS won't let users increase the link
	 * count, but it will let them decrease it.
	 */
	if (total_links > XFS_MAXLINK) {
		xchk_ino_set_corrupt(sc, ip->i_ino);
		goto out_corrupt;
	}

	/* Link counts should match. */
	if (total_links != actual_nlink) {
		xchk_ino_set_corrupt(sc, ip->i_ino);
		goto out_corrupt;
	}

	if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) {
		/*
		 * The collection phase ignores directories with zero link
		 * count, so we ignore them here too.
		 *
		 * The number of subdirectory backreferences (dotdot entries)
		 * pointing towards this directory should be equal to the
		 * number of subdirectory entries in the directory.
		 *
		 * A mismatch is reported as a cross-reference problem only,
		 * and the remaining checks below still run.
		 */
		if (obs.children != obs.backrefs)
			xchk_ino_xref_set_corrupt(sc, ip->i_ino);
	} else {
		/*
		 * Non-directories and unlinked directories should not have
		 * back references.
		 */
		if (obs.backrefs != 0) {
			xchk_ino_set_corrupt(sc, ip->i_ino);
			goto out_corrupt;
		}

		/*
		 * Non-directories and unlinked directories should not have
		 * children.
		 */
		if (obs.children != 0) {
			xchk_ino_set_corrupt(sc, ip->i_ino);
			goto out_corrupt;
		}
	}

	if (ip == sc->mp->m_rootip) {
		/*
		 * For the root of a directory tree, both the '.' and '..'
		 * entries should point to the root directory.  The dotdot
		 * entry is counted as a parent of the root /and/ a backref of
		 * the root directory.
		 */
		if (obs.parents != 1) {
			xchk_ino_set_corrupt(sc, ip->i_ino);
			goto out_corrupt;
		}
	} else if (actual_nlink > 0) {
		/*
		 * Linked files that are not the root directory should have at
		 * least one parent.
		 */
		if (obs.parents == 0) {
			xchk_ino_set_corrupt(sc, ip->i_ino);
			goto out_corrupt;
		}
	}

out_corrupt:
	/*
	 * Reached on both the clean and the corrupt paths: the return value is
	 * decided by the CORRUPT output flag rather than by which check ran.
	 */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		error = -ECANCELED;
out_scanlock:
	mutex_unlock(&xnc->lock);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return error;
}
682 
683 /*
684  * Check our link count against an inode that wasn't checked previously.  This
685  * is intended to catch directories with dangling links, though we could be
686  * racing with inode allocation in other threads.
687  */
688 STATIC int
689 xchk_nlinks_compare_inum(
690 	struct xchk_nlink_ctrs	*xnc,
691 	xfs_ino_t		ino)
692 {
693 	struct xchk_nlink	obs;
694 	struct xfs_mount	*mp = xnc->sc->mp;
695 	struct xfs_trans	*tp = xnc->sc->tp;
696 	struct xfs_buf		*agi_bp;
697 	struct xfs_inode	*ip;
698 	int			error;
699 
700 	/*
701 	 * The first iget failed, so try again with the variant that returns
702 	 * either an incore inode or the AGI buffer.  If the function returns
703 	 * EINVAL/ENOENT, it should have passed us the AGI buffer so that we
704 	 * can guarantee that the inode won't be allocated while we check for
705 	 * a zero link count in the observed link count data.
706 	 */
707 	error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip);
708 	if (!error) {
709 		/* Actually got an inode, so use the inode compare. */
710 		error = xchk_nlinks_compare_inode(xnc, ip);
711 		xchk_irele(xnc->sc, ip);
712 		return error;
713 	}
714 	if (error == -ENOENT || error == -EINVAL) {
715 		/* No inode was found.  Check for zero link count below. */
716 		error = 0;
717 	}
718 	if (error)
719 		goto out_agi;
720 
721 	/* Ensure that we have protected against inode allocation/freeing. */
722 	if (agi_bp == NULL) {
723 		ASSERT(agi_bp != NULL);
724 		xchk_set_incomplete(xnc->sc);
725 		return -ECANCELED;
726 	}
727 
728 	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
729 		xchk_set_incomplete(xnc->sc);
730 		error = -ECANCELED;
731 		goto out_agi;
732 	}
733 
734 	mutex_lock(&xnc->lock);
735 	error = xchk_nlinks_comparison_read(xnc, ino, &obs);
736 	if (error)
737 		goto out_scanlock;
738 
739 	trace_xchk_nlinks_check_zero(mp, ino, &obs);
740 
741 	/*
742 	 * If we can't grab the inode, the link count had better be zero.  We
743 	 * still hold the AGI to prevent inode allocation/freeing.
744 	 */
745 	if (xchk_nlink_total(NULL, &obs) != 0) {
746 		xchk_ino_set_corrupt(xnc->sc, ino);
747 		error = -ECANCELED;
748 	}
749 
750 out_scanlock:
751 	mutex_unlock(&xnc->lock);
752 out_agi:
753 	if (agi_bp)
754 		xfs_trans_brelse(tp, agi_bp);
755 	return error;
756 }
757 
758 /*
759  * Try to visit every inode in the filesystem to compare the link count.  Move
760  * on if we can't grab an inode, since we'll revisit unchecked nlink records in
761  * the second part.
762  */
763 static int
764 xchk_nlinks_compare_iter(
765 	struct xchk_nlink_ctrs	*xnc,
766 	struct xfs_inode	**ipp)
767 {
768 	int			error;
769 
770 	do {
771 		error = xchk_iscan_iter(&xnc->compare_iscan, ipp);
772 	} while (error == -EBUSY);
773 
774 	return error;
775 }
776 
/*
 * Compare the link counts we observed against the live information.
 *
 * Runs in two passes: first every inode returned by the comparison scan is
 * checked against its shadow record; then every shadow record that was not
 * marked as compared in the first pass is checked by inode number.
 *
 * Returns 0 immediately if the CORRUPT output flag is already set on entry
 * or after the first pass; otherwise returns the result of the last step
 * that ran (0 on a clean finish, or a negative errno).
 */
STATIC int
xchk_nlinks_compare(
	struct xchk_nlink_ctrs	*xnc)
{
	struct xchk_nlink	nl;
	struct xfs_scrub	*sc = xnc->sc;
	struct xfs_inode	*ip;
	xfarray_idx_t		cur = XFARRAY_CURSOR_INIT;
	int			error;

	/* Nothing more to learn if corruption has already been flagged. */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/*
	 * Create a new empty transaction so that we can advance the iscan
	 * cursor without deadlocking if the inobt has a cycle and push on the
	 * inactivation workqueue.
	 */
	xchk_trans_cancel(sc);
	error = xchk_trans_alloc_empty(sc);
	if (error)
		return error;

	/*
	 * Use the inobt to walk all allocated inodes to compare the link
	 * counts.  Inodes skipped by _compare_iter will be tried again in the
	 * next phase of the scan.
	 */
	xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan);
	while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) {
		error = xchk_nlinks_compare_inode(xnc, ip);
		/* Mark and release the inode whether or not the compare failed. */
		xchk_iscan_mark_visited(&xnc->compare_iscan, ip);
		xchk_irele(sc, ip);
		if (error)
			break;

		if (xchk_should_terminate(sc, &error))
			break;
	}
	xchk_iscan_iter_finish(&xnc->compare_iscan);
	xchk_iscan_teardown(&xnc->compare_iscan);
	if (error)
		return error;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/*
	 * Walk all the non-null nlink observations that weren't checked in the
	 * previous step.
	 */
	mutex_lock(&xnc->lock);
	while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) {
		/*
		 * NOTE(review): presumably xfarray_iter leaves the cursor one
		 * past the record it just returned, making cur - 1 the inode
		 * number of this record -- confirm against xfarray_iter.
		 */
		xfs_ino_t	ino = cur - 1;

		if (nl.flags & XCHK_NLINK_COMPARE_SCANNED)
			continue;

		/*
		 * Drop the nlinks lock around the per-inode check, because
		 * xchk_nlinks_compare_inum takes it itself.
		 */
		mutex_unlock(&xnc->lock);

		/* Both early returns below leave with the lock released. */
		error = xchk_nlinks_compare_inum(xnc, ino);
		if (error)
			return error;

		if (xchk_should_terminate(xnc->sc, &error))
			return error;

		mutex_lock(&xnc->lock);
	}
	mutex_unlock(&xnc->lock);

	return error;
}
851 
852 /* Tear down everything associated with a nlinks check. */
853 static void
854 xchk_nlinks_teardown_scan(
855 	void			*priv)
856 {
857 	struct xchk_nlink_ctrs	*xnc = priv;
858 
859 	/* Discourage any hook functions that might be running. */
860 	xchk_iscan_abort(&xnc->collect_iscan);
861 
862 	xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook);
863 
864 	xfarray_destroy(xnc->nlinks);
865 	xnc->nlinks = NULL;
866 
867 	xchk_iscan_teardown(&xnc->collect_iscan);
868 	mutex_destroy(&xnc->lock);
869 	xnc->sc = NULL;
870 }
871 
872 /*
873  * Scan all inodes in the entire filesystem to generate link count data.  If
874  * the scan is successful, the counts will be left alive for a repair.  If any
875  * error occurs, we'll tear everything down.
876  */
877 STATIC int
878 xchk_nlinks_setup_scan(
879 	struct xfs_scrub	*sc,
880 	struct xchk_nlink_ctrs	*xnc)
881 {
882 	struct xfs_mount	*mp = sc->mp;
883 	char			*descr;
884 	unsigned long long	max_inos;
885 	xfs_agnumber_t		last_agno = mp->m_sb.sb_agcount - 1;
886 	xfs_agino_t		first_agino, last_agino;
887 	int			error;
888 
889 	mutex_init(&xnc->lock);
890 
891 	/* Retry iget every tenth of a second for up to 30 seconds. */
892 	xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan);
893 
894 	/*
895 	 * Set up enough space to store an nlink record for the highest
896 	 * possible inode number in this system.
897 	 */
898 	xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
899 	max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
900 	descr = xchk_xfile_descr(sc, "file link counts");
901 	error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
902 			sizeof(struct xchk_nlink), &xnc->nlinks);
903 	kfree(descr);
904 	if (error)
905 		goto out_teardown;
906 
907 	/*
908 	 * Hook into the directory entry code so that we can capture updates to
909 	 * file link counts.  The hook only triggers for inodes that were
910 	 * already scanned, and the scanner thread takes each inode's ILOCK,
911 	 * which means that any in-progress inode updates will finish before we
912 	 * can scan the inode.
913 	 */
914 	ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
915 	xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update);
916 	error = xfs_dir_hook_add(mp, &xnc->dhook);
917 	if (error)
918 		goto out_teardown;
919 
920 	/* Use deferred cleanup to pass the inode link count data to repair. */
921 	sc->buf_cleanup = xchk_nlinks_teardown_scan;
922 	return 0;
923 
924 out_teardown:
925 	xchk_nlinks_teardown_scan(xnc);
926 	return error;
927 }
928 
929 /* Scrub the link count of all inodes on the filesystem. */
930 int
931 xchk_nlinks(
932 	struct xfs_scrub	*sc)
933 {
934 	struct xchk_nlink_ctrs	*xnc = sc->buf;
935 	int			error = 0;
936 
937 	/* Set ourselves up to check link counts on the live filesystem. */
938 	error = xchk_nlinks_setup_scan(sc, xnc);
939 	if (error)
940 		return error;
941 
942 	/* Walk all inodes, picking up link count information. */
943 	error = xchk_nlinks_collect(xnc);
944 	if (!xchk_xref_process_error(sc, 0, 0, &error))
945 		return error;
946 
947 	/* Fail fast if we're not playing with a full dataset. */
948 	if (xchk_iscan_aborted(&xnc->collect_iscan))
949 		xchk_set_incomplete(sc);
950 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
951 		return 0;
952 
953 	/* Compare link counts. */
954 	error = xchk_nlinks_compare(xnc);
955 	if (!xchk_xref_process_error(sc, 0, 0, &error))
956 		return error;
957 
958 	/* Check one last time for an incomplete dataset. */
959 	if (xchk_iscan_aborted(&xnc->collect_iscan))
960 		xchk_set_incomplete(sc);
961 
962 	return 0;
963 }
964