xref: /linux/fs/xfs/scrub/iscan.c (revision e6a901a00822659181c93c86d8bbc2a17779fddc)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_btree.h"
16 #include "xfs_ialloc.h"
17 #include "xfs_ialloc_btree.h"
18 #include "xfs_ag.h"
19 #include "xfs_error.h"
20 #include "xfs_bit.h"
21 #include "xfs_icache.h"
22 #include "scrub/scrub.h"
23 #include "scrub/iscan.h"
24 #include "scrub/common.h"
25 #include "scrub/trace.h"
26 
27 /*
28  * Live File Scan
29  * ==============
30  *
31  * Live file scans walk every inode in a live filesystem.  This is more or
32  * less like a regular iwalk, except that when we're advancing the scan cursor,
33  * we must ensure that inodes cannot be added or deleted anywhere between the
34  * old cursor value and the new cursor value.  If we're advancing the cursor
35  * by one inode, the caller must hold that inode; if we're finding the next
36  * inode to scan, we must grab the AGI and hold it until we've updated the
37  * scan cursor.
38  *
39  * Callers are expected to use this code to scan all files in the filesystem to
40  * construct a new metadata index of some kind.  The scan races against other
41  * live updates, which means there must be a provision to update the new index
42  * when updates are made to inodes that have already been scanned.  The iscan
43  * lock can be used in live update hook code to stop the scan and protect this
44  * data structure.
45  *
46  * To keep the new index up to date with other metadata updates being made to
47  * the live filesystem, it is assumed that the caller will add hooks as needed
48  * to be notified when a metadata update occurs.  The inode scanner must call
49  * xchk_iscan_mark_visited to tell the hook code when an inode has been visited.
50  * Hook functions can use xchk_iscan_want_live_update to decide if the
51  * scanner's observations must be updated.
52  */
53 
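/*
 * As a rough illustration of the hook pattern described above, a live update
 * hook might be structured like the sketch below.  The xchk_fubar structure,
 * its fields, and the notification payload are hypothetical; only the
 * xchk_iscan_want_live_update call reflects the API defined in this file.
 *
 *	STATIC int
 *	xchk_fubar_live_update(
 *		struct notifier_block	*nb,
 *		unsigned long		action,
 *		void			*data)
 *	{
 *		struct xchk_fubar	*fubar;
 *		xfs_ino_t		ino;
 *
 *		fubar = container_of(nb, struct xchk_fubar, hooks.nb);
 *		ino = ...;	(taken from the hypothetical payload in @data)
 *
 *		(Apply this update only if the scanner has already visited or
 *		 skipped this inode; otherwise the scan itself will observe
 *		 the new state when it gets there.)
 *		if (!xchk_iscan_want_live_update(&fubar->iscan, ino))
 *			return NOTIFY_DONE;
 *
 *		(... update the in-memory index under fubar's own lock ...)
 *		return NOTIFY_DONE;
 *	}
 */
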
54 /*
55  * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so
56  * that the scan ignores that inode.
57  */
58 STATIC void
59 xchk_iscan_mask_skipino(
60 	struct xchk_iscan	*iscan,
61 	struct xfs_perag	*pag,
62 	struct xfs_inobt_rec_incore	*rec,
63 	xfs_agino_t		lastrecino)
64 {
65 	struct xfs_scrub	*sc = iscan->sc;
66 	struct xfs_mount	*mp = sc->mp;
67 	xfs_agnumber_t		skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino);
68 	xfs_agino_t		skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino);
69 
70 	if (pag->pag_agno != skip_agno)
71 		return;
72 	if (skip_agino < rec->ir_startino)
73 		return;
74 	if (skip_agino > lastrecino)
75 		return;
76 
77 	rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1);
78 }
79 
80 /*
81  * Set *cursor to the next allocated inode after whatever it's set to now.
82  * If there are no more inodes in this AG, cursor is set to NULLAGINO.
83  */
84 STATIC int
85 xchk_iscan_find_next(
86 	struct xchk_iscan	*iscan,
87 	struct xfs_buf		*agi_bp,
88 	struct xfs_perag	*pag,
89 	xfs_inofree_t		*allocmaskp,
90 	xfs_agino_t		*cursor,
91 	uint8_t			*nr_inodesp)
92 {
93 	struct xfs_scrub	*sc = iscan->sc;
94 	struct xfs_inobt_rec_incore	rec;
95 	struct xfs_btree_cur	*cur;
96 	struct xfs_mount	*mp = sc->mp;
97 	struct xfs_trans	*tp = sc->tp;
98 	xfs_agnumber_t		agno = pag->pag_agno;
99 	xfs_agino_t		lastino = NULLAGINO;
100 	xfs_agino_t		first, last;
101 	xfs_agino_t		agino = *cursor;
102 	int			has_rec;
103 	int			error;
104 
105 	/* If the cursor is beyond the end of this AG, move to the next one. */
106 	xfs_agino_range(mp, agno, &first, &last);
107 	if (agino > last) {
108 		*cursor = NULLAGINO;
109 		return 0;
110 	}
111 
112 	/*
113 	 * Look up the inode chunk for the current cursor position.  If there
114 	 * is no chunk here, we want the next one.
115 	 */
116 	cur = xfs_inobt_init_cursor(pag, tp, agi_bp);
117 	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec);
118 	if (!error && !has_rec)
119 		error = xfs_btree_increment(cur, 0, &has_rec);
120 	for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) {
121 		xfs_inofree_t	allocmask;
122 
123 		/*
124 		 * If we've run out of inobt records in this AG, move the
125 		 * cursor on to the next AG and exit.  The caller can try
126 		 * again with the next AG.
127 		 */
128 		if (!has_rec) {
129 			*cursor = NULLAGINO;
130 			break;
131 		}
132 
133 		error = xfs_inobt_get_rec(cur, &rec, &has_rec);
134 		if (error)
135 			break;
136 		if (!has_rec) {
137 			error = -EFSCORRUPTED;
138 			break;
139 		}
140 
141 		/* Make sure that we always move forward. */
142 		if (lastino != NULLAGINO &&
143 		    XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) {
144 			error = -EFSCORRUPTED;
145 			break;
146 		}
147 		lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1;
148 
149 		/*
150 		 * If this record only covers inodes that come before the
151 		 * cursor, advance to the next record.
152 		 */
153 		if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
154 			continue;
155 
156 		if (iscan->skip_ino)
157 			xchk_iscan_mask_skipino(iscan, pag, &rec, lastino);
158 
159 		/*
160 		 * If the incoming lookup put us in the middle of an inobt
161 		 * record, mark it and the previous inodes "free" so that the
162 		 * search for allocated inodes will start at the cursor.
163 		 * We don't care about ir_freecount here.
164 		 */
165 		if (agino >= rec.ir_startino)
166 			rec.ir_free |= xfs_inobt_maskn(0,
167 						agino + 1 - rec.ir_startino);
168 
169 		/*
170 		 * If there are allocated inodes in this chunk, find them
171 		 * and update the scan cursor.
172 		 */
173 		allocmask = ~rec.ir_free;
174 		if (hweight64(allocmask) > 0) {
175 			int	next = xfs_lowbit64(allocmask);
176 
177 			ASSERT(next >= 0);
178 			*cursor = rec.ir_startino + next;
179 			*allocmaskp = allocmask >> next;
180 			*nr_inodesp = XFS_INODES_PER_CHUNK - next;
181 			break;
182 		}
183 	}
184 
185 	xfs_btree_del_cursor(cur, error);
186 	return error;
187 }
188 
189 /*
190  * Advance both the scan and the visited cursors.
191  *
192  * The inumber address space for a given filesystem is sparse, which means that
193  * the scan cursor can jump a long way in a single iter() call.  There are no
194  * inodes in these sparse areas, so we must move the visited cursor forward at
195  * the same time so that the scan user can receive live updates for inodes that
196  * may get created once we release the AGI buffer.
197  */
198 static inline void
199 xchk_iscan_move_cursor(
200 	struct xchk_iscan	*iscan,
201 	xfs_agnumber_t		agno,
202 	xfs_agino_t		agino)
203 {
204 	struct xfs_scrub	*sc = iscan->sc;
205 	struct xfs_mount	*mp = sc->mp;
206 	xfs_ino_t		cursor, visited;
207 
208 	BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO);
209 
210 	/*
211 	 * Special-case ino == 0 here so that we never set visited_ino to
212 	 * NULLFSINO when wrapping around EOFS, since that would let through all
213 	 * live updates.
214 	 */
215 	cursor = XFS_AGINO_TO_INO(mp, agno, agino);
216 	if (cursor == 0)
217 		visited = XFS_MAXINUMBER;
218 	else
219 		visited = cursor - 1;
220 
221 	mutex_lock(&iscan->lock);
222 	iscan->cursor_ino = cursor;
223 	iscan->__visited_ino = visited;
224 	trace_xchk_iscan_move_cursor(iscan);
225 	mutex_unlock(&iscan->lock);
226 }
227 
228 /*
229  * Mark the inode scan finished.  Setting the visited cursor to NULLFSINO
230  * tells the live update predicate to pass every subsequent update through to
231  * the caller, since the scanner will not observe those inodes itself.
232  */
233 static inline void
234 xchk_iscan_finish(
235 	struct xchk_iscan	*iscan)
236 {
237 	mutex_lock(&iscan->lock);
238 	iscan->cursor_ino = NULLFSINO;
239 
240 	/* All live updates will be applied from now on */
241 	iscan->__visited_ino = NULLFSINO;
242 
243 	mutex_unlock(&iscan->lock);
244 }
245 
246 /*
247  * Advance ino to the next inode that the inobt thinks is allocated, being
248  * careful to jump to the next AG if we've reached the right end of this AG's
249  * inode btree.  Advancing ino effectively means that we've pushed the inode
250  * scan forward, so set the iscan cursor to (ino - 1) so that our live update
251  * predicates will track inode allocations in that part of the inode number
252  * key space once we release the AGI buffer.
253  *
254  * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
255  * -ECANCELED if the live scan aborted, or the usual negative errno.
256  */
257 STATIC int
258 xchk_iscan_advance(
259 	struct xchk_iscan	*iscan,
260 	struct xfs_perag	**pagp,
261 	struct xfs_buf		**agi_bpp,
262 	xfs_inofree_t		*allocmaskp,
263 	uint8_t			*nr_inodesp)
264 {
265 	struct xfs_scrub	*sc = iscan->sc;
266 	struct xfs_mount	*mp = sc->mp;
267 	struct xfs_buf		*agi_bp;
268 	struct xfs_perag	*pag;
269 	xfs_agnumber_t		agno;
270 	xfs_agino_t		agino;
271 	int			ret;
272 
273 	ASSERT(iscan->cursor_ino >= iscan->__visited_ino);
274 
275 	do {
276 		if (xchk_iscan_aborted(iscan))
277 			return -ECANCELED;
278 
279 		agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino);
280 		pag = xfs_perag_get(mp, agno);
281 		if (!pag)
282 			return -ECANCELED;
283 
284 		ret = xfs_ialloc_read_agi(pag, sc->tp, &agi_bp);
285 		if (ret)
286 			goto out_pag;
287 
288 		agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino);
289 		ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp,
290 				&agino, nr_inodesp);
291 		if (ret)
292 			goto out_buf;
293 
294 		if (agino != NULLAGINO) {
295 			/*
296 			 * Found the next inode in this AG, so return it along
297 			 * with the AGI buffer and the perag structure to
298 			 * ensure it cannot go away.
299 			 */
300 			xchk_iscan_move_cursor(iscan, agno, agino);
301 			*agi_bpp = agi_bp;
302 			*pagp = pag;
303 			return 1;
304 		}
305 
306 		/*
307 		 * Did not find any more inodes in this AG, move on to the next
308 		 * AG.
309 		 */
310 		agno = (agno + 1) % mp->m_sb.sb_agcount;
311 		xchk_iscan_move_cursor(iscan, agno, 0);
312 		xfs_trans_brelse(sc->tp, agi_bp);
313 		xfs_perag_put(pag);
314 
315 		trace_xchk_iscan_advance_ag(iscan);
316 	} while (iscan->cursor_ino != iscan->scan_start_ino);
317 
318 	xchk_iscan_finish(iscan);
319 	return 0;
320 
321 out_buf:
322 	xfs_trans_brelse(sc->tp, agi_bp);
323 out_pag:
324 	xfs_perag_put(pag);
325 	return ret;
326 }
327 
328 /*
329  * Grabbing the inode failed, so we need to back up the scan and ask the caller
330  * to try to _advance the scan again.  Returns -EBUSY if we've run out of retry
331  * opportunities, -ECANCELED if the process has a fatal signal pending, or
332  * -EAGAIN if we should try again.
333  */
334 STATIC int
335 xchk_iscan_iget_retry(
336 	struct xchk_iscan	*iscan,
337 	bool			wait)
338 {
339 	ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1);
340 
341 	if (!iscan->iget_timeout ||
342 	    time_is_before_jiffies(iscan->__iget_deadline))
343 		return -EBUSY;
344 
345 	if (wait) {
346 		unsigned long	relax;
347 
348 		/*
349 		 * Sleep for a period of time to let the rest of the system
350 		 * catch up.  If we return early, someone sent a kill signal to
351 		 * the calling process.
352 		 */
353 		relax = msecs_to_jiffies(iscan->iget_retry_delay);
354 		trace_xchk_iscan_iget_retry_wait(iscan);
355 
356 		if (schedule_timeout_killable(relax) ||
357 		    xchk_iscan_aborted(iscan))
358 			return -ECANCELED;
359 	}
360 
361 	iscan->cursor_ino--;
362 	return -EAGAIN;
363 }
364 
365 /*
366  * Grab an inode as part of an inode scan.  While scanning this inode, the
367  * caller must ensure that no other threads can modify the inode until a call
368  * to xchk_iscan_visit succeeds.
369  *
370  * Returns the number of incore inodes grabbed; -EAGAIN if the caller should
371  * call xchk_iscan_advance again; -EBUSY if we couldn't grab an inode;
372  * -ECANCELED if there's a fatal signal pending; or some other negative errno.
373  */
374 STATIC int
375 xchk_iscan_iget(
376 	struct xchk_iscan	*iscan,
377 	struct xfs_perag	*pag,
378 	struct xfs_buf		*agi_bp,
379 	xfs_inofree_t		allocmask,
380 	uint8_t			nr_inodes)
381 {
382 	struct xfs_scrub	*sc = iscan->sc;
383 	struct xfs_mount	*mp = sc->mp;
384 	xfs_ino_t		ino = iscan->cursor_ino;
385 	unsigned int		idx = 0;
386 	unsigned int		i;
387 	int			error;
388 
389 	ASSERT(iscan->__inodes[0] == NULL);
390 
391 	/* Fill the first slot in the inode array. */
392 	error = xfs_iget(sc->mp, sc->tp, ino, XFS_IGET_NORETRY, 0,
393 			&iscan->__inodes[idx]);
394 
395 	trace_xchk_iscan_iget(iscan, error);
396 
397 	if (error == -ENOENT || error == -EAGAIN) {
398 		xfs_trans_brelse(sc->tp, agi_bp);
399 		xfs_perag_put(pag);
400 
401 		/*
402 		 * It's possible that this inode has lost all of its links but
403 		 * hasn't yet been inactivated.  If we don't have a transaction
404 		 * or it's not writable, flush the inodegc workers and wait.
405 		 */
406 		xfs_inodegc_flush(mp);
407 		return xchk_iscan_iget_retry(iscan, true);
408 	}
409 
410 	if (error == -EINVAL) {
411 		xfs_trans_brelse(sc->tp, agi_bp);
412 		xfs_perag_put(pag);
413 
414 		/*
415 		 * We thought the inode was allocated, but the inode btree
416 		 * lookup failed, which means that it was freed since the last
417 		 * time we advanced the cursor.  Back up and try again.  This
418 	 * should never happen since we still hold the AGI buffer from the
419 		 * inobt check, but we need to be careful about infinite loops.
420 		 */
421 		return xchk_iscan_iget_retry(iscan, false);
422 	}
423 
424 	if (error) {
425 		xfs_trans_brelse(sc->tp, agi_bp);
426 		xfs_perag_put(pag);
427 		return error;
428 	}
429 	idx++;
430 	ino++;
431 	allocmask >>= 1;
432 
433 	/*
434 	 * Now that we've filled the first slot in __inodes, try to fill the
435 	 * rest of the batch with consecutively ordered inodes to reduce the
436 	 * number of _iter calls.  Make a bitmap of unallocated inodes from the
437 	 * zeroes in the inuse bitmap; these inodes will not be scanned, but
438 	 * the _want_live_update predicate will pass through all live updates.
439 	 *
440 	 * If we can't iget an allocated inode, stop and return what we have.
441 	 */
442 	mutex_lock(&iscan->lock);
443 	iscan->__batch_ino = ino - 1;
444 	iscan->__skipped_inomask = 0;
445 	mutex_unlock(&iscan->lock);
446 
447 	for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) {
448 		if (!(allocmask & 1)) {
449 			ASSERT(!(iscan->__skipped_inomask & (1ULL << i)));
450 
451 			mutex_lock(&iscan->lock);
452 			iscan->cursor_ino = ino;
453 			iscan->__skipped_inomask |= (1ULL << i);
454 			mutex_unlock(&iscan->lock);
455 			continue;
456 		}
457 
458 		ASSERT(iscan->__inodes[idx] == NULL);
459 
460 		error = xfs_iget(sc->mp, sc->tp, ino, XFS_IGET_NORETRY, 0,
461 				&iscan->__inodes[idx]);
462 		if (error)
463 			break;
464 
465 		mutex_lock(&iscan->lock);
466 		iscan->cursor_ino = ino;
467 		mutex_unlock(&iscan->lock);
468 		idx++;
469 	}
470 
471 	trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx);
472 	xfs_trans_brelse(sc->tp, agi_bp);
473 	xfs_perag_put(pag);
474 	return idx;
475 }
476 
477 /*
478  * Advance the visit cursor to reflect skipped inodes beyond whatever we
479  * scanned.
480  */
481 STATIC void
482 xchk_iscan_finish_batch(
483 	struct xchk_iscan	*iscan)
484 {
485 	xfs_ino_t		highest_skipped;
486 
487 	mutex_lock(&iscan->lock);
488 
489 	if (iscan->__batch_ino != NULLFSINO) {
490 		highest_skipped = iscan->__batch_ino +
491 					xfs_highbit64(iscan->__skipped_inomask);
492 		iscan->__visited_ino = max(iscan->__visited_ino,
493 					   highest_skipped);
494 
495 		trace_xchk_iscan_skip(iscan);
496 	}
497 
498 	iscan->__batch_ino = NULLFSINO;
499 	iscan->__skipped_inomask = 0;
500 
501 	mutex_unlock(&iscan->lock);
502 }
503 
504 /*
505  * Advance the inode scan cursor to the next allocated inode and return up to
506  * 64 consecutive allocated inodes starting with the cursor position.
507  */
508 STATIC int
509 xchk_iscan_iter_batch(
510 	struct xchk_iscan	*iscan)
511 {
512 	struct xfs_scrub	*sc = iscan->sc;
513 	int			ret;
514 
515 	xchk_iscan_finish_batch(iscan);
516 
517 	if (iscan->iget_timeout)
518 		iscan->__iget_deadline = jiffies +
519 					 msecs_to_jiffies(iscan->iget_timeout);
520 
521 	do {
522 		struct xfs_buf	*agi_bp = NULL;
523 		struct xfs_perag *pag = NULL;
524 		xfs_inofree_t	allocmask = 0;
525 		uint8_t		nr_inodes = 0;
526 
527 		ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask,
528 				&nr_inodes);
529 		if (ret != 1)
530 			return ret;
531 
532 		if (xchk_iscan_aborted(iscan)) {
533 			xfs_trans_brelse(sc->tp, agi_bp);
534 			xfs_perag_put(pag);
535 			ret = -ECANCELED;
536 			break;
537 		}
538 
539 		ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes);
540 	} while (ret == -EAGAIN);
541 
542 	return ret;
543 }
544 
545 /*
546  * Advance the inode scan cursor to the next allocated inode and return the
547  * incore inode structure associated with it.
548  *
549  * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
550  * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be
551  * grabbed, or the usual negative errno.
552  *
553  * If the function returns -EBUSY and the caller can handle skipping an inode,
554  * it may call this function again to continue the scan with the next allocated
555  * inode.
556  */
557 int
558 xchk_iscan_iter(
559 	struct xchk_iscan	*iscan,
560 	struct xfs_inode	**ipp)
561 {
562 	unsigned int		i;
563 	int			error;
564 
565 	/* Find a cached inode, or go get another batch. */
566 	for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
567 		if (iscan->__inodes[i])
568 			goto foundit;
569 	}
570 
571 	error = xchk_iscan_iter_batch(iscan);
572 	if (error <= 0)
573 		return error;
574 
575 	ASSERT(iscan->__inodes[0] != NULL);
576 	i = 0;
577 
578 foundit:
579 	/* Give the caller our reference. */
580 	*ipp = iscan->__inodes[i];
581 	iscan->__inodes[i] = NULL;
582 	return 1;
583 }
584 
585 /* Clean up an xchk_iscan_iter call by dropping any inodes that we still hold. */
586 void
587 xchk_iscan_iter_finish(
588 	struct xchk_iscan	*iscan)
589 {
590 	struct xfs_scrub	*sc = iscan->sc;
591 	unsigned int		i;
592 
593 	for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
594 		if (iscan->__inodes[i]) {
595 			xchk_irele(sc, iscan->__inodes[i]);
596 			iscan->__inodes[i] = NULL;
597 		}
598 	}
599 }
600 
601 /* Mark this inode scan finished and release resources. */
602 void
603 xchk_iscan_teardown(
604 	struct xchk_iscan	*iscan)
605 {
606 	xchk_iscan_iter_finish(iscan);
607 	xchk_iscan_finish(iscan);
608 	mutex_destroy(&iscan->lock);
609 }
610 
611 /* Pick an AG from which to start a scan. */
612 static inline xfs_ino_t
613 xchk_iscan_rotor(
614 	struct xfs_mount	*mp)
615 {
616 	static atomic_t		agi_rotor;
617 	unsigned int		r = atomic_inc_return(&agi_rotor) - 1;
618 
619 	/*
620 	 * Rotoring *backwards* through the AGs, so we add one here before
621 	 * subtracting from the agcount to arrive at an AG number.
622 	 */
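	/*
	 * For example, with sb_agcount == 4, successive rotor values 0, 1, 2,
	 * 3, 4, ... start scans at AGs 3, 2, 1, 0, 3, ...
	 */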
623 	r = (r % mp->m_sb.sb_agcount) + 1;
624 
625 	return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0);
626 }
627 
628 /*
629  * Set ourselves up to start an inode scan.  If the @iget_timeout and
630  * @iget_retry_delay parameters are set, the scan will try to iget each inode
631  * for @iget_timeout milliseconds.  If an iget call indicates that the inode is
632  * waiting to be inactivated, the CPU will relax for @iget_retry_delay
633  * milliseconds after pushing the inactivation workers.
634  */
635 void
636 xchk_iscan_start(
637 	struct xfs_scrub	*sc,
638 	unsigned int		iget_timeout,
639 	unsigned int		iget_retry_delay,
640 	struct xchk_iscan	*iscan)
641 {
642 	xfs_ino_t		start_ino;
643 
644 	start_ino = xchk_iscan_rotor(sc->mp);
645 
646 	iscan->__batch_ino = NULLFSINO;
647 	iscan->__skipped_inomask = 0;
648 
649 	iscan->sc = sc;
650 	clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate);
651 	iscan->iget_timeout = iget_timeout;
652 	iscan->iget_retry_delay = iget_retry_delay;
653 	iscan->__visited_ino = start_ino;
654 	iscan->cursor_ino = start_ino;
655 	iscan->scan_start_ino = start_ino;
656 	mutex_init(&iscan->lock);
657 	memset(iscan->__inodes, 0, sizeof(iscan->__inodes));
658 
659 	trace_xchk_iscan_start(iscan, start_ino);
660 }
661 
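/*
 * Putting the pieces together, a typical scan loop would be structured
 * roughly like the sketch below.  xchk_fubar_collect() and the timeout values
 * are hypothetical; the xchk_iscan_* and other xchk_* helpers are the real
 * calls, but inode locking and most error handling are elided.
 *
 *	struct xchk_iscan	iscan;
 *	struct xfs_inode	*ip;
 *	int			error;
 *
 *	xchk_iscan_start(sc, 5000, 100, &iscan);
 *	while ((error = xchk_iscan_iter(&iscan, &ip)) == 1) {
 *		error = xchk_fubar_collect(sc, ip);
 *		if (!error)
 *			xchk_iscan_mark_visited(&iscan, ip);
 *		xchk_irele(sc, ip);
 *		if (error || xchk_should_terminate(sc, &error))
 *			break;
 *	}
 *	xchk_iscan_iter_finish(&iscan);
 *	if (error == -EBUSY) {
 *		(an inode could not be grabbed before the iget timeout; the
 *		 caller decides whether to skip it and resume iterating or to
 *		 give up on the scan)
 *	}
 *	xchk_iscan_teardown(&iscan);
 */
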
662 /*
663  * Mark this inode as having been visited.  Callers must hold a sufficiently
664  * exclusive lock on the inode to prevent concurrent modifications.
665  */
666 void
667 xchk_iscan_mark_visited(
668 	struct xchk_iscan	*iscan,
669 	struct xfs_inode	*ip)
670 {
671 	mutex_lock(&iscan->lock);
672 	iscan->__visited_ino = ip->i_ino;
673 	trace_xchk_iscan_visit(iscan);
674 	mutex_unlock(&iscan->lock);
675 }
676 
677 /*
678  * Did we skip this inode because it wasn't allocated when we loaded the batch?
679  * If so, it is newly allocated and will not be scanned.  All live updates to
680  * this inode must be passed to the caller to maintain scan correctness.
681  */
682 static inline bool
683 xchk_iscan_skipped(
684 	const struct xchk_iscan	*iscan,
685 	xfs_ino_t		ino)
686 {
687 	if (iscan->__batch_ino == NULLFSINO)
688 		return false;
689 	if (ino < iscan->__batch_ino)
690 		return false;
691 	if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK)
692 		return false;
693 
694 	return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino));
695 }
696 
697 /*
698  * Do we need a live update for this inode?  This is true if the scanner thread
699  * has visited this inode and the scan hasn't been aborted due to errors.
700  * Callers must hold a sufficiently exclusive lock on the inode to prevent
701  * scanners from reading any inode metadata.
702  */
703 bool
704 xchk_iscan_want_live_update(
705 	struct xchk_iscan	*iscan,
706 	xfs_ino_t		ino)
707 {
708 	bool			ret = false;
709 
710 	if (xchk_iscan_aborted(iscan))
711 		return false;
712 
713 	mutex_lock(&iscan->lock);
714 
715 	trace_xchk_iscan_want_live_update(iscan, ino);
716 
717 	/* Scan is finished, caller should receive all updates. */
718 	if (iscan->__visited_ino == NULLFSINO) {
719 		ret = true;
720 		goto unlock;
721 	}
722 
723 	/*
724 	 * No inodes have been visited yet, so the visited cursor points at the
725 	 * start of the scan range.  The caller should not receive any updates.
726 	 */
727 	if (iscan->scan_start_ino == iscan->__visited_ino) {
728 		ret = false;
729 		goto unlock;
730 	}
731 
732 	/*
733 	 * This inode was not allocated at the time of the iscan batch.
734 	 * The caller should receive all updates.
735 	 */
736 	if (xchk_iscan_skipped(iscan, ino)) {
737 		ret = true;
738 		goto unlock;
739 	}
740 
741 	/*
742 	 * The visited cursor hasn't yet wrapped around the end of the FS.  If
743 	 * @ino is inside the starred range, the caller should receive updates:
744 	 *
745 	 * 0 ------------ S ************ V ------------ EOFS
746 	 */
747 	if (iscan->scan_start_ino <= iscan->__visited_ino) {
748 		if (ino >= iscan->scan_start_ino &&
749 		    ino <= iscan->__visited_ino)
750 			ret = true;
751 
752 		goto unlock;
753 	}
754 
755 	/*
756 	 * The visited cursor wrapped around the end of the FS.  If @ino is
757 	 * inside the starred range, the caller should receive updates:
758 	 *
759 	 * 0 ************ V ------------ S ************ EOFS
760 	 */
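	/*
	 * Concrete example: if the scan started at inode 1024 and the visited
	 * cursor has wrapped around to 64, updates for inodes 0-64 and
	 * 1024-EOFS are wanted, while updates for inodes 65-1023 are not.
	 */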
761 	if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino)
762 		ret = true;
763 
764 unlock:
765 	mutex_unlock(&iscan->lock);
766 	return ret;
767 }
768