// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_ag.h"
#include "xfs_error.h"
#include "xfs_bit.h"
#include "xfs_icache.h"
#include "scrub/scrub.h"
#include "scrub/iscan.h"
#include "scrub/common.h"
#include "scrub/trace.h"

/*
 * Live File Scan
 * ==============
 *
 * Live file scans walk every inode in a live filesystem.  This is more or
 * less like a regular iwalk, except that when we're advancing the scan cursor,
 * we must ensure that inodes cannot be added or deleted anywhere between the
 * old cursor value and the new cursor value.  If we're advancing the cursor
 * by one inode, the caller must hold that inode; if we're finding the next
 * inode to scan, we must grab the AGI and hold it until we've updated the
 * scan cursor.
 *
 * Callers are expected to use this code to scan all files in the filesystem to
 * construct a new metadata index of some kind.  The scan races against other
 * live updates, which means there must be a provision to update the new index
 * when updates are made to inodes that have already been scanned.  The iscan
 * lock can be used in live update hook code to stop the scan and protect this
 * data structure.
 *
 * To keep the new index up to date with other metadata updates being made to
 * the live filesystem, it is assumed that the caller will add hooks as needed
 * to be notified when a metadata update occurs.  The inode scanner must tell
 * the hook code when an inode has been visited with xchk_iscan_mark_visited.
 * Hook functions can use xchk_iscan_want_live_update to decide if the
 * scanner's observations must be updated.
 */
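
/*
 * A minimal usage sketch (illustration only, not compiled as part of this
 * file): a caller might drive the scan and its live update hook roughly as
 * follows, where the timeout and retry-delay values are invented for the
 * example and the "new index" bookkeeping is left to the caller:
 *
 *	xchk_iscan_start(sc, 5000, 500, &iscan);
 *	while ((error = xchk_iscan_iter(&iscan, &ip)) == 1) {
 *		(record ip's metadata in the new index)
 *		xchk_iscan_mark_visited(&iscan, ip);
 *		xchk_irele(sc, ip);
 *	}
 *	xchk_iscan_iter_finish(&iscan);
 *
 * Meanwhile, a live update hook would filter notifications with:
 *
 *	if (xchk_iscan_want_live_update(&iscan, ino))
 *		(apply the update to the new index)
 *
 * Once the new index has been fully constructed (or the scan has failed), the
 * caller releases resources with xchk_iscan_teardown().
 */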

/*
 * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so
 * that the scan ignores that inode.
 */
STATIC void
xchk_iscan_mask_skipino(
	struct xchk_iscan	*iscan,
	struct xfs_perag	*pag,
	struct xfs_inobt_rec_incore	*rec,
	xfs_agino_t		lastrecino)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_agnumber_t		skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino);
	xfs_agino_t		skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino);

	if (pag->pag_agno != skip_agno)
		return;
	if (skip_agino < rec->ir_startino)
		return;
	if (skip_agino > lastrecino)
		return;

	rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1);
}

/*
 * Set *cursor to the next allocated inode after whatever it's set to now.
 * If there are no more inodes in this AG, cursor is set to NULLAGINO.
 */
STATIC int
xchk_iscan_find_next(
	struct xchk_iscan	*iscan,
	struct xfs_buf		*agi_bp,
	struct xfs_perag	*pag,
	xfs_inofree_t		*allocmaskp,
	xfs_agino_t		*cursor,
	uint8_t			*nr_inodesp)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_inobt_rec_incore	rec;
	struct xfs_btree_cur	*cur;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;
	xfs_agnumber_t		agno = pag->pag_agno;
	xfs_agino_t		lastino = NULLAGINO;
	xfs_agino_t		first, last;
	xfs_agino_t		agino = *cursor;
	int			has_rec;
	int			error;

	/* If the cursor is beyond the end of this AG, move to the next one. */
	xfs_agino_range(mp, agno, &first, &last);
	if (agino > last) {
		*cursor = NULLAGINO;
		return 0;
	}

	/*
	 * Look up the inode chunk for the current cursor position.  If there
	 * is no chunk here, we want the next one.
	 */
	cur = xfs_inobt_init_cursor(pag, tp, agi_bp);
	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec);
	if (!error && !has_rec)
		error = xfs_btree_increment(cur, 0, &has_rec);
	for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) {
		xfs_inofree_t	allocmask;

		/*
		 * If we've run out of inobt records in this AG, move the
		 * cursor on to the next AG and exit.  The caller can try
		 * again with the next AG.
		 */
		if (!has_rec) {
			*cursor = NULLAGINO;
			break;
		}

		error = xfs_inobt_get_rec(cur, &rec, &has_rec);
		if (error)
			break;
		if (!has_rec) {
			error = -EFSCORRUPTED;
			break;
		}

		/* Make sure that we always move forward. */
		if (lastino != NULLAGINO &&
		    XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) {
			error = -EFSCORRUPTED;
			break;
		}
		lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1;

		/*
		 * If this record only covers inodes that come before the
		 * cursor, advance to the next record.
		 */
		if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
			continue;

		if (iscan->skip_ino)
			xchk_iscan_mask_skipino(iscan, pag, &rec, lastino);

		/*
		 * If the incoming lookup put us in the middle of an inobt
		 * record, mark it and the previous inodes "free" so that the
		 * search for allocated inodes will start at the cursor.
		 * We don't care about ir_freecount here.
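		 *
		 * As a worked example with invented numbers: if
		 * rec.ir_startino is 64 and the incoming agino is 67, then
		 * xfs_inobt_maskn(0, 4) marks inodes 64-67 "free", so the
		 * low-bit search below finds the first inode that the inobt
		 * thinks is allocated at or after inode 68.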
		 */
		if (agino >= rec.ir_startino)
			rec.ir_free |= xfs_inobt_maskn(0,
						agino + 1 - rec.ir_startino);

		/*
		 * If there are allocated inodes in this chunk, find them
		 * and update the scan cursor.
		 */
		allocmask = ~rec.ir_free;
		if (hweight64(allocmask) > 0) {
			int	next = xfs_lowbit64(allocmask);

			ASSERT(next >= 0);
			*cursor = rec.ir_startino + next;
			*allocmaskp = allocmask >> next;
			*nr_inodesp = XFS_INODES_PER_CHUNK - next;
			break;
		}
	}

	xfs_btree_del_cursor(cur, error);
	return error;
}

/*
 * Advance both the scan and the visited cursors.
 *
 * The inumber address space for a given filesystem is sparse, which means that
 * the scan cursor can jump a long way in a single iter() call.  There are no
 * inodes in these sparse areas, so we must move the visited cursor forward at
 * the same time so that the scan user can receive live updates for inodes that
 * may get created once we release the AGI buffer.
 */
static inline void
xchk_iscan_move_cursor(
	struct xchk_iscan	*iscan,
	xfs_agnumber_t		agno,
	xfs_agino_t		agino)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_ino_t		cursor, visited;

	BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO);

	/*
	 * Special-case ino == 0 here so that we never set visited_ino to
	 * NULLFSINO when wrapping around EOFS, for that will let through all
	 * live updates.
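	 *
	 * For example, when the scan wraps past the last AG, the new cursor
	 * is inode 0 of AG 0.  Computing (cursor - 1) would yield NULLFSINO,
	 * which would make every live update look interesting, so we use
	 * XFS_MAXINUMBER as the visited cursor instead.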
	 */
	cursor = XFS_AGINO_TO_INO(mp, agno, agino);
	if (cursor == 0)
		visited = XFS_MAXINUMBER;
	else
		visited = cursor - 1;

	mutex_lock(&iscan->lock);
	iscan->cursor_ino = cursor;
	iscan->__visited_ino = visited;
	trace_xchk_iscan_move_cursor(iscan);
	mutex_unlock(&iscan->lock);
}

/*
 * Mark the scan finished.  Setting both cursors to NULLFSINO tells the live
 * update predicate that the scan is complete and that all updates should now
 * be passed to the caller.
 */
static inline void
xchk_iscan_finish(
	struct xchk_iscan	*iscan)
{
	mutex_lock(&iscan->lock);
	iscan->cursor_ino = NULLFSINO;

	/* All live updates will be applied from now on */
	iscan->__visited_ino = NULLFSINO;

	mutex_unlock(&iscan->lock);
}

/*
 * Grab the AGI to advance the inode scan.  Returns 0 if *agi_bpp is now set,
 * -ECANCELED if the live scan aborted, -EBUSY if the AGI could not be grabbed,
 * or the usual negative errno.
 */
STATIC int
xchk_iscan_read_agi(
	struct xchk_iscan	*iscan,
	struct xfs_perag	*pag,
	struct xfs_buf		**agi_bpp)
{
	struct xfs_scrub	*sc = iscan->sc;
	unsigned long		relax;
	int			ret;

	if (!xchk_iscan_agi_needs_trylock(iscan))
		return xfs_ialloc_read_agi(pag, sc->tp, 0, agi_bpp);

	relax = msecs_to_jiffies(iscan->iget_retry_delay);
	do {
		ret = xfs_ialloc_read_agi(pag, sc->tp, XFS_IALLOC_FLAG_TRYLOCK,
				agi_bpp);
		if (ret != -EAGAIN)
			return ret;
		if (!iscan->iget_timeout ||
		    time_is_before_jiffies(iscan->__iget_deadline))
			return -EBUSY;

		trace_xchk_iscan_agi_retry_wait(iscan);
	} while (!schedule_timeout_killable(relax) &&
		 !xchk_iscan_aborted(iscan));
	return -ECANCELED;
}

/*
 * Advance ino to the next inode that the inobt thinks is allocated, being
 * careful to jump to the next AG if we've reached the right end of this AG's
 * inode btree.  Advancing ino effectively means that we've pushed the inode
 * scan forward, so set the iscan cursor to (ino - 1) so that our live update
 * predicates will track inode allocations in that part of the inode number
 * key space once we release the AGI buffer.
 *
 * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
 * -ECANCELED if the live scan aborted, or the usual negative errno.
 */
STATIC int
xchk_iscan_advance(
	struct xchk_iscan	*iscan,
	struct xfs_perag	**pagp,
	struct xfs_buf		**agi_bpp,
	xfs_inofree_t		*allocmaskp,
	uint8_t			*nr_inodesp)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*agi_bp;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	xfs_agino_t		agino;
	int			ret;

	ASSERT(iscan->cursor_ino >= iscan->__visited_ino);

	do {
		if (xchk_iscan_aborted(iscan))
			return -ECANCELED;

		agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino);
		pag = xfs_perag_get(mp, agno);
		if (!pag)
			return -ECANCELED;

		ret = xchk_iscan_read_agi(iscan, pag, &agi_bp);
		if (ret)
			goto out_pag;

		agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino);
		ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp,
				&agino, nr_inodesp);
		if (ret)
			goto out_buf;

		if (agino != NULLAGINO) {
			/*
			 * Found the next inode in this AG, so return it along
			 * with the AGI buffer and the perag structure to
			 * ensure it cannot go away.
			 */
			xchk_iscan_move_cursor(iscan, agno, agino);
			*agi_bpp = agi_bp;
			*pagp = pag;
			return 1;
		}

		/*
		 * Did not find any more inodes in this AG, move on to the next
		 * AG.
		 */
		agno = (agno + 1) % mp->m_sb.sb_agcount;
		xchk_iscan_move_cursor(iscan, agno, 0);
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		trace_xchk_iscan_advance_ag(iscan);
	} while (iscan->cursor_ino != iscan->scan_start_ino);

	xchk_iscan_finish(iscan);
	return 0;

out_buf:
	xfs_trans_brelse(sc->tp, agi_bp);
out_pag:
	xfs_perag_put(pag);
	return ret;
}

/*
 * Grabbing the inode failed, so we need to back up the scan and ask the caller
 * to try to _advance the scan again.  Returns -EBUSY if we've run out of retry
 * opportunities, -ECANCELED if the process has a fatal signal pending, or
 * -EAGAIN if we should try again.
 */
STATIC int
xchk_iscan_iget_retry(
	struct xchk_iscan	*iscan,
	bool			wait)
{
	ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1);

	if (!iscan->iget_timeout ||
	    time_is_before_jiffies(iscan->__iget_deadline))
		return -EBUSY;

	if (wait) {
		unsigned long	relax;

		/*
		 * Sleep for a period of time to let the rest of the system
		 * catch up.  If we return early, someone sent a kill signal to
		 * the calling process.
		 */
		relax = msecs_to_jiffies(iscan->iget_retry_delay);
		trace_xchk_iscan_iget_retry_wait(iscan);

		if (schedule_timeout_killable(relax) ||
		    xchk_iscan_aborted(iscan))
			return -ECANCELED;
	}

	iscan->cursor_ino--;
	return -EAGAIN;
}

/*
 * Grab an inode as part of an inode scan.  While scanning this inode, the
 * caller must ensure that no other threads can modify the inode until a call
 * to xchk_iscan_mark_visited succeeds.
 *
 * Returns the number of incore inodes grabbed; -EAGAIN if the caller should
 * call xchk_iscan_advance again; -EBUSY if we couldn't grab an inode;
 * -ECANCELED if there's a fatal signal pending; or some other negative errno.
 */
STATIC int
xchk_iscan_iget(
	struct xchk_iscan	*iscan,
	struct xfs_perag	*pag,
	struct xfs_buf		*agi_bp,
	xfs_inofree_t		allocmask,
	uint8_t			nr_inodes)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_ino_t		ino = iscan->cursor_ino;
	unsigned int		idx = 0;
	unsigned int		i;
	int			error;

	ASSERT(iscan->__inodes[0] == NULL);

	/* Fill the first slot in the inode array. */
	error = xfs_iget(sc->mp, sc->tp, ino, XFS_IGET_NORETRY, 0,
			&iscan->__inodes[idx]);

	trace_xchk_iscan_iget(iscan, error);

	if (error == -ENOENT || error == -EAGAIN) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		/*
		 * It's possible that this inode has lost all of its links but
		 * hasn't yet been inactivated.  If we don't have a transaction
		 * or it's not writable, flush the inodegc workers and wait.
		 */
		xfs_inodegc_flush(mp);
		return xchk_iscan_iget_retry(iscan, true);
	}

	if (error == -EINVAL) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		/*
		 * We thought the inode was allocated, but the inode btree
		 * lookup failed, which means that it was freed since the last
		 * time we advanced the cursor.  Back up and try again.  This
		 * should never happen since we still hold the AGI buffer from
		 * the inobt check, but we need to be careful about infinite
		 * loops.
		 */
		return xchk_iscan_iget_retry(iscan, false);
	}

	if (error) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);
		return error;
	}
	idx++;
	ino++;
	allocmask >>= 1;

	/*
	 * Now that we've filled the first slot in __inodes, try to fill the
	 * rest of the batch with consecutively ordered inodes to reduce the
	 * number of _iter calls.  Make a bitmap of unallocated inodes from the
	 * zeroes in the inuse bitmap; these inodes will not be scanned, but
	 * the _want_live_update predicate will pass through all live updates.
	 *
	 * If we can't iget an allocated inode, stop and return what we have.
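	 *
	 * As a worked example with an invented mask: if the caller passed
	 * nr_inodes == 8 and allocmask == 0b11101011, the iget above consumed
	 * bit 0; the loop below igets the inodes at offsets 1, 3, 5, 6, and 7
	 * and records offsets 2 and 4 in __skipped_inomask.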
	 */
	mutex_lock(&iscan->lock);
	iscan->__batch_ino = ino - 1;
	iscan->__skipped_inomask = 0;
	mutex_unlock(&iscan->lock);

	for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) {
		if (!(allocmask & 1)) {
			ASSERT(!(iscan->__skipped_inomask & (1ULL << i)));

			mutex_lock(&iscan->lock);
			iscan->cursor_ino = ino;
			iscan->__skipped_inomask |= (1ULL << i);
			mutex_unlock(&iscan->lock);
			continue;
		}

		ASSERT(iscan->__inodes[idx] == NULL);

		error = xfs_iget(sc->mp, sc->tp, ino, XFS_IGET_NORETRY, 0,
				&iscan->__inodes[idx]);
		if (error)
			break;

		mutex_lock(&iscan->lock);
		iscan->cursor_ino = ino;
		mutex_unlock(&iscan->lock);
		idx++;
	}

	trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx);
	xfs_trans_brelse(sc->tp, agi_bp);
	xfs_perag_put(pag);
	return idx;
}

/*
 * Advance the visit cursor to reflect skipped inodes beyond whatever we
 * scanned.
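 *
 * For example, if __batch_ino is 128 and __skipped_inomask is 0x30 (inodes
 * 132 and 133 were skipped), the visited cursor must advance to at least 133
 * so that live updates to the skipped inodes still reach the caller after
 * the batch state is cleared.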
 */
STATIC void
xchk_iscan_finish_batch(
	struct xchk_iscan	*iscan)
{
	xfs_ino_t		highest_skipped;

	mutex_lock(&iscan->lock);

	if (iscan->__batch_ino != NULLFSINO) {
		highest_skipped = iscan->__batch_ino +
					xfs_highbit64(iscan->__skipped_inomask);
		iscan->__visited_ino = max(iscan->__visited_ino,
					   highest_skipped);

		trace_xchk_iscan_skip(iscan);
	}

	iscan->__batch_ino = NULLFSINO;
	iscan->__skipped_inomask = 0;

	mutex_unlock(&iscan->lock);
}

/*
 * Advance the inode scan cursor to the next allocated inode and return up to
 * 64 consecutive allocated inodes starting with the cursor position.
 */
STATIC int
xchk_iscan_iter_batch(
	struct xchk_iscan	*iscan)
{
	struct xfs_scrub	*sc = iscan->sc;
	int			ret;

	xchk_iscan_finish_batch(iscan);

	if (iscan->iget_timeout)
		iscan->__iget_deadline = jiffies +
					 msecs_to_jiffies(iscan->iget_timeout);

	do {
		struct xfs_buf	*agi_bp = NULL;
		struct xfs_perag *pag = NULL;
		xfs_inofree_t	allocmask = 0;
		uint8_t		nr_inodes = 0;

		ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask,
				&nr_inodes);
		if (ret != 1)
			return ret;

		if (xchk_iscan_aborted(iscan)) {
			xfs_trans_brelse(sc->tp, agi_bp);
			xfs_perag_put(pag);
			ret = -ECANCELED;
			break;
		}

		ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes);
	} while (ret == -EAGAIN);

	return ret;
}

/*
 * Advance the inode scan cursor to the next allocated inode and return the
 * incore inode structure associated with it.
 *
 * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
 * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be
 * grabbed, or the usual negative errno.
 *
 * If the function returns -EBUSY and the caller can handle skipping an inode,
 * it may call this function again to continue the scan with the next allocated
 * inode.
 */
int
xchk_iscan_iter(
	struct xchk_iscan	*iscan,
	struct xfs_inode	**ipp)
{
	unsigned int		i;
	int			error;

	/* Find a cached inode, or go get another batch. */
	for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
		if (iscan->__inodes[i])
			goto foundit;
	}

	error = xchk_iscan_iter_batch(iscan);
	if (error <= 0)
		return error;

	ASSERT(iscan->__inodes[0] != NULL);
	i = 0;

foundit:
	/* Give the caller our reference. */
	*ipp = iscan->__inodes[i];
	iscan->__inodes[i] = NULL;
	return 1;
}

/* Clean up an xchk_iscan_iter call by dropping any inodes that we still hold. */
void
xchk_iscan_iter_finish(
	struct xchk_iscan	*iscan)
{
	struct xfs_scrub	*sc = iscan->sc;
	unsigned int		i;

	for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
		if (iscan->__inodes[i]) {
			xchk_irele(sc, iscan->__inodes[i]);
			iscan->__inodes[i] = NULL;
		}
	}
}

/* Mark this inode scan finished and release resources. */
void
xchk_iscan_teardown(
	struct xchk_iscan	*iscan)
{
	xchk_iscan_iter_finish(iscan);
	xchk_iscan_finish(iscan);
	mutex_destroy(&iscan->lock);
}

/* Pick an AG from which to start a scan. */
static inline xfs_ino_t
xchk_iscan_rotor(
	struct xfs_mount	*mp)
{
	static atomic_t		agi_rotor;
	unsigned int		r = atomic_inc_return(&agi_rotor) - 1;

	/*
	 * Rotoring *backwards* through the AGs, so we add one here before
	 * subtracting from the agcount to arrive at an AG number.
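	 *
	 * For example, with sb_agcount == 4, successive calls start scans at
	 * AG 3, then 2, 1, 0, 3, and so on.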
	 */
	r = (r % mp->m_sb.sb_agcount) + 1;

	return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0);
}

/*
 * Set ourselves up to start an inode scan.  If the @iget_timeout and
 * @iget_retry_delay parameters are set, the scan will try to iget each inode
 * for @iget_timeout milliseconds.  If an iget call indicates that the inode is
 * waiting to be inactivated, the CPU will relax for @iget_retry_delay
 * milliseconds after pushing the inactivation workers.
 */
void
xchk_iscan_start(
	struct xfs_scrub	*sc,
	unsigned int		iget_timeout,
	unsigned int		iget_retry_delay,
	struct xchk_iscan	*iscan)
{
	xfs_ino_t		start_ino;

	start_ino = xchk_iscan_rotor(sc->mp);

	iscan->__batch_ino = NULLFSINO;
	iscan->__skipped_inomask = 0;

	iscan->sc = sc;
	clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate);
	iscan->iget_timeout = iget_timeout;
	iscan->iget_retry_delay = iget_retry_delay;
	iscan->__visited_ino = start_ino;
	iscan->cursor_ino = start_ino;
	iscan->scan_start_ino = start_ino;
	mutex_init(&iscan->lock);
	memset(iscan->__inodes, 0, sizeof(iscan->__inodes));

	trace_xchk_iscan_start(iscan, start_ino);
}

/*
 * Mark this inode as having been visited.  Callers must hold a sufficiently
 * exclusive lock on the inode to prevent concurrent modifications.
 */
void
xchk_iscan_mark_visited(
	struct xchk_iscan	*iscan,
	struct xfs_inode	*ip)
{
	mutex_lock(&iscan->lock);
	iscan->__visited_ino = ip->i_ino;
	trace_xchk_iscan_visit(iscan);
	mutex_unlock(&iscan->lock);
}

/*
 * Did we skip this inode because it wasn't allocated when we loaded the batch?
 * If so, it is newly allocated and will not be scanned.  All live updates to
 * this inode must be passed to the caller to maintain scan correctness.
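 *
 * For example, if __batch_ino is 128 and bit 4 of __skipped_inomask is set,
 * then inode 132 was unallocated when the batch was loaded; a hook that
 * reports an update to inode 132 must pass it to the caller.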
 */
static inline bool
xchk_iscan_skipped(
	const struct xchk_iscan	*iscan,
	xfs_ino_t		ino)
{
	if (iscan->__batch_ino == NULLFSINO)
		return false;
	if (ino < iscan->__batch_ino)
		return false;
	if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK)
		return false;

	return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino));
}

/*
 * Do we need a live update for this inode?  This is true if the scanner thread
 * has visited this inode and the scan hasn't been aborted due to errors.
 * Callers must hold a sufficiently exclusive lock on the inode to prevent
 * scanners from reading any inode metadata.
 */
bool
xchk_iscan_want_live_update(
	struct xchk_iscan	*iscan,
	xfs_ino_t		ino)
{
	bool			ret = false;

	if (xchk_iscan_aborted(iscan))
		return false;

	mutex_lock(&iscan->lock);

	trace_xchk_iscan_want_live_update(iscan, ino);

	/* Scan is finished, caller should receive all updates. */
	if (iscan->__visited_ino == NULLFSINO) {
		ret = true;
		goto unlock;
	}

	/*
	 * No inodes have been visited yet, so the visited cursor points at the
	 * start of the scan range.  The caller should not receive any updates.
	 */
	if (iscan->scan_start_ino == iscan->__visited_ino) {
		ret = false;
		goto unlock;
	}

	/*
	 * This inode was not allocated at the time of the iscan batch.
	 * The caller should receive all updates.
	 */
	if (xchk_iscan_skipped(iscan, ino)) {
		ret = true;
		goto unlock;
	}

	/*
	 * The visited cursor hasn't yet wrapped around the end of the FS.  If
	 * @ino is inside the starred range, the caller should receive updates:
	 *
	 * 0 ------------ S ************ V ------------ EOFS
	 */
	if (iscan->scan_start_ino <= iscan->__visited_ino) {
		if (ino >= iscan->scan_start_ino &&
		    ino <= iscan->__visited_ino)
			ret = true;

		goto unlock;
	}

	/*
	 * The visited cursor wrapped around the end of the FS.  If @ino is
	 * inside the starred range, the caller should receive updates:
	 *
	 * 0 ************ V ------------ S ************ EOFS
	 */
	if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino)
		ret = true;

unlock:
	mutex_unlock(&iscan->lock);
	return ret;
}
802