1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_icache.h"
16 #include "xfs_iwalk.h"
17 #include "xfs_ialloc.h"
18 #include "xfs_dir2.h"
19 #include "xfs_dir2_priv.h"
20 #include "xfs_ag.h"
21 #include "xfs_parent.h"
22 #include "scrub/scrub.h"
23 #include "scrub/common.h"
24 #include "scrub/repair.h"
25 #include "scrub/xfile.h"
26 #include "scrub/xfarray.h"
27 #include "scrub/iscan.h"
28 #include "scrub/orphanage.h"
29 #include "scrub/nlinks.h"
30 #include "scrub/trace.h"
31 #include "scrub/readdir.h"
32 #include "scrub/tempfile.h"
33 #include "scrub/listxattr.h"
34
35 /*
36 * Live Inode Link Count Checking
37 * ==============================
38 *
39 * Inode link counts are "summary" metadata, in the sense that they are
40 * computed as the number of directory entries referencing each file on the
41 * filesystem. Therefore, we compute the correct link counts by creating a
42 * shadow link count structure and walking every inode.
43 */
44
45 /* Set us up to scrub inode link counts. */
46 int
xchk_setup_nlinks(struct xfs_scrub * sc)47 xchk_setup_nlinks(
48 struct xfs_scrub *sc)
49 {
50 struct xchk_nlink_ctrs *xnc;
51 int error;
52
53 xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
54
55 if (xchk_could_repair(sc)) {
56 error = xrep_setup_nlinks(sc);
57 if (error)
58 return error;
59 }
60
61 xnc = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS);
62 if (!xnc)
63 return -ENOMEM;
64 xnc->xname.name = xnc->namebuf;
65 xnc->sc = sc;
66 sc->buf = xnc;
67
68 return xchk_setup_fs(sc);
69 }
70
71 /*
72 * Part 1: Collecting file link counts. For each file, we create a shadow link
73 * counting structure, then walk the entire directory tree, incrementing parent
74 * and child link counts for each directory entry seen.
75 *
76 * To avoid false corruption reports in part 2, any failure in this part must
77 * set the INCOMPLETE flag even when a negative errno is returned. This care
78 * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
79 * ECANCELED) that are absorbed into a scrub state flag update by
80 * xchk_*_process_error. Scrub and repair share the same incore data
81 * structures, so the INCOMPLETE flag is critical to prevent a repair based on
82 * insufficient information.
83 *
84 * Because we are scanning a live filesystem, it's possible that another thread
85 * will try to update the link counts for an inode that we've already scanned.
86 * This will cause our counts to be incorrect. Therefore, we hook all
87 * directory entry updates because that is when link count updates occur. By
88 * shadowing transaction updates in this manner, live nlink check can ensure by
89 * locking the inode and the shadow structure that its own copies are not out
90 * of date. Because the hook code runs in a different process context from the
91 * scrub code and the scrub state flags are not accessed atomically, failures
92 * in the hook code must abort the iscan and the scrubber must notice the
93 * aborted scan and set the incomplete flag.
94 *
95 * Note that we use jump labels and srcu notifier hooks to minimize the
96 * overhead when live nlinks is /not/ running. Locking order for nlink
97 * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
98 */
99
100 /*
101 * Add a delta to an nlink counter, clamping the value to U32_MAX. Because
102 * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results
103 * even if we lose some precision.
104 */
105 static inline void
careful_add(xfs_nlink_t * nlinkp,int delta)106 careful_add(
107 xfs_nlink_t *nlinkp,
108 int delta)
109 {
110 uint64_t new_value = (uint64_t)(*nlinkp) + delta;
111
112 BUILD_BUG_ON(XFS_MAXLINK > U32_MAX);
113 *nlinkp = min_t(uint64_t, new_value, U32_MAX);
114 }
115
116 /* Update incore link count information. Caller must hold the nlinks lock. */
117 STATIC int
xchk_nlinks_update_incore(struct xchk_nlink_ctrs * xnc,xfs_ino_t ino,int parents_delta,int backrefs_delta,int children_delta)118 xchk_nlinks_update_incore(
119 struct xchk_nlink_ctrs *xnc,
120 xfs_ino_t ino,
121 int parents_delta,
122 int backrefs_delta,
123 int children_delta)
124 {
125 struct xchk_nlink nl;
126 int error;
127
128 if (!xnc->nlinks)
129 return 0;
130
131 error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
132 if (error)
133 return error;
134
135 trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta,
136 backrefs_delta, children_delta);
137
138 careful_add(&nl.parents, parents_delta);
139 careful_add(&nl.backrefs, backrefs_delta);
140 careful_add(&nl.children, children_delta);
141
142 nl.flags |= XCHK_NLINK_WRITTEN;
143 error = xfarray_store(xnc->nlinks, ino, &nl);
144 if (error == -EFBIG) {
145 /*
146 * EFBIG means we tried to store data at too high a byte offset
147 * in the sparse array. IOWs, we cannot complete the check and
148 * must notify userspace that the check was incomplete.
149 */
150 error = -ECANCELED;
151 }
152 return error;
153 }
154
155 /*
156 * Apply a link count change from the regular filesystem into our shadow link
157 * count structure based on a directory update in progress.
158 */
159 STATIC int
xchk_nlinks_live_update(struct notifier_block * nb,unsigned long action,void * data)160 xchk_nlinks_live_update(
161 struct notifier_block *nb,
162 unsigned long action,
163 void *data)
164 {
165 struct xfs_dir_update_params *p = data;
166 struct xchk_nlink_ctrs *xnc;
167 int error;
168
169 xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb);
170
171 /*
172 * Ignore temporary directories being used to stage dir repairs, since
173 * we don't bump the link counts of the children.
174 */
175 if (xrep_is_tempfile(p->dp))
176 return NOTIFY_DONE;
177
178 trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino,
179 p->delta, p->name->name, p->name->len);
180
181 /*
182 * If we've already scanned @dp, update the number of parents that link
183 * to @ip. If @ip is a subdirectory, update the number of child links
184 * going out of @dp.
185 */
186 if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) {
187 mutex_lock(&xnc->lock);
188 error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta,
189 0, 0);
190 if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode))
191 error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
192 0, p->delta);
193 mutex_unlock(&xnc->lock);
194 if (error)
195 goto out_abort;
196 }
197
198 /*
199 * If @ip is a subdirectory and we've already scanned it, update the
200 * number of backrefs pointing to @dp.
201 */
202 if (S_ISDIR(VFS_IC(p->ip)->i_mode) &&
203 xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) {
204 mutex_lock(&xnc->lock);
205 error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
206 p->delta, 0);
207 mutex_unlock(&xnc->lock);
208 if (error)
209 goto out_abort;
210 }
211
212 return NOTIFY_DONE;
213
214 out_abort:
215 xchk_iscan_abort(&xnc->collect_iscan);
216 return NOTIFY_DONE;
217 }
218
219 /* Bump the observed link count for the inode referenced by this entry. */
220 STATIC int
xchk_nlinks_collect_dirent(struct xfs_scrub * sc,struct xfs_inode * dp,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,xfs_ino_t ino,void * priv)221 xchk_nlinks_collect_dirent(
222 struct xfs_scrub *sc,
223 struct xfs_inode *dp,
224 xfs_dir2_dataptr_t dapos,
225 const struct xfs_name *name,
226 xfs_ino_t ino,
227 void *priv)
228 {
229 struct xchk_nlink_ctrs *xnc = priv;
230 bool dot = false, dotdot = false;
231 int error;
232
233 /* Does this name make sense? */
234 if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) {
235 error = -ECANCELED;
236 goto out_abort;
237 }
238
239 if (name->len == 1 && name->name[0] == '.')
240 dot = true;
241 else if (name->len == 2 && name->name[0] == '.' &&
242 name->name[1] == '.')
243 dotdot = true;
244
245 /* Don't accept a '.' entry that points somewhere else. */
246 if (dot && ino != dp->i_ino) {
247 error = -ECANCELED;
248 goto out_abort;
249 }
250
251 /* Don't accept an invalid inode number. */
252 if (!xfs_verify_dir_ino(sc->mp, ino)) {
253 error = -ECANCELED;
254 goto out_abort;
255 }
256
257 /* Update the shadow link counts if we haven't already failed. */
258
259 if (xchk_iscan_aborted(&xnc->collect_iscan)) {
260 error = -ECANCELED;
261 goto out_incomplete;
262 }
263
264 trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name);
265
266 mutex_lock(&xnc->lock);
267
268 /*
269 * If this is a dotdot entry, it is a back link from dp to ino. How
270 * we handle this depends on whether or not dp is the root directory.
271 *
272 * The root directory is its own parent, so we pretend the dotdot entry
273 * establishes the "parent" of the root directory. Increment the
274 * number of parents of the root directory.
275 *
276 * Otherwise, increment the number of backrefs pointing back to ino.
277 *
278 * If the filesystem has parent pointers, we walk the pptrs to
279 * determine the backref count.
280 */
281 if (dotdot) {
282 if (xchk_inode_is_dirtree_root(dp))
283 error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
284 else if (!xfs_has_parent(sc->mp))
285 error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
286 else
287 error = 0;
288 if (error)
289 goto out_unlock;
290 }
291
292 /*
293 * If this dirent is a forward link from dp to ino, increment the
294 * number of parents linking into ino.
295 */
296 if (!dot && !dotdot) {
297 error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
298 if (error)
299 goto out_unlock;
300 }
301
302 /*
303 * If this dirent is a forward link to a subdirectory, increment the
304 * number of child links of dp.
305 */
306 if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) {
307 error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1);
308 if (error)
309 goto out_unlock;
310 }
311
312 mutex_unlock(&xnc->lock);
313 return 0;
314
315 out_unlock:
316 mutex_unlock(&xnc->lock);
317 out_abort:
318 xchk_iscan_abort(&xnc->collect_iscan);
319 out_incomplete:
320 xchk_set_incomplete(sc);
321 return error;
322 }
323
324 /* Bump the backref count for the inode referenced by this parent pointer. */
325 STATIC int
xchk_nlinks_collect_pptr(struct xfs_scrub * sc,struct xfs_inode * ip,unsigned int attr_flags,const unsigned char * name,unsigned int namelen,const void * value,unsigned int valuelen,void * priv)326 xchk_nlinks_collect_pptr(
327 struct xfs_scrub *sc,
328 struct xfs_inode *ip,
329 unsigned int attr_flags,
330 const unsigned char *name,
331 unsigned int namelen,
332 const void *value,
333 unsigned int valuelen,
334 void *priv)
335 {
336 struct xfs_name xname = {
337 .name = name,
338 .len = namelen,
339 };
340 struct xchk_nlink_ctrs *xnc = priv;
341 const struct xfs_parent_rec *pptr_rec = value;
342 xfs_ino_t parent_ino;
343 int error;
344
345 /* Update the shadow link counts if we haven't already failed. */
346
347 if (xchk_iscan_aborted(&xnc->collect_iscan)) {
348 error = -ECANCELED;
349 goto out_incomplete;
350 }
351
352 if (!(attr_flags & XFS_ATTR_PARENT))
353 return 0;
354
355 error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
356 valuelen, &parent_ino, NULL);
357 if (error)
358 return error;
359
360 trace_xchk_nlinks_collect_pptr(sc->mp, ip, &xname, pptr_rec);
361
362 mutex_lock(&xnc->lock);
363
364 error = xchk_nlinks_update_incore(xnc, parent_ino, 0, 1, 0);
365 if (error)
366 goto out_unlock;
367
368 mutex_unlock(&xnc->lock);
369 return 0;
370
371 out_unlock:
372 mutex_unlock(&xnc->lock);
373 xchk_iscan_abort(&xnc->collect_iscan);
374 out_incomplete:
375 xchk_set_incomplete(sc);
376 return error;
377 }
378
379 static uint
xchk_nlinks_ilock_dir(struct xfs_inode * ip)380 xchk_nlinks_ilock_dir(
381 struct xfs_inode *ip)
382 {
383 uint lock_mode = XFS_ILOCK_SHARED;
384
385 /*
386 * We're going to scan the directory entries, so we must be ready to
387 * pull the data fork mappings into memory if they aren't already.
388 */
389 if (xfs_need_iread_extents(&ip->i_df))
390 lock_mode = XFS_ILOCK_EXCL;
391
392 /*
393 * We're going to scan the parent pointers, so we must be ready to
394 * pull the attr fork mappings into memory if they aren't already.
395 */
396 if (xfs_has_parent(ip->i_mount) && xfs_inode_has_attr_fork(ip) &&
397 xfs_need_iread_extents(&ip->i_af))
398 lock_mode = XFS_ILOCK_EXCL;
399
400 /*
401 * Take the IOLOCK so that other threads cannot start a directory
402 * update while we're scanning.
403 */
404 lock_mode |= XFS_IOLOCK_SHARED;
405 xfs_ilock(ip, lock_mode);
406 return lock_mode;
407 }
408
409 /* Walk a directory to bump the observed link counts of the children. */
410 STATIC int
xchk_nlinks_collect_dir(struct xchk_nlink_ctrs * xnc,struct xfs_inode * dp)411 xchk_nlinks_collect_dir(
412 struct xchk_nlink_ctrs *xnc,
413 struct xfs_inode *dp)
414 {
415 struct xfs_scrub *sc = xnc->sc;
416 unsigned int lock_mode;
417 int error = 0;
418
419 /*
420 * Ignore temporary directories being used to stage dir repairs, since
421 * we don't bump the link counts of the children.
422 */
423 if (xrep_is_tempfile(dp))
424 return 0;
425
426 /* Prevent anyone from changing this directory while we walk it. */
427 lock_mode = xchk_nlinks_ilock_dir(dp);
428
429 /*
430 * The dotdot entry of an unlinked directory still points to the last
431 * parent, but the parent no longer links to this directory. Skip the
432 * directory to avoid overcounting.
433 */
434 if (VFS_I(dp)->i_nlink == 0)
435 goto out_unlock;
436
437 /*
438 * We cannot count file links if the directory looks as though it has
439 * been zapped by the inode record repair code.
440 */
441 if (xchk_dir_looks_zapped(dp)) {
442 error = -EBUSY;
443 goto out_abort;
444 }
445
446 error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc);
447 if (error == -ECANCELED) {
448 error = 0;
449 goto out_unlock;
450 }
451 if (error)
452 goto out_abort;
453
454 /* Walk the parent pointers to get real backref counts. */
455 if (xfs_has_parent(sc->mp)) {
456 /*
457 * If the extended attributes look as though they has been
458 * zapped by the inode record repair code, we cannot scan for
459 * parent pointers.
460 */
461 if (xchk_pptr_looks_zapped(dp)) {
462 error = -EBUSY;
463 goto out_unlock;
464 }
465
466 error = xchk_xattr_walk(sc, dp, xchk_nlinks_collect_pptr, NULL,
467 xnc);
468 if (error == -ECANCELED) {
469 error = 0;
470 goto out_unlock;
471 }
472 if (error)
473 goto out_abort;
474 }
475
476 xchk_iscan_mark_visited(&xnc->collect_iscan, dp);
477 goto out_unlock;
478
479 out_abort:
480 xchk_set_incomplete(sc);
481 xchk_iscan_abort(&xnc->collect_iscan);
482 out_unlock:
483 xfs_iunlock(dp, lock_mode);
484 return error;
485 }
486
487 /* If this looks like a valid pointer, count it. */
488 static inline int
xchk_nlinks_collect_metafile(struct xchk_nlink_ctrs * xnc,xfs_ino_t ino)489 xchk_nlinks_collect_metafile(
490 struct xchk_nlink_ctrs *xnc,
491 xfs_ino_t ino)
492 {
493 if (!xfs_verify_ino(xnc->sc->mp, ino))
494 return 0;
495
496 trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino);
497 return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
498 }
499
500 /* Bump the link counts of metadata files rooted in the superblock. */
501 STATIC int
xchk_nlinks_collect_metafiles(struct xchk_nlink_ctrs * xnc)502 xchk_nlinks_collect_metafiles(
503 struct xchk_nlink_ctrs *xnc)
504 {
505 struct xfs_mount *mp = xnc->sc->mp;
506 int error = -ECANCELED;
507
508
509 if (xchk_iscan_aborted(&xnc->collect_iscan))
510 goto out_incomplete;
511
512 mutex_lock(&xnc->lock);
513 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino);
514 if (error)
515 goto out_abort;
516
517 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino);
518 if (error)
519 goto out_abort;
520
521 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino);
522 if (error)
523 goto out_abort;
524
525 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino);
526 if (error)
527 goto out_abort;
528
529 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino);
530 if (error)
531 goto out_abort;
532 mutex_unlock(&xnc->lock);
533
534 return 0;
535
536 out_abort:
537 mutex_unlock(&xnc->lock);
538 xchk_iscan_abort(&xnc->collect_iscan);
539 out_incomplete:
540 xchk_set_incomplete(xnc->sc);
541 return error;
542 }
543
544 /* Advance the collection scan cursor for this non-directory file. */
545 static inline int
xchk_nlinks_collect_file(struct xchk_nlink_ctrs * xnc,struct xfs_inode * ip)546 xchk_nlinks_collect_file(
547 struct xchk_nlink_ctrs *xnc,
548 struct xfs_inode *ip)
549 {
550 xfs_ilock(ip, XFS_IOLOCK_SHARED);
551 xchk_iscan_mark_visited(&xnc->collect_iscan, ip);
552 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
553 return 0;
554 }
555
556 /* Walk all directories and count inode links. */
557 STATIC int
xchk_nlinks_collect(struct xchk_nlink_ctrs * xnc)558 xchk_nlinks_collect(
559 struct xchk_nlink_ctrs *xnc)
560 {
561 struct xfs_scrub *sc = xnc->sc;
562 struct xfs_inode *ip;
563 int error;
564
565 /* Count the rt and quota files that are rooted in the superblock. */
566 error = xchk_nlinks_collect_metafiles(xnc);
567 if (error)
568 return error;
569
570 /*
571 * Set up for a potentially lengthy filesystem scan by reducing our
572 * transaction resource usage for the duration. Specifically:
573 *
574 * Cancel the transaction to release the log grant space while we scan
575 * the filesystem.
576 *
577 * Create a new empty transaction to eliminate the possibility of the
578 * inode scan deadlocking on cyclical metadata.
579 *
580 * We pass the empty transaction to the file scanning function to avoid
581 * repeatedly cycling empty transactions. This can be done even though
582 * we take the IOLOCK to quiesce the file because empty transactions
583 * do not take sb_internal.
584 */
585 xchk_trans_cancel(sc);
586 xchk_trans_alloc_empty(sc);
587
588 while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
589 if (S_ISDIR(VFS_I(ip)->i_mode))
590 error = xchk_nlinks_collect_dir(xnc, ip);
591 else
592 error = xchk_nlinks_collect_file(xnc, ip);
593 xchk_irele(sc, ip);
594 if (error)
595 break;
596
597 if (xchk_should_terminate(sc, &error))
598 break;
599 }
600 xchk_iscan_iter_finish(&xnc->collect_iscan);
601 if (error) {
602 xchk_set_incomplete(sc);
603 /*
604 * If we couldn't grab an inode that was busy with a state
605 * change, change the error code so that we exit to userspace
606 * as quickly as possible.
607 */
608 if (error == -EBUSY)
609 return -ECANCELED;
610 return error;
611 }
612
613 /*
614 * Switch out for a real transaction in preparation for building a new
615 * tree.
616 */
617 xchk_trans_cancel(sc);
618 return xchk_setup_fs(sc);
619 }
620
621 /*
622 * Part 2: Comparing file link counters. Walk each inode and compare the link
623 * counts against our shadow information; and then walk each shadow link count
624 * structure (that wasn't covered in the first part), comparing it against the
625 * file.
626 */
627
628 /* Read the observed link count for comparison with the actual inode. */
629 STATIC int
xchk_nlinks_comparison_read(struct xchk_nlink_ctrs * xnc,xfs_ino_t ino,struct xchk_nlink * obs)630 xchk_nlinks_comparison_read(
631 struct xchk_nlink_ctrs *xnc,
632 xfs_ino_t ino,
633 struct xchk_nlink *obs)
634 {
635 struct xchk_nlink nl;
636 int error;
637
638 error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
639 if (error)
640 return error;
641
642 nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN);
643
644 error = xfarray_store(xnc->nlinks, ino, &nl);
645 if (error == -EFBIG) {
646 /*
647 * EFBIG means we tried to store data at too high a byte offset
648 * in the sparse array. IOWs, we cannot complete the check and
649 * must notify userspace that the check was incomplete. This
650 * shouldn't really happen outside of the collection phase.
651 */
652 xchk_set_incomplete(xnc->sc);
653 return -ECANCELED;
654 }
655 if (error)
656 return error;
657
658 /* Copy the counters, but do not expose the internal state. */
659 obs->parents = nl.parents;
660 obs->backrefs = nl.backrefs;
661 obs->children = nl.children;
662 obs->flags = 0;
663 return 0;
664 }
665
666 /* Check our link count against an inode. */
667 STATIC int
xchk_nlinks_compare_inode(struct xchk_nlink_ctrs * xnc,struct xfs_inode * ip)668 xchk_nlinks_compare_inode(
669 struct xchk_nlink_ctrs *xnc,
670 struct xfs_inode *ip)
671 {
672 struct xchk_nlink obs;
673 struct xfs_scrub *sc = xnc->sc;
674 uint64_t total_links;
675 unsigned int actual_nlink;
676 int error;
677
678 /*
679 * Ignore temporary files being used to stage repairs, since we assume
680 * they're correct for non-directories, and the directory repair code
681 * doesn't bump the link counts for the children.
682 */
683 if (xrep_is_tempfile(ip))
684 return 0;
685
686 xfs_ilock(ip, XFS_ILOCK_SHARED);
687 mutex_lock(&xnc->lock);
688
689 if (xchk_iscan_aborted(&xnc->collect_iscan)) {
690 xchk_set_incomplete(xnc->sc);
691 error = -ECANCELED;
692 goto out_scanlock;
693 }
694
695 error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs);
696 if (error)
697 goto out_scanlock;
698
699 /*
700 * If we don't have ftype to get an accurate count of the subdirectory
701 * entries in this directory, take advantage of the fact that on a
702 * consistent ftype=0 filesystem, the number of subdirectory
703 * backreferences (dotdot entries) pointing towards this directory
704 * should be equal to the number of subdirectory entries in the
705 * directory.
706 */
707 if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode))
708 obs.children = obs.backrefs;
709
710 total_links = xchk_nlink_total(ip, &obs);
711 actual_nlink = VFS_I(ip)->i_nlink;
712
713 trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs);
714
715 /*
716 * If we found so many parents that we'd overflow i_nlink, we must flag
717 * this as a corruption. The VFS won't let users increase the link
718 * count, but it will let them decrease it.
719 */
720 if (total_links > XFS_NLINK_PINNED) {
721 xchk_ino_set_corrupt(sc, ip->i_ino);
722 goto out_corrupt;
723 } else if (total_links > XFS_MAXLINK) {
724 xchk_ino_set_warning(sc, ip->i_ino);
725 }
726
727 /* Link counts should match. */
728 if (total_links != actual_nlink) {
729 xchk_ino_set_corrupt(sc, ip->i_ino);
730 goto out_corrupt;
731 }
732
733 if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) {
734 /*
735 * The collection phase ignores directories with zero link
736 * count, so we ignore them here too.
737 *
738 * The number of subdirectory backreferences (dotdot entries)
739 * pointing towards this directory should be equal to the
740 * number of subdirectory entries in the directory.
741 */
742 if (obs.children != obs.backrefs)
743 xchk_ino_xref_set_corrupt(sc, ip->i_ino);
744 } else {
745 /*
746 * Non-directories and unlinked directories should not have
747 * back references.
748 */
749 if (obs.backrefs != 0) {
750 xchk_ino_set_corrupt(sc, ip->i_ino);
751 goto out_corrupt;
752 }
753
754 /*
755 * Non-directories and unlinked directories should not have
756 * children.
757 */
758 if (obs.children != 0) {
759 xchk_ino_set_corrupt(sc, ip->i_ino);
760 goto out_corrupt;
761 }
762 }
763
764 if (xchk_inode_is_dirtree_root(ip)) {
765 /*
766 * For the root of a directory tree, both the '.' and '..'
767 * entries should point to the root directory. The dotdot
768 * entry is counted as a parent of the root /and/ a backref of
769 * the root directory.
770 */
771 if (obs.parents != 1) {
772 xchk_ino_set_corrupt(sc, ip->i_ino);
773 goto out_corrupt;
774 }
775 } else if (actual_nlink > 0) {
776 /*
777 * Linked files that are not the root directory should have at
778 * least one parent.
779 */
780 if (obs.parents == 0) {
781 xchk_ino_set_corrupt(sc, ip->i_ino);
782 goto out_corrupt;
783 }
784 }
785
786 out_corrupt:
787 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
788 error = -ECANCELED;
789 out_scanlock:
790 mutex_unlock(&xnc->lock);
791 xfs_iunlock(ip, XFS_ILOCK_SHARED);
792 return error;
793 }
794
795 /*
796 * Check our link count against an inode that wasn't checked previously. This
797 * is intended to catch directories with dangling links, though we could be
798 * racing with inode allocation in other threads.
799 */
800 STATIC int
xchk_nlinks_compare_inum(struct xchk_nlink_ctrs * xnc,xfs_ino_t ino)801 xchk_nlinks_compare_inum(
802 struct xchk_nlink_ctrs *xnc,
803 xfs_ino_t ino)
804 {
805 struct xchk_nlink obs;
806 struct xfs_mount *mp = xnc->sc->mp;
807 struct xfs_trans *tp = xnc->sc->tp;
808 struct xfs_buf *agi_bp;
809 struct xfs_inode *ip;
810 int error;
811
812 /*
813 * The first iget failed, so try again with the variant that returns
814 * either an incore inode or the AGI buffer. If the function returns
815 * EINVAL/ENOENT, it should have passed us the AGI buffer so that we
816 * can guarantee that the inode won't be allocated while we check for
817 * a zero link count in the observed link count data.
818 */
819 error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip);
820 if (!error) {
821 /* Actually got an inode, so use the inode compare. */
822 error = xchk_nlinks_compare_inode(xnc, ip);
823 xchk_irele(xnc->sc, ip);
824 return error;
825 }
826 if (error == -ENOENT || error == -EINVAL) {
827 /* No inode was found. Check for zero link count below. */
828 error = 0;
829 }
830 if (error)
831 goto out_agi;
832
833 /* Ensure that we have protected against inode allocation/freeing. */
834 if (agi_bp == NULL) {
835 ASSERT(agi_bp != NULL);
836 xchk_set_incomplete(xnc->sc);
837 return -ECANCELED;
838 }
839
840 if (xchk_iscan_aborted(&xnc->collect_iscan)) {
841 xchk_set_incomplete(xnc->sc);
842 error = -ECANCELED;
843 goto out_agi;
844 }
845
846 mutex_lock(&xnc->lock);
847 error = xchk_nlinks_comparison_read(xnc, ino, &obs);
848 if (error)
849 goto out_scanlock;
850
851 trace_xchk_nlinks_check_zero(mp, ino, &obs);
852
853 /*
854 * If we can't grab the inode, the link count had better be zero. We
855 * still hold the AGI to prevent inode allocation/freeing.
856 */
857 if (xchk_nlink_total(NULL, &obs) != 0) {
858 xchk_ino_set_corrupt(xnc->sc, ino);
859 error = -ECANCELED;
860 }
861
862 out_scanlock:
863 mutex_unlock(&xnc->lock);
864 out_agi:
865 if (agi_bp)
866 xfs_trans_brelse(tp, agi_bp);
867 return error;
868 }
869
870 /*
871 * Try to visit every inode in the filesystem to compare the link count. Move
872 * on if we can't grab an inode, since we'll revisit unchecked nlink records in
873 * the second part.
874 */
875 static int
xchk_nlinks_compare_iter(struct xchk_nlink_ctrs * xnc,struct xfs_inode ** ipp)876 xchk_nlinks_compare_iter(
877 struct xchk_nlink_ctrs *xnc,
878 struct xfs_inode **ipp)
879 {
880 int error;
881
882 do {
883 error = xchk_iscan_iter(&xnc->compare_iscan, ipp);
884 } while (error == -EBUSY);
885
886 return error;
887 }
888
889 /* Compare the link counts we observed against the live information. */
890 STATIC int
xchk_nlinks_compare(struct xchk_nlink_ctrs * xnc)891 xchk_nlinks_compare(
892 struct xchk_nlink_ctrs *xnc)
893 {
894 struct xchk_nlink nl;
895 struct xfs_scrub *sc = xnc->sc;
896 struct xfs_inode *ip;
897 xfarray_idx_t cur = XFARRAY_CURSOR_INIT;
898 int error;
899
900 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
901 return 0;
902
903 /*
904 * Create a new empty transaction so that we can advance the iscan
905 * cursor without deadlocking if the inobt has a cycle and push on the
906 * inactivation workqueue.
907 */
908 xchk_trans_cancel(sc);
909 xchk_trans_alloc_empty(sc);
910
911 /*
912 * Use the inobt to walk all allocated inodes to compare the link
913 * counts. Inodes skipped by _compare_iter will be tried again in the
914 * next phase of the scan.
915 */
916 xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan);
917 while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) {
918 error = xchk_nlinks_compare_inode(xnc, ip);
919 xchk_iscan_mark_visited(&xnc->compare_iscan, ip);
920 xchk_irele(sc, ip);
921 if (error)
922 break;
923
924 if (xchk_should_terminate(sc, &error))
925 break;
926 }
927 xchk_iscan_iter_finish(&xnc->compare_iscan);
928 xchk_iscan_teardown(&xnc->compare_iscan);
929 if (error)
930 return error;
931
932 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
933 return 0;
934
935 /*
936 * Walk all the non-null nlink observations that weren't checked in the
937 * previous step.
938 */
939 mutex_lock(&xnc->lock);
940 while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) {
941 xfs_ino_t ino = cur - 1;
942
943 if (nl.flags & XCHK_NLINK_COMPARE_SCANNED)
944 continue;
945
946 mutex_unlock(&xnc->lock);
947
948 error = xchk_nlinks_compare_inum(xnc, ino);
949 if (error)
950 return error;
951
952 if (xchk_should_terminate(xnc->sc, &error))
953 return error;
954
955 mutex_lock(&xnc->lock);
956 }
957 mutex_unlock(&xnc->lock);
958
959 return error;
960 }
961
962 /* Tear down everything associated with a nlinks check. */
963 static void
xchk_nlinks_teardown_scan(void * priv)964 xchk_nlinks_teardown_scan(
965 void *priv)
966 {
967 struct xchk_nlink_ctrs *xnc = priv;
968
969 /* Discourage any hook functions that might be running. */
970 xchk_iscan_abort(&xnc->collect_iscan);
971
972 xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook);
973
974 xfarray_destroy(xnc->nlinks);
975 xnc->nlinks = NULL;
976
977 xchk_iscan_teardown(&xnc->collect_iscan);
978 mutex_destroy(&xnc->lock);
979 xnc->sc = NULL;
980 }
981
982 /*
983 * Scan all inodes in the entire filesystem to generate link count data. If
984 * the scan is successful, the counts will be left alive for a repair. If any
985 * error occurs, we'll tear everything down.
986 */
987 STATIC int
xchk_nlinks_setup_scan(struct xfs_scrub * sc,struct xchk_nlink_ctrs * xnc)988 xchk_nlinks_setup_scan(
989 struct xfs_scrub *sc,
990 struct xchk_nlink_ctrs *xnc)
991 {
992 struct xfs_mount *mp = sc->mp;
993 char *descr;
994 unsigned long long max_inos;
995 xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1;
996 xfs_agino_t first_agino, last_agino;
997 int error;
998
999 mutex_init(&xnc->lock);
1000
1001 /* Retry iget every tenth of a second for up to 30 seconds. */
1002 xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan);
1003
1004 /*
1005 * Set up enough space to store an nlink record for the highest
1006 * possible inode number in this system.
1007 */
1008 xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
1009 max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
1010 descr = xchk_xfile_descr(sc, "file link counts");
1011 error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
1012 sizeof(struct xchk_nlink), &xnc->nlinks);
1013 kfree(descr);
1014 if (error)
1015 goto out_teardown;
1016
1017 /*
1018 * Hook into the directory entry code so that we can capture updates to
1019 * file link counts. The hook only triggers for inodes that were
1020 * already scanned, and the scanner thread takes each inode's ILOCK,
1021 * which means that any in-progress inode updates will finish before we
1022 * can scan the inode.
1023 */
1024 ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
1025 xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update);
1026 error = xfs_dir_hook_add(mp, &xnc->dhook);
1027 if (error)
1028 goto out_teardown;
1029
1030 /* Use deferred cleanup to pass the inode link count data to repair. */
1031 sc->buf_cleanup = xchk_nlinks_teardown_scan;
1032 return 0;
1033
1034 out_teardown:
1035 xchk_nlinks_teardown_scan(xnc);
1036 return error;
1037 }
1038
1039 /* Scrub the link count of all inodes on the filesystem. */
1040 int
xchk_nlinks(struct xfs_scrub * sc)1041 xchk_nlinks(
1042 struct xfs_scrub *sc)
1043 {
1044 struct xchk_nlink_ctrs *xnc = sc->buf;
1045 int error = 0;
1046
1047 /* Set ourselves up to check link counts on the live filesystem. */
1048 error = xchk_nlinks_setup_scan(sc, xnc);
1049 if (error)
1050 return error;
1051
1052 /* Walk all inodes, picking up link count information. */
1053 error = xchk_nlinks_collect(xnc);
1054 if (!xchk_xref_process_error(sc, 0, 0, &error))
1055 return error;
1056
1057 /* Fail fast if we're not playing with a full dataset. */
1058 if (xchk_iscan_aborted(&xnc->collect_iscan))
1059 xchk_set_incomplete(sc);
1060 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
1061 return 0;
1062
1063 /* Compare link counts. */
1064 error = xchk_nlinks_compare(xnc);
1065 if (!xchk_xref_process_error(sc, 0, 0, &error))
1066 return error;
1067
1068 /* Check one last time for an incomplete dataset. */
1069 if (xchk_iscan_aborted(&xnc->collect_iscan))
1070 xchk_set_incomplete(sc);
1071
1072 return 0;
1073 }
1074