1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_icache.h"
16 #include "xfs_dir2.h"
17 #include "xfs_dir2_priv.h"
18 #include "xfs_health.h"
19 #include "xfs_attr.h"
20 #include "xfs_parent.h"
21 #include "scrub/scrub.h"
22 #include "scrub/common.h"
23 #include "scrub/dabtree.h"
24 #include "scrub/readdir.h"
25 #include "scrub/health.h"
26 #include "scrub/repair.h"
27 #include "scrub/trace.h"
28 #include "scrub/xfile.h"
29 #include "scrub/xfarray.h"
30 #include "scrub/xfblob.h"
31
/*
 * Prepare a scrub context for checking a directory.
 *
 * If this scrub request could turn into a repair, the directory repair setup
 * runs first.  After that the common inode-contents setup is invoked with a
 * zero second argument.
 *
 * Returns 0, or the first error that either setup helper reports.
 */
int
xchk_setup_directory(
	struct xfs_scrub	*sc)
{
	if (xchk_could_repair(sc)) {
		int		ret = xrep_setup_directory(sc);

		if (ret)
			return ret;
	}

	return xchk_setup_inode_contents(sc, 0);
}
47
48 /* Directories */
49
50 /* Deferred directory entry that we saved for later. */
51 struct xchk_dirent {
52 /* Cookie for retrieval of the dirent name. */
53 xfblob_cookie name_cookie;
54
55 /* Child inode number. */
56 xfs_ino_t ino;
57
58 /* Length of the pptr name. */
59 uint8_t namelen;
60 };
61
62 struct xchk_dir {
63 struct xfs_scrub *sc;
64
65 /* information for parent pointer validation. */
66 struct xfs_parent_rec pptr_rec;
67 struct xfs_da_args pptr_args;
68
69 /* Fixed-size array of xchk_dirent structures. */
70 struct xfarray *dir_entries;
71
72 /* Blobs containing dirent names. */
73 struct xfblob *dir_names;
74
75 /* If we've cycled the ILOCK, we must revalidate deferred dirents. */
76 bool need_revalidate;
77
78 /* Name buffer for dirent revalidation. */
79 struct xfs_name xname;
80 uint8_t namebuf[MAXNAMELEN];
81 };
82
83 /* Scrub a directory entry. */
84
85 /* Check that an inode's mode matches a given XFS_DIR3_FT_* type. */
86 STATIC void
xchk_dir_check_ftype(struct xfs_scrub * sc,xfs_fileoff_t offset,struct xfs_inode * ip,int ftype)87 xchk_dir_check_ftype(
88 struct xfs_scrub *sc,
89 xfs_fileoff_t offset,
90 struct xfs_inode *ip,
91 int ftype)
92 {
93 struct xfs_mount *mp = sc->mp;
94
95 if (!xfs_has_ftype(mp)) {
96 if (ftype != XFS_DIR3_FT_UNKNOWN && ftype != XFS_DIR3_FT_DIR)
97 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
98 return;
99 }
100
101 if (xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype)
102 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
103
104 /*
105 * Metadata and regular inodes cannot cross trees. This property
106 * cannot change without a full inode free and realloc cycle, so it's
107 * safe to check this without holding locks.
108 */
109 if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(sc->ip))
110 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
111 }
112
113 /*
114 * Try to lock a child file for checking parent pointers. Returns the inode
115 * flags for the locks we now hold, or zero if we failed.
116 */
117 STATIC unsigned int
xchk_dir_lock_child(struct xfs_scrub * sc,struct xfs_inode * ip)118 xchk_dir_lock_child(
119 struct xfs_scrub *sc,
120 struct xfs_inode *ip)
121 {
122 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
123 return 0;
124
125 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
126 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
127 return 0;
128 }
129
130 if (!xfs_inode_has_attr_fork(ip) || !xfs_need_iread_extents(&ip->i_af))
131 return XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED;
132
133 xfs_iunlock(ip, XFS_ILOCK_SHARED);
134
135 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
136 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
137 return 0;
138 }
139
140 return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
141 }
142
143 /* Check the backwards link (parent pointer) associated with this dirent. */
144 STATIC int
xchk_dir_parent_pointer(struct xchk_dir * sd,const struct xfs_name * name,struct xfs_inode * ip)145 xchk_dir_parent_pointer(
146 struct xchk_dir *sd,
147 const struct xfs_name *name,
148 struct xfs_inode *ip)
149 {
150 struct xfs_scrub *sc = sd->sc;
151 int error;
152
153 xfs_inode_to_parent_rec(&sd->pptr_rec, sc->ip);
154 error = xfs_parent_lookup(sc->tp, ip, name, &sd->pptr_rec,
155 &sd->pptr_args);
156 if (error == -ENOATTR)
157 xchk_fblock_xref_set_corrupt(sc, XFS_DATA_FORK, 0);
158
159 return 0;
160 }
161
162 /* Look for a parent pointer matching this dirent, if the child isn't busy. */
163 STATIC int
xchk_dir_check_pptr_fast(struct xchk_dir * sd,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,struct xfs_inode * ip)164 xchk_dir_check_pptr_fast(
165 struct xchk_dir *sd,
166 xfs_dir2_dataptr_t dapos,
167 const struct xfs_name *name,
168 struct xfs_inode *ip)
169 {
170 struct xfs_scrub *sc = sd->sc;
171 unsigned int lockmode;
172 int error;
173
174 /* dot and dotdot entries do not have parent pointers */
175 if (xfs_dir2_samename(name, &xfs_name_dot) ||
176 xfs_dir2_samename(name, &xfs_name_dotdot))
177 return 0;
178
179 /* No self-referential non-dot or dotdot dirents. */
180 if (ip == sc->ip) {
181 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
182 return -ECANCELED;
183 }
184
185 /* Try to lock the inode. */
186 lockmode = xchk_dir_lock_child(sc, ip);
187 if (!lockmode) {
188 struct xchk_dirent save_de = {
189 .namelen = name->len,
190 .ino = ip->i_ino,
191 };
192
193 /* Couldn't lock the inode, so save the dirent for later. */
194 trace_xchk_dir_defer(sc->ip, name, ip->i_ino);
195
196 error = xfblob_storename(sd->dir_names, &save_de.name_cookie,
197 name);
198 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
199 &error))
200 return error;
201
202 error = xfarray_append(sd->dir_entries, &save_de);
203 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
204 &error))
205 return error;
206
207 return 0;
208 }
209
210 error = xchk_dir_parent_pointer(sd, name, ip);
211 xfs_iunlock(ip, lockmode);
212 return error;
213 }
214
215 /*
216 * Scrub a single directory entry.
217 *
218 * Check the inode number to make sure it's sane, then we check that we can
219 * look up this filename. Finally, we check the ftype.
220 */
221 STATIC int
xchk_dir_actor(struct xfs_scrub * sc,struct xfs_inode * dp,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,xfs_ino_t ino,void * priv)222 xchk_dir_actor(
223 struct xfs_scrub *sc,
224 struct xfs_inode *dp,
225 xfs_dir2_dataptr_t dapos,
226 const struct xfs_name *name,
227 xfs_ino_t ino,
228 void *priv)
229 {
230 struct xfs_mount *mp = dp->i_mount;
231 struct xfs_inode *ip;
232 struct xchk_dir *sd = priv;
233 xfs_ino_t lookup_ino;
234 xfs_dablk_t offset;
235 int error = 0;
236
237 offset = xfs_dir2_db_to_da(mp->m_dir_geo,
238 xfs_dir2_dataptr_to_db(mp->m_dir_geo, dapos));
239
240 if (xchk_should_terminate(sc, &error))
241 return error;
242
243 /* Does this inode number make sense? */
244 if (!xfs_verify_dir_ino(mp, ino)) {
245 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
246 return -ECANCELED;
247 }
248
249 /* Does this name make sense? */
250 if (!xfs_dir2_namecheck(name->name, name->len)) {
251 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
252 return -ECANCELED;
253 }
254
255 if (xfs_dir2_samename(name, &xfs_name_dot)) {
256 /* If this is "." then check that the inum matches the dir. */
257 if (ino != dp->i_ino)
258 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
259 } else if (xfs_dir2_samename(name, &xfs_name_dotdot)) {
260 /*
261 * If this is ".." in the root inode, check that the inum
262 * matches this dir.
263 */
264 if (xchk_inode_is_dirtree_root(dp) && ino != dp->i_ino)
265 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
266 }
267
268 /* Verify that we can look up this name by hash. */
269 error = xchk_dir_lookup(sc, dp, name, &lookup_ino);
270 /* ENOENT means the hash lookup failed and the dir is corrupt */
271 if (error == -ENOENT)
272 error = -EFSCORRUPTED;
273 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error))
274 goto out;
275 if (lookup_ino != ino) {
276 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
277 return -ECANCELED;
278 }
279
280 /*
281 * Grab the inode pointed to by the dirent. We release the inode
282 * before we cancel the scrub transaction.
283 *
284 * If _iget returns -EINVAL or -ENOENT then the child inode number is
285 * garbage and the directory is corrupt. If the _iget returns
286 * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a
287 * cross referencing error. Any other error is an operational error.
288 */
289 error = xchk_iget(sc, ino, &ip);
290 if (error == -EINVAL || error == -ENOENT) {
291 error = -EFSCORRUPTED;
292 xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
293 goto out;
294 }
295 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, offset, &error))
296 goto out;
297
298 xchk_dir_check_ftype(sc, offset, ip, name->type);
299
300 if (xfs_has_parent(mp)) {
301 error = xchk_dir_check_pptr_fast(sd, dapos, name, ip);
302 if (error)
303 goto out_rele;
304 }
305
306 out_rele:
307 xchk_irele(sc, ip);
308 out:
309 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
310 return -ECANCELED;
311 return error;
312 }
313
314 /* Scrub a directory btree record. */
315 STATIC int
xchk_dir_rec(struct xchk_da_btree * ds,int level)316 xchk_dir_rec(
317 struct xchk_da_btree *ds,
318 int level)
319 {
320 struct xfs_name dname = { };
321 struct xfs_da_state_blk *blk = &ds->state->path.blk[level];
322 struct xfs_mount *mp = ds->state->mp;
323 struct xfs_inode *dp = ds->dargs.dp;
324 struct xfs_da_geometry *geo = mp->m_dir_geo;
325 struct xfs_dir2_data_entry *dent;
326 struct xfs_buf *bp;
327 struct xfs_dir2_leaf_entry *ent;
328 unsigned int end;
329 unsigned int iter_off;
330 xfs_ino_t ino;
331 xfs_dablk_t rec_bno;
332 xfs_dir2_db_t db;
333 xfs_dir2_data_aoff_t off;
334 xfs_dir2_dataptr_t ptr;
335 xfs_dahash_t calc_hash;
336 xfs_dahash_t hash;
337 struct xfs_dir3_icleaf_hdr hdr;
338 unsigned int tag;
339 int error;
340
341 ASSERT(blk->magic == XFS_DIR2_LEAF1_MAGIC ||
342 blk->magic == XFS_DIR2_LEAFN_MAGIC);
343
344 xfs_dir2_leaf_hdr_from_disk(mp, &hdr, blk->bp->b_addr);
345 ent = hdr.ents + blk->index;
346
347 /* Check the hash of the entry. */
348 error = xchk_da_btree_hash(ds, level, &ent->hashval);
349 if (error)
350 goto out;
351
352 /* Valid hash pointer? */
353 ptr = be32_to_cpu(ent->address);
354 if (ptr == 0)
355 return 0;
356
357 /* Find the directory entry's location. */
358 db = xfs_dir2_dataptr_to_db(geo, ptr);
359 off = xfs_dir2_dataptr_to_off(geo, ptr);
360 rec_bno = xfs_dir2_db_to_da(geo, db);
361
362 if (rec_bno >= geo->leafblk) {
363 xchk_da_set_corrupt(ds, level);
364 goto out;
365 }
366 error = xfs_dir3_data_read(ds->dargs.trans, dp, ds->dargs.owner,
367 rec_bno, XFS_DABUF_MAP_HOLE_OK, &bp);
368 if (!xchk_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno,
369 &error))
370 goto out;
371 if (!bp) {
372 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
373 goto out;
374 }
375 xchk_buffer_recheck(ds->sc, bp);
376
377 if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
378 goto out_relse;
379
380 dent = bp->b_addr + off;
381
382 /* Make sure we got a real directory entry. */
383 iter_off = geo->data_entry_offset;
384 end = xfs_dir3_data_end_offset(geo, bp->b_addr);
385 if (!end) {
386 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
387 goto out_relse;
388 }
389 for (;;) {
390 struct xfs_dir2_data_entry *dep = bp->b_addr + iter_off;
391 struct xfs_dir2_data_unused *dup = bp->b_addr + iter_off;
392
393 if (iter_off >= end) {
394 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
395 goto out_relse;
396 }
397
398 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
399 iter_off += be16_to_cpu(dup->length);
400 continue;
401 }
402 if (dep == dent)
403 break;
404 iter_off += xfs_dir2_data_entsize(mp, dep->namelen);
405 }
406
407 /* Retrieve the entry, sanity check it, and compare hashes. */
408 ino = be64_to_cpu(dent->inumber);
409 hash = be32_to_cpu(ent->hashval);
410 tag = be16_to_cpup(xfs_dir2_data_entry_tag_p(mp, dent));
411 if (!xfs_verify_dir_ino(mp, ino) || tag != off)
412 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
413 if (dent->namelen == 0) {
414 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
415 goto out_relse;
416 }
417
418 /* Does the directory hash match? */
419 dname.name = dent->name;
420 dname.len = dent->namelen;
421 calc_hash = xfs_dir2_hashname(mp, &dname);
422 if (calc_hash != hash)
423 xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
424
425 out_relse:
426 xfs_trans_brelse(ds->dargs.trans, bp);
427 out:
428 return error;
429 }
430
431 /*
432 * Is this unused entry either in the bestfree or smaller than all of
433 * them? We've already checked that the bestfrees are sorted longest to
434 * shortest, and that there aren't any bogus entries.
435 */
436 STATIC void
xchk_directory_check_free_entry(struct xfs_scrub * sc,xfs_dablk_t lblk,struct xfs_dir2_data_free * bf,struct xfs_dir2_data_unused * dup)437 xchk_directory_check_free_entry(
438 struct xfs_scrub *sc,
439 xfs_dablk_t lblk,
440 struct xfs_dir2_data_free *bf,
441 struct xfs_dir2_data_unused *dup)
442 {
443 struct xfs_dir2_data_free *dfp;
444 unsigned int dup_length;
445
446 dup_length = be16_to_cpu(dup->length);
447
448 /* Unused entry is shorter than any of the bestfrees */
449 if (dup_length < be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
450 return;
451
452 for (dfp = &bf[XFS_DIR2_DATA_FD_COUNT - 1]; dfp >= bf; dfp--)
453 if (dup_length == be16_to_cpu(dfp->length))
454 return;
455
456 /* Unused entry should be in the bestfrees but wasn't found. */
457 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
458 }
459
460 /* Check free space info in a directory data block. */
461 STATIC int
xchk_directory_data_bestfree(struct xfs_scrub * sc,xfs_dablk_t lblk,bool is_block)462 xchk_directory_data_bestfree(
463 struct xfs_scrub *sc,
464 xfs_dablk_t lblk,
465 bool is_block)
466 {
467 struct xfs_dir2_data_unused *dup;
468 struct xfs_dir2_data_free *dfp;
469 struct xfs_buf *bp;
470 struct xfs_dir2_data_free *bf;
471 struct xfs_mount *mp = sc->mp;
472 u16 tag;
473 unsigned int nr_bestfrees = 0;
474 unsigned int nr_frees = 0;
475 unsigned int smallest_bestfree;
476 int newlen;
477 unsigned int offset;
478 unsigned int end;
479 int error;
480
481 if (is_block) {
482 /* dir block format */
483 if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET))
484 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
485 error = xfs_dir3_block_read(sc->tp, sc->ip, sc->ip->i_ino, &bp);
486 } else {
487 /* dir data format */
488 error = xfs_dir3_data_read(sc->tp, sc->ip, sc->ip->i_ino, lblk,
489 0, &bp);
490 }
491 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
492 goto out;
493 xchk_buffer_recheck(sc, bp);
494
495 if (xfs_has_crc(sc->mp)) {
496 struct xfs_dir3_data_hdr *hdr3 = bp->b_addr;
497
498 if (hdr3->pad)
499 xchk_fblock_set_preen(sc, XFS_DATA_FORK, lblk);
500 }
501
502 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
503 goto out_buf;
504
505 /* Do the bestfrees correspond to actual free space? */
506 bf = xfs_dir2_data_bestfree_p(mp, bp->b_addr);
507 smallest_bestfree = UINT_MAX;
508 for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
509 offset = be16_to_cpu(dfp->offset);
510 if (offset == 0)
511 continue;
512 if (offset >= mp->m_dir_geo->blksize) {
513 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
514 goto out_buf;
515 }
516 dup = bp->b_addr + offset;
517 tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
518
519 /* bestfree doesn't match the entry it points at? */
520 if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) ||
521 be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) ||
522 tag != offset) {
523 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
524 goto out_buf;
525 }
526
527 /* bestfree records should be ordered largest to smallest */
528 if (smallest_bestfree < be16_to_cpu(dfp->length)) {
529 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
530 goto out_buf;
531 }
532
533 smallest_bestfree = be16_to_cpu(dfp->length);
534 nr_bestfrees++;
535 }
536
537 /* Make sure the bestfrees are actually the best free spaces. */
538 offset = mp->m_dir_geo->data_entry_offset;
539 end = xfs_dir3_data_end_offset(mp->m_dir_geo, bp->b_addr);
540
541 /* Iterate the entries, stopping when we hit or go past the end. */
542 while (offset < end) {
543 dup = bp->b_addr + offset;
544
545 /* Skip real entries */
546 if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) {
547 struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
548
549 newlen = xfs_dir2_data_entsize(mp, dep->namelen);
550 if (newlen <= 0) {
551 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
552 lblk);
553 goto out_buf;
554 }
555 offset += newlen;
556 continue;
557 }
558
559 /* Spot check this free entry */
560 tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
561 if (tag != offset) {
562 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
563 goto out_buf;
564 }
565
566 /*
567 * Either this entry is a bestfree or it's smaller than
568 * any of the bestfrees.
569 */
570 xchk_directory_check_free_entry(sc, lblk, bf, dup);
571 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
572 goto out_buf;
573
574 /* Move on. */
575 newlen = be16_to_cpu(dup->length);
576 if (newlen <= 0) {
577 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
578 goto out_buf;
579 }
580 offset += newlen;
581 if (offset <= end)
582 nr_frees++;
583 }
584
585 /* We're required to fill all the space. */
586 if (offset != end)
587 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
588
589 /* Did we see at least as many free slots as there are bestfrees? */
590 if (nr_frees < nr_bestfrees)
591 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
592 out_buf:
593 xfs_trans_brelse(sc->tp, bp);
594 out:
595 return error;
596 }
597
598 /*
599 * Does the free space length in the free space index block ($len) match
600 * the longest length in the directory data block's bestfree array?
601 * Assume that we've already checked that the data block's bestfree
602 * array is in order.
603 */
604 STATIC void
xchk_directory_check_freesp(struct xfs_scrub * sc,xfs_dablk_t lblk,struct xfs_buf * dbp,unsigned int len)605 xchk_directory_check_freesp(
606 struct xfs_scrub *sc,
607 xfs_dablk_t lblk,
608 struct xfs_buf *dbp,
609 unsigned int len)
610 {
611 struct xfs_dir2_data_free *dfp;
612
613 dfp = xfs_dir2_data_bestfree_p(sc->mp, dbp->b_addr);
614
615 if (len != be16_to_cpu(dfp->length))
616 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
617
618 if (len > 0 && be16_to_cpu(dfp->offset) == 0)
619 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
620 }
621
622 /* Check free space info in a directory leaf1 block. */
623 STATIC int
xchk_directory_leaf1_bestfree(struct xfs_scrub * sc,struct xfs_da_args * args,xfs_dir2_db_t last_data_db,xfs_dablk_t lblk)624 xchk_directory_leaf1_bestfree(
625 struct xfs_scrub *sc,
626 struct xfs_da_args *args,
627 xfs_dir2_db_t last_data_db,
628 xfs_dablk_t lblk)
629 {
630 struct xfs_dir3_icleaf_hdr leafhdr;
631 struct xfs_dir2_leaf_tail *ltp;
632 struct xfs_dir2_leaf *leaf;
633 struct xfs_buf *dbp;
634 struct xfs_buf *bp;
635 struct xfs_da_geometry *geo = sc->mp->m_dir_geo;
636 __be16 *bestp;
637 __u16 best;
638 __u32 hash;
639 __u32 lasthash = 0;
640 __u32 bestcount;
641 unsigned int stale = 0;
642 int i;
643 int error;
644
645 /* Read the free space block. */
646 error = xfs_dir3_leaf_read(sc->tp, sc->ip, sc->ip->i_ino, lblk, &bp);
647 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
648 return error;
649 xchk_buffer_recheck(sc, bp);
650
651 leaf = bp->b_addr;
652 xfs_dir2_leaf_hdr_from_disk(sc->ip->i_mount, &leafhdr, leaf);
653 ltp = xfs_dir2_leaf_tail_p(geo, leaf);
654 bestcount = be32_to_cpu(ltp->bestcount);
655 bestp = xfs_dir2_leaf_bests_p(ltp);
656
657 if (xfs_has_crc(sc->mp)) {
658 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
659
660 if (hdr3->pad != cpu_to_be32(0))
661 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
662 }
663
664 /*
665 * There must be enough bestfree slots to cover all the directory data
666 * blocks that we scanned. It is possible for there to be a hole
667 * between the last data block and i_disk_size. This seems like an
668 * oversight to the scrub author, but as we have been writing out
669 * directories like this (and xfs_repair doesn't mind them) for years,
670 * that's what we have to check.
671 */
672 if (bestcount != last_data_db + 1) {
673 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
674 goto out;
675 }
676
677 /* Is the leaf count even remotely sane? */
678 if (leafhdr.count > geo->leaf_max_ents) {
679 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
680 goto out;
681 }
682
683 /* Leaves and bests don't overlap in leaf format. */
684 if ((char *)&leafhdr.ents[leafhdr.count] > (char *)bestp) {
685 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
686 goto out;
687 }
688
689 /* Check hash value order, count stale entries. */
690 for (i = 0; i < leafhdr.count; i++) {
691 hash = be32_to_cpu(leafhdr.ents[i].hashval);
692 if (i > 0 && lasthash > hash)
693 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
694 lasthash = hash;
695 if (leafhdr.ents[i].address ==
696 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
697 stale++;
698 }
699 if (leafhdr.stale != stale)
700 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
701 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
702 goto out;
703
704 /* Check all the bestfree entries. */
705 for (i = 0; i < bestcount; i++, bestp++) {
706 best = be16_to_cpu(*bestp);
707 error = xfs_dir3_data_read(sc->tp, sc->ip, args->owner,
708 xfs_dir2_db_to_da(args->geo, i),
709 XFS_DABUF_MAP_HOLE_OK, &dbp);
710 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
711 &error))
712 break;
713
714 if (!dbp) {
715 if (best != NULLDATAOFF) {
716 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
717 lblk);
718 break;
719 }
720 continue;
721 }
722
723 if (best == NULLDATAOFF)
724 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
725 else
726 xchk_directory_check_freesp(sc, lblk, dbp, best);
727 xfs_trans_brelse(sc->tp, dbp);
728 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
729 break;
730 }
731 out:
732 xfs_trans_brelse(sc->tp, bp);
733 return error;
734 }
735
736 /* Check free space info in a directory freespace block. */
737 STATIC int
xchk_directory_free_bestfree(struct xfs_scrub * sc,struct xfs_da_args * args,xfs_dablk_t lblk)738 xchk_directory_free_bestfree(
739 struct xfs_scrub *sc,
740 struct xfs_da_args *args,
741 xfs_dablk_t lblk)
742 {
743 struct xfs_dir3_icfree_hdr freehdr;
744 struct xfs_buf *dbp;
745 struct xfs_buf *bp;
746 __u16 best;
747 unsigned int stale = 0;
748 int i;
749 int error;
750
751 /* Read the free space block */
752 error = xfs_dir2_free_read(sc->tp, sc->ip, sc->ip->i_ino, lblk, &bp);
753 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
754 return error;
755 xchk_buffer_recheck(sc, bp);
756
757 if (xfs_has_crc(sc->mp)) {
758 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
759
760 if (hdr3->pad != cpu_to_be32(0))
761 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
762 }
763
764 /* Check all the entries. */
765 xfs_dir2_free_hdr_from_disk(sc->ip->i_mount, &freehdr, bp->b_addr);
766 for (i = 0; i < freehdr.nvalid; i++) {
767 best = be16_to_cpu(freehdr.bests[i]);
768 if (best == NULLDATAOFF) {
769 stale++;
770 continue;
771 }
772 error = xfs_dir3_data_read(sc->tp, sc->ip, args->owner,
773 (freehdr.firstdb + i) * args->geo->fsbcount,
774 0, &dbp);
775 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
776 &error))
777 goto out;
778 xchk_directory_check_freesp(sc, lblk, dbp, best);
779 xfs_trans_brelse(sc->tp, dbp);
780 }
781
782 if (freehdr.nused + stale != freehdr.nvalid)
783 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
784 out:
785 xfs_trans_brelse(sc->tp, bp);
786 return error;
787 }
788
789 /* Check free space information in directories. */
790 STATIC int
xchk_directory_blocks(struct xfs_scrub * sc)791 xchk_directory_blocks(
792 struct xfs_scrub *sc)
793 {
794 struct xfs_bmbt_irec got;
795 struct xfs_da_args args = {
796 .dp = sc->ip,
797 .whichfork = XFS_DATA_FORK,
798 .geo = sc->mp->m_dir_geo,
799 .trans = sc->tp,
800 .owner = sc->ip->i_ino,
801 };
802 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
803 struct xfs_mount *mp = sc->mp;
804 xfs_fileoff_t leaf_lblk;
805 xfs_fileoff_t free_lblk;
806 xfs_fileoff_t lblk;
807 struct xfs_iext_cursor icur;
808 xfs_dablk_t dabno;
809 xfs_dir2_db_t last_data_db = 0;
810 bool found;
811 bool is_block = false;
812 int error;
813
814 /* Ignore local format directories. */
815 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS &&
816 ifp->if_format != XFS_DINODE_FMT_BTREE)
817 return 0;
818
819 lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET);
820 leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET);
821 free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);
822
823 /* Is this a block dir? */
824 if (xfs_dir2_format(&args, &error) == XFS_DIR2_FMT_BLOCK)
825 is_block = true;
826 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
827 goto out;
828
829 /* Iterate all the data extents in the directory... */
830 found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
831 while (found && !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
832 /* No more data blocks... */
833 if (got.br_startoff >= leaf_lblk)
834 break;
835
836 /*
837 * Check each data block's bestfree data.
838 *
839 * Iterate all the fsbcount-aligned block offsets in
840 * this directory. The directory block reading code is
841 * smart enough to do its own bmap lookups to handle
842 * discontiguous directory blocks. When we're done
843 * with the extent record, re-query the bmap at the
844 * next fsbcount-aligned offset to avoid redundant
845 * block checks.
846 */
847 for (lblk = roundup((xfs_dablk_t)got.br_startoff,
848 args.geo->fsbcount);
849 lblk < got.br_startoff + got.br_blockcount;
850 lblk += args.geo->fsbcount) {
851 last_data_db = xfs_dir2_da_to_db(args.geo, lblk);
852 error = xchk_directory_data_bestfree(sc, lblk,
853 is_block);
854 if (error)
855 goto out;
856 }
857 dabno = got.br_startoff + got.br_blockcount;
858 lblk = roundup(dabno, args.geo->fsbcount);
859 found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
860 }
861
862 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
863 goto out;
864
865 /* Look for a leaf1 block, which has free info. */
866 if (xfs_iext_lookup_extent(sc->ip, ifp, leaf_lblk, &icur, &got) &&
867 got.br_startoff == leaf_lblk &&
868 got.br_blockcount == args.geo->fsbcount &&
869 !xfs_iext_next_extent(ifp, &icur, &got)) {
870 if (is_block) {
871 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
872 goto out;
873 }
874 error = xchk_directory_leaf1_bestfree(sc, &args, last_data_db,
875 leaf_lblk);
876 if (error)
877 goto out;
878 }
879
880 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
881 goto out;
882
883 /* Scan for free blocks */
884 lblk = free_lblk;
885 found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
886 while (found && !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
887 /*
888 * Dirs can't have blocks mapped above 2^32.
889 * Single-block dirs shouldn't even be here.
890 */
891 lblk = got.br_startoff;
892 if (lblk & ~0xFFFFFFFFULL) {
893 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
894 goto out;
895 }
896 if (is_block) {
897 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
898 goto out;
899 }
900
901 /*
902 * Check each dir free block's bestfree data.
903 *
904 * Iterate all the fsbcount-aligned block offsets in
905 * this directory. The directory block reading code is
906 * smart enough to do its own bmap lookups to handle
907 * discontiguous directory blocks. When we're done
908 * with the extent record, re-query the bmap at the
909 * next fsbcount-aligned offset to avoid redundant
910 * block checks.
911 */
912 for (lblk = roundup((xfs_dablk_t)got.br_startoff,
913 args.geo->fsbcount);
914 lblk < got.br_startoff + got.br_blockcount;
915 lblk += args.geo->fsbcount) {
916 error = xchk_directory_free_bestfree(sc, &args,
917 lblk);
918 if (error)
919 goto out;
920 }
921 dabno = got.br_startoff + got.br_blockcount;
922 lblk = roundup(dabno, args.geo->fsbcount);
923 found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
924 }
925 out:
926 return error;
927 }
928
929 /*
930 * Revalidate a dirent that we collected in the past but couldn't check because
931 * of lock contention. Returns 0 if the dirent is still valid, -ENOENT if it
932 * has gone away on us, or a negative errno.
933 */
934 STATIC int
xchk_dir_revalidate_dirent(struct xchk_dir * sd,const struct xfs_name * xname,xfs_ino_t ino)935 xchk_dir_revalidate_dirent(
936 struct xchk_dir *sd,
937 const struct xfs_name *xname,
938 xfs_ino_t ino)
939 {
940 struct xfs_scrub *sc = sd->sc;
941 xfs_ino_t child_ino;
942 int error;
943
944 /*
945 * Look up the directory entry. If we get -ENOENT, the directory entry
946 * went away and there's nothing to revalidate. Return any other
947 * error.
948 */
949 error = xchk_dir_lookup(sc, sc->ip, xname, &child_ino);
950 if (error)
951 return error;
952
953 /* The inode number changed, nothing to revalidate. */
954 if (ino != child_ino)
955 return -ENOENT;
956
957 return 0;
958 }
959
960 /*
961 * Check a directory entry's parent pointers the slow way, which means we cycle
962 * locks a bunch and put up with revalidation until we get it done.
963 */
964 STATIC int
xchk_dir_slow_dirent(struct xchk_dir * sd,struct xchk_dirent * dirent,const struct xfs_name * xname)965 xchk_dir_slow_dirent(
966 struct xchk_dir *sd,
967 struct xchk_dirent *dirent,
968 const struct xfs_name *xname)
969 {
970 struct xfs_scrub *sc = sd->sc;
971 struct xfs_inode *ip;
972 unsigned int lockmode;
973 int error;
974
975 /* Check that the deferred dirent still exists. */
976 if (sd->need_revalidate) {
977 error = xchk_dir_revalidate_dirent(sd, xname, dirent->ino);
978 if (error == -ENOENT)
979 return 0;
980 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
981 &error))
982 return error;
983 }
984
985 error = xchk_iget(sc, dirent->ino, &ip);
986 if (error == -EINVAL || error == -ENOENT) {
987 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
988 return 0;
989 }
990 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
991 return error;
992
993 /*
994 * If we can grab both IOLOCK and ILOCK of the alleged child, we can
995 * proceed with the validation.
996 */
997 lockmode = xchk_dir_lock_child(sc, ip);
998 if (lockmode) {
999 trace_xchk_dir_slowpath(sc->ip, xname, ip->i_ino);
1000 goto check_pptr;
1001 }
1002
1003 /*
1004 * We couldn't lock the child file. Drop all the locks and try to
1005 * get them again, one at a time.
1006 */
1007 xchk_iunlock(sc, sc->ilock_flags);
1008 sd->need_revalidate = true;
1009
1010 trace_xchk_dir_ultraslowpath(sc->ip, xname, ip->i_ino);
1011
1012 error = xchk_dir_trylock_for_pptrs(sc, ip, &lockmode);
1013 if (error)
1014 goto out_rele;
1015
1016 /* Revalidate, since we just cycled the locks. */
1017 error = xchk_dir_revalidate_dirent(sd, xname, dirent->ino);
1018 if (error == -ENOENT) {
1019 error = 0;
1020 goto out_unlock;
1021 }
1022 if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
1023 goto out_unlock;
1024
1025 check_pptr:
1026 error = xchk_dir_parent_pointer(sd, xname, ip);
1027 out_unlock:
1028 xfs_iunlock(ip, lockmode);
1029 out_rele:
1030 xchk_irele(sc, ip);
1031 return error;
1032 }
1033
1034 /* Check all the dirents that we deferred the first time around. */
1035 STATIC int
xchk_dir_finish_slow_dirents(struct xchk_dir * sd)1036 xchk_dir_finish_slow_dirents(
1037 struct xchk_dir *sd)
1038 {
1039 xfarray_idx_t array_cur;
1040 int error;
1041
1042 foreach_xfarray_idx(sd->dir_entries, array_cur) {
1043 struct xchk_dirent dirent;
1044
1045 if (sd->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1046 return 0;
1047
1048 error = xfarray_load(sd->dir_entries, array_cur, &dirent);
1049 if (error)
1050 return error;
1051
1052 error = xfblob_loadname(sd->dir_names, dirent.name_cookie,
1053 &sd->xname, dirent.namelen);
1054 if (error)
1055 return error;
1056
1057 error = xchk_dir_slow_dirent(sd, &dirent, &sd->xname);
1058 if (error)
1059 return error;
1060 }
1061
1062 return 0;
1063 }
1064
1065 /* Scrub a whole directory. */
1066 int
xchk_directory(struct xfs_scrub * sc)1067 xchk_directory(
1068 struct xfs_scrub *sc)
1069 {
1070 struct xchk_dir *sd;
1071 int error;
1072
1073 if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
1074 return -ENOENT;
1075
1076 if (xchk_file_looks_zapped(sc, XFS_SICK_INO_DIR_ZAPPED)) {
1077 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
1078 return 0;
1079 }
1080
1081 /* Plausible size? */
1082 if (sc->ip->i_disk_size < xfs_dir2_sf_hdr_size(0)) {
1083 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1084 return 0;
1085 }
1086
1087 /* Check directory tree structure */
1088 error = xchk_da_btree(sc, XFS_DATA_FORK, xchk_dir_rec, NULL);
1089 if (error)
1090 return error;
1091
1092 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1093 return 0;
1094
1095 /* Check the freespace. */
1096 error = xchk_directory_blocks(sc);
1097 if (error)
1098 return error;
1099
1100 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1101 return 0;
1102
1103 sd = kvzalloc_obj(struct xchk_dir, XCHK_GFP_FLAGS);
1104 if (!sd)
1105 return -ENOMEM;
1106 sd->sc = sc;
1107 sd->xname.name = sd->namebuf;
1108
1109 if (xfs_has_parent(sc->mp)) {
1110 /*
1111 * Set up some staging memory for dirents that we can't check
1112 * due to locking contention.
1113 */
1114 error = xfarray_create("slow directory entries", 0,
1115 sizeof(struct xchk_dirent), &sd->dir_entries);
1116 if (error)
1117 goto out_sd;
1118
1119 error = xfblob_create("slow directory entry names",
1120 &sd->dir_names);
1121 if (error)
1122 goto out_entries;
1123 }
1124
1125 /* Look up every name in this directory by hash. */
1126 error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, sd);
1127 if (error == -ECANCELED)
1128 error = 0;
1129 if (error)
1130 goto out_names;
1131
1132 if (xfs_has_parent(sc->mp)) {
1133 error = xchk_dir_finish_slow_dirents(sd);
1134 if (error == -ETIMEDOUT) {
1135 /* Couldn't grab a lock, scrub was marked incomplete */
1136 error = 0;
1137 goto out_names;
1138 }
1139 if (error)
1140 goto out_names;
1141 }
1142
1143 out_names:
1144 if (sd->dir_names)
1145 xfblob_destroy(sd->dir_names);
1146 out_entries:
1147 if (sd->dir_entries)
1148 xfarray_destroy(sd->dir_entries);
1149 out_sd:
1150 kvfree(sd);
1151 if (error)
1152 return error;
1153
1154 /* If the dir is clean, it is clearly not zapped. */
1155 xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_DIR_ZAPPED);
1156 return 0;
1157 }
1158
1159 /*
1160 * Decide if this directory has been zapped to satisfy the inode and ifork
1161 * verifiers. Checking and repairing should be postponed until the directory
1162 * is fixed.
1163 */
1164 bool
xchk_dir_looks_zapped(struct xfs_inode * dp)1165 xchk_dir_looks_zapped(
1166 struct xfs_inode *dp)
1167 {
1168 /* Repair zapped this dir's data fork a short time ago */
1169 if (xfs_ifork_zapped(dp, XFS_DATA_FORK))
1170 return true;
1171
1172 /*
1173 * If the dinode repair found a bad data fork, it will reset the fork
1174 * to extents format with zero records and wait for the bmapbtd
1175 * scrubber to reconstruct the block mappings. Directories always
1176 * contain some content, so this is a clear sign of a zapped directory.
1177 * The state checked by xfs_ifork_zapped is not persisted, so this is
1178 * the secondary strategy if repairs are interrupted by a crash or an
1179 * unmount.
1180 */
1181 return dp->i_df.if_format == XFS_DINODE_FMT_EXTENTS &&
1182 dp->i_df.if_nextents == 0;
1183 }
1184