1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_dir2.h"
16 #include "xfs_dir2_priv.h"
17 #include "xfs_attr_leaf.h"
18 #include "scrub/scrub.h"
19 #include "scrub/common.h"
20 #include "scrub/trace.h"
21 #include "scrub/dabtree.h"
22
23 /* Directory/Attribute Btree */
24
25 /*
26 * Check for da btree operation errors. See the section about handling
27 * operational errors in common.c.
28 */
29 bool
xchk_da_process_error(struct xchk_da_btree * ds,int level,int * error)30 xchk_da_process_error(
31 struct xchk_da_btree *ds,
32 int level,
33 int *error)
34 {
35 struct xfs_scrub *sc = ds->sc;
36
37 if (*error == 0)
38 return true;
39
40 switch (*error) {
41 case -EDEADLOCK:
42 case -ECHRNG:
43 /* Used to restart an op with deadlock avoidance. */
44 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
45 break;
46 case -EFSBADCRC:
47 case -EFSCORRUPTED:
48 case -EIO:
49 case -ENODATA:
50 /* Note the badness but don't abort. */
51 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
52 *error = 0;
53 fallthrough;
54 default:
55 trace_xchk_file_op_error(sc, ds->dargs.whichfork,
56 xfs_dir2_da_to_db(ds->dargs.geo,
57 ds->state->path.blk[level].blkno),
58 *error, __return_address);
59 break;
60 }
61 return false;
62 }
63
64 /*
65 * Check for da btree corruption. See the section about handling
66 * operational errors in common.c.
67 */
68 void
xchk_da_set_corrupt(struct xchk_da_btree * ds,int level)69 xchk_da_set_corrupt(
70 struct xchk_da_btree *ds,
71 int level)
72 {
73 struct xfs_scrub *sc = ds->sc;
74
75 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
76
77 trace_xchk_fblock_error(sc, ds->dargs.whichfork,
78 xfs_dir2_da_to_db(ds->dargs.geo,
79 ds->state->path.blk[level].blkno),
80 __return_address);
81 }
82
83 /* Flag a da btree node in need of optimization. */
84 void
xchk_da_set_preen(struct xchk_da_btree * ds,int level)85 xchk_da_set_preen(
86 struct xchk_da_btree *ds,
87 int level)
88 {
89 struct xfs_scrub *sc = ds->sc;
90
91 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
92 trace_xchk_fblock_preen(sc, ds->dargs.whichfork,
93 xfs_dir2_da_to_db(ds->dargs.geo,
94 ds->state->path.blk[level].blkno),
95 __return_address);
96 }
97
98 /* Find an entry at a certain level in a da btree. */
99 static struct xfs_da_node_entry *
xchk_da_btree_node_entry(struct xchk_da_btree * ds,int level)100 xchk_da_btree_node_entry(
101 struct xchk_da_btree *ds,
102 int level)
103 {
104 struct xfs_da_state_blk *blk = &ds->state->path.blk[level];
105 struct xfs_da3_icnode_hdr hdr;
106
107 ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
108
109 xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
110 return hdr.btree + blk->index;
111 }
112
113 /* Scrub a da btree hash (key). */
114 int
xchk_da_btree_hash(struct xchk_da_btree * ds,int level,__be32 * hashp)115 xchk_da_btree_hash(
116 struct xchk_da_btree *ds,
117 int level,
118 __be32 *hashp)
119 {
120 struct xfs_da_node_entry *entry;
121 xfs_dahash_t hash;
122 xfs_dahash_t parent_hash;
123
124 /* Is this hash in order? */
125 hash = be32_to_cpu(*hashp);
126 if (hash < ds->hashes[level])
127 xchk_da_set_corrupt(ds, level);
128 ds->hashes[level] = hash;
129
130 if (level == 0)
131 return 0;
132
133 /* Is this hash no larger than the parent hash? */
134 entry = xchk_da_btree_node_entry(ds, level - 1);
135 parent_hash = be32_to_cpu(entry->hashval);
136 if (parent_hash < hash)
137 xchk_da_set_corrupt(ds, level);
138
139 return 0;
140 }
141
142 /*
143 * Check a da btree pointer. Returns true if it's ok to use this
144 * pointer.
145 */
146 STATIC bool
xchk_da_btree_ptr_ok(struct xchk_da_btree * ds,int level,xfs_dablk_t blkno)147 xchk_da_btree_ptr_ok(
148 struct xchk_da_btree *ds,
149 int level,
150 xfs_dablk_t blkno)
151 {
152 if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
153 xchk_da_set_corrupt(ds, level);
154 return false;
155 }
156
157 return true;
158 }
159
160 /*
161 * The da btree scrubber can handle leaf1 blocks as a degenerate
162 * form of leafn blocks. Since the regular da code doesn't handle
163 * leaf1, we must multiplex the verifiers.
164 */
165 static void
xchk_da_btree_read_verify(struct xfs_buf * bp)166 xchk_da_btree_read_verify(
167 struct xfs_buf *bp)
168 {
169 struct xfs_da_blkinfo *info = bp->b_addr;
170
171 switch (be16_to_cpu(info->magic)) {
172 case XFS_DIR2_LEAF1_MAGIC:
173 case XFS_DIR3_LEAF1_MAGIC:
174 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
175 bp->b_ops->verify_read(bp);
176 return;
177 default:
178 /*
179 * xfs_da3_node_buf_ops already know how to handle
180 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
181 */
182 bp->b_ops = &xfs_da3_node_buf_ops;
183 bp->b_ops->verify_read(bp);
184 return;
185 }
186 }
187 static void
xchk_da_btree_write_verify(struct xfs_buf * bp)188 xchk_da_btree_write_verify(
189 struct xfs_buf *bp)
190 {
191 struct xfs_da_blkinfo *info = bp->b_addr;
192
193 switch (be16_to_cpu(info->magic)) {
194 case XFS_DIR2_LEAF1_MAGIC:
195 case XFS_DIR3_LEAF1_MAGIC:
196 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
197 bp->b_ops->verify_write(bp);
198 return;
199 default:
200 /*
201 * xfs_da3_node_buf_ops already know how to handle
202 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
203 */
204 bp->b_ops = &xfs_da3_node_buf_ops;
205 bp->b_ops->verify_write(bp);
206 return;
207 }
208 }
209 static void *
xchk_da_btree_verify(struct xfs_buf * bp)210 xchk_da_btree_verify(
211 struct xfs_buf *bp)
212 {
213 struct xfs_da_blkinfo *info = bp->b_addr;
214
215 switch (be16_to_cpu(info->magic)) {
216 case XFS_DIR2_LEAF1_MAGIC:
217 case XFS_DIR3_LEAF1_MAGIC:
218 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
219 return bp->b_ops->verify_struct(bp);
220 default:
221 bp->b_ops = &xfs_da3_node_buf_ops;
222 return bp->b_ops->verify_struct(bp);
223 }
224 }
225
226 static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
227 .name = "xchk_da_btree",
228 .verify_read = xchk_da_btree_read_verify,
229 .verify_write = xchk_da_btree_write_verify,
230 .verify_struct = xchk_da_btree_verify,
231 };
232
233 /* Check a block's sibling. */
234 STATIC int
xchk_da_btree_block_check_sibling(struct xchk_da_btree * ds,int level,int direction,xfs_dablk_t sibling)235 xchk_da_btree_block_check_sibling(
236 struct xchk_da_btree *ds,
237 int level,
238 int direction,
239 xfs_dablk_t sibling)
240 {
241 struct xfs_da_state_path *path = &ds->state->path;
242 struct xfs_da_state_path *altpath = &ds->state->altpath;
243 int retval;
244 int plevel;
245 int error;
246
247 memcpy(altpath, path, sizeof(ds->state->altpath));
248
249 /*
250 * If the pointer is null, we shouldn't be able to move the upper
251 * level pointer anywhere.
252 */
253 if (sibling == 0) {
254 error = xfs_da3_path_shift(ds->state, altpath, direction,
255 false, &retval);
256 if (error == 0 && retval == 0)
257 xchk_da_set_corrupt(ds, level);
258 error = 0;
259 goto out;
260 }
261
262 /* Move the alternate cursor one block in the direction given. */
263 error = xfs_da3_path_shift(ds->state, altpath, direction, false,
264 &retval);
265 if (!xchk_da_process_error(ds, level, &error))
266 goto out;
267 if (retval) {
268 xchk_da_set_corrupt(ds, level);
269 goto out;
270 }
271 if (altpath->blk[level].bp)
272 xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);
273
274 /* Compare upper level pointer to sibling pointer. */
275 if (altpath->blk[level].blkno != sibling)
276 xchk_da_set_corrupt(ds, level);
277
278 out:
279 /* Free all buffers in the altpath that aren't referenced from path. */
280 for (plevel = 0; plevel < altpath->active; plevel++) {
281 if (altpath->blk[plevel].bp == NULL ||
282 (plevel < path->active &&
283 altpath->blk[plevel].bp == path->blk[plevel].bp))
284 continue;
285
286 xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
287 altpath->blk[plevel].bp = NULL;
288 }
289
290 return error;
291 }
292
293 /* Check a block's sibling pointers. */
294 STATIC int
xchk_da_btree_block_check_siblings(struct xchk_da_btree * ds,int level,struct xfs_da_blkinfo * hdr)295 xchk_da_btree_block_check_siblings(
296 struct xchk_da_btree *ds,
297 int level,
298 struct xfs_da_blkinfo *hdr)
299 {
300 xfs_dablk_t forw;
301 xfs_dablk_t back;
302 int error = 0;
303
304 forw = be32_to_cpu(hdr->forw);
305 back = be32_to_cpu(hdr->back);
306
307 /* Top level blocks should not have sibling pointers. */
308 if (level == 0) {
309 if (forw != 0 || back != 0)
310 xchk_da_set_corrupt(ds, level);
311 return 0;
312 }
313
314 /*
315 * Check back (left) and forw (right) pointers. These functions
316 * absorb error codes for us.
317 */
318 error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
319 if (error)
320 goto out;
321 error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
322
323 out:
324 memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
325 return error;
326 }
327
328 /* Load a dir/attribute block from a btree. */
329 STATIC int
xchk_da_btree_block(struct xchk_da_btree * ds,int level,xfs_dablk_t blkno)330 xchk_da_btree_block(
331 struct xchk_da_btree *ds,
332 int level,
333 xfs_dablk_t blkno)
334 {
335 struct xfs_da_state_blk *blk;
336 struct xfs_da_intnode *node;
337 struct xfs_da_node_entry *btree;
338 struct xfs_da3_blkinfo *hdr3;
339 struct xfs_da_args *dargs = &ds->dargs;
340 struct xfs_inode *ip = ds->dargs.dp;
341 xfs_failaddr_t fa;
342 xfs_ino_t owner;
343 int *pmaxrecs;
344 struct xfs_da3_icnode_hdr nodehdr;
345 int error = 0;
346
347 blk = &ds->state->path.blk[level];
348 ds->state->path.active = level + 1;
349
350 /* Release old block. */
351 if (blk->bp) {
352 xfs_trans_brelse(dargs->trans, blk->bp);
353 blk->bp = NULL;
354 }
355
356 /* Check the pointer. */
357 blk->blkno = blkno;
358 if (!xchk_da_btree_ptr_ok(ds, level, blkno))
359 goto out_nobuf;
360
361 /* Read the buffer. */
362 error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
363 XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
364 &xchk_da_btree_buf_ops);
365 if (!xchk_da_process_error(ds, level, &error))
366 goto out_nobuf;
367 if (blk->bp)
368 xchk_buffer_recheck(ds->sc, blk->bp);
369
370 /*
371 * We didn't find a dir btree root block, which means that
372 * there's no LEAF1/LEAFN tree (at least not where it's supposed
373 * to be), so jump out now.
374 */
375 if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
376 blk->bp == NULL)
377 goto out_nobuf;
378
379 /* It's /not/ ok for attr trees not to have a da btree. */
380 if (blk->bp == NULL) {
381 xchk_da_set_corrupt(ds, level);
382 goto out_nobuf;
383 }
384
385 hdr3 = blk->bp->b_addr;
386 blk->magic = be16_to_cpu(hdr3->hdr.magic);
387 pmaxrecs = &ds->maxrecs[level];
388
389 /* We only started zeroing the header on v5 filesystems. */
390 if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
391 xchk_da_set_corrupt(ds, level);
392
393 /* Check the owner. */
394 if (xfs_has_crc(ip->i_mount)) {
395 owner = be64_to_cpu(hdr3->owner);
396 if (owner != ip->i_ino)
397 xchk_da_set_corrupt(ds, level);
398 }
399
400 /* Check the siblings. */
401 error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
402 if (error)
403 goto out;
404
405 /* Interpret the buffer. */
406 switch (blk->magic) {
407 case XFS_ATTR_LEAF_MAGIC:
408 case XFS_ATTR3_LEAF_MAGIC:
409 xfs_trans_buf_set_type(dargs->trans, blk->bp,
410 XFS_BLFT_ATTR_LEAF_BUF);
411 blk->magic = XFS_ATTR_LEAF_MAGIC;
412 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
413 if (ds->tree_level != 0)
414 xchk_da_set_corrupt(ds, level);
415 break;
416 case XFS_DIR2_LEAFN_MAGIC:
417 case XFS_DIR3_LEAFN_MAGIC:
418 xfs_trans_buf_set_type(dargs->trans, blk->bp,
419 XFS_BLFT_DIR_LEAFN_BUF);
420 blk->magic = XFS_DIR2_LEAFN_MAGIC;
421 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
422 if (ds->tree_level != 0)
423 xchk_da_set_corrupt(ds, level);
424 break;
425 case XFS_DIR2_LEAF1_MAGIC:
426 case XFS_DIR3_LEAF1_MAGIC:
427 xfs_trans_buf_set_type(dargs->trans, blk->bp,
428 XFS_BLFT_DIR_LEAF1_BUF);
429 blk->magic = XFS_DIR2_LEAF1_MAGIC;
430 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
431 if (ds->tree_level != 0)
432 xchk_da_set_corrupt(ds, level);
433 break;
434 case XFS_DA_NODE_MAGIC:
435 case XFS_DA3_NODE_MAGIC:
436 xfs_trans_buf_set_type(dargs->trans, blk->bp,
437 XFS_BLFT_DA_NODE_BUF);
438 blk->magic = XFS_DA_NODE_MAGIC;
439 node = blk->bp->b_addr;
440 xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
441 btree = nodehdr.btree;
442 *pmaxrecs = nodehdr.count;
443 blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
444 if (level == 0) {
445 if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
446 xchk_da_set_corrupt(ds, level);
447 goto out_freebp;
448 }
449 ds->tree_level = nodehdr.level;
450 } else {
451 if (ds->tree_level != nodehdr.level) {
452 xchk_da_set_corrupt(ds, level);
453 goto out_freebp;
454 }
455 }
456
457 /* XXX: Check hdr3.pad32 once we know how to fix it. */
458 break;
459 default:
460 xchk_da_set_corrupt(ds, level);
461 goto out_freebp;
462 }
463
464 fa = xfs_da3_header_check(blk->bp, dargs->owner);
465 if (fa) {
466 xchk_da_set_corrupt(ds, level);
467 goto out_freebp;
468 }
469
470 /*
471 * If we've been handed a block that is below the dabtree root, does
472 * its hashval match what the parent block expected to see?
473 */
474 if (level > 0) {
475 struct xfs_da_node_entry *key;
476
477 key = xchk_da_btree_node_entry(ds, level - 1);
478 if (be32_to_cpu(key->hashval) != blk->hashval) {
479 xchk_da_set_corrupt(ds, level);
480 goto out_freebp;
481 }
482 }
483
484 out:
485 return error;
486 out_freebp:
487 xfs_trans_brelse(dargs->trans, blk->bp);
488 blk->bp = NULL;
489 out_nobuf:
490 blk->blkno = 0;
491 return error;
492 }
493
494 /* Visit all nodes and leaves of a da btree. */
495 int
xchk_da_btree(struct xfs_scrub * sc,int whichfork,xchk_da_btree_rec_fn scrub_fn,void * private)496 xchk_da_btree(
497 struct xfs_scrub *sc,
498 int whichfork,
499 xchk_da_btree_rec_fn scrub_fn,
500 void *private)
501 {
502 struct xchk_da_btree *ds;
503 struct xfs_mount *mp = sc->mp;
504 struct xfs_da_state_blk *blks;
505 struct xfs_da_node_entry *key;
506 xfs_dablk_t blkno;
507 int level;
508 int error;
509
510 /* Skip short format data structures; no btree to scan. */
511 if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork)))
512 return 0;
513
514 /* Set up initial da state. */
515 ds = kzalloc_obj(struct xchk_da_btree, XCHK_GFP_FLAGS);
516 if (!ds)
517 return -ENOMEM;
518 ds->dargs.dp = sc->ip;
519 ds->dargs.whichfork = whichfork;
520 ds->dargs.trans = sc->tp;
521 ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
522 ds->dargs.owner = sc->ip->i_ino;
523 ds->state = xfs_da_state_alloc(&ds->dargs);
524 ds->sc = sc;
525 ds->private = private;
526 if (whichfork == XFS_ATTR_FORK) {
527 ds->dargs.geo = mp->m_attr_geo;
528 ds->lowest = 0;
529 ds->highest = 0;
530 } else {
531 ds->dargs.geo = mp->m_dir_geo;
532 ds->lowest = ds->dargs.geo->leafblk;
533 ds->highest = ds->dargs.geo->freeblk;
534 }
535 blkno = ds->lowest;
536 level = 0;
537
538 /* Find the root of the da tree, if present. */
539 blks = ds->state->path.blk;
540 error = xchk_da_btree_block(ds, level, blkno);
541 if (error)
542 goto out_state;
543 /*
544 * We didn't find a block at ds->lowest, which means that there's
545 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
546 * so jump out now.
547 */
548 if (blks[level].bp == NULL)
549 goto out_state;
550
551 blks[level].index = 0;
552 while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
553 /* Handle leaf block. */
554 if (blks[level].magic != XFS_DA_NODE_MAGIC) {
555 /* End of leaf, pop back towards the root. */
556 if (blks[level].index >= ds->maxrecs[level]) {
557 if (level > 0)
558 blks[level - 1].index++;
559 ds->tree_level++;
560 level--;
561 continue;
562 }
563
564 /* Dispatch record scrubbing. */
565 error = scrub_fn(ds, level);
566 if (error)
567 break;
568 if (xchk_should_terminate(sc, &error) ||
569 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
570 break;
571
572 blks[level].index++;
573 continue;
574 }
575
576
577 /* End of node, pop back towards the root. */
578 if (blks[level].index >= ds->maxrecs[level]) {
579 if (level > 0)
580 blks[level - 1].index++;
581 ds->tree_level++;
582 level--;
583 continue;
584 }
585
586 /* Hashes in order for scrub? */
587 key = xchk_da_btree_node_entry(ds, level);
588 error = xchk_da_btree_hash(ds, level, &key->hashval);
589 if (error)
590 goto out;
591
592 /* Drill another level deeper. */
593 blkno = be32_to_cpu(key->before);
594 level++;
595 if (level >= XFS_DA_NODE_MAXDEPTH) {
596 /* Too deep! */
597 xchk_da_set_corrupt(ds, level - 1);
598 break;
599 }
600 ds->tree_level--;
601 error = xchk_da_btree_block(ds, level, blkno);
602 if (error)
603 goto out;
604 if (blks[level].bp == NULL)
605 goto out;
606
607 blks[level].index = 0;
608 }
609
610 out:
611 /* Release all the buffers we're tracking. */
612 for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
613 if (blks[level].bp == NULL)
614 continue;
615 xfs_trans_brelse(sc->tp, blks[level].bp);
616 blks[level].bp = NULL;
617 }
618
619 out_state:
620 xfs_da_state_free(ds->state);
621 kfree(ds);
622 return error;
623 }
624