xref: /linux/fs/xfs/scrub/dabtree.c (revision 6f7e6393d1ce636bb7ec77a7fe7b77458fddf701)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_dir2.h"
16 #include "xfs_dir2_priv.h"
17 #include "xfs_attr_leaf.h"
18 #include "scrub/scrub.h"
19 #include "scrub/common.h"
20 #include "scrub/trace.h"
21 #include "scrub/dabtree.h"
22 
23 /* Directory/Attribute Btree */
24 
25 /*
26  * Check for da btree operation errors.  See the section about handling
27  * operational errors in common.c.
28  */
29 bool
30 xchk_da_process_error(
31 	struct xchk_da_btree	*ds,
32 	int			level,
33 	int			*error)
34 {
35 	struct xfs_scrub	*sc = ds->sc;
36 
37 	if (*error == 0)
38 		return true;
39 
40 	switch (*error) {
41 	case -EDEADLOCK:
42 	case -ECHRNG:
43 		/* Used to restart an op with deadlock avoidance. */
44 		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
45 		break;
46 	case -EFSBADCRC:
47 	case -EFSCORRUPTED:
48 	case -EIO:
49 	case -ENODATA:
50 		/* Note the badness but don't abort. */
51 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
52 		*error = 0;
53 		fallthrough;
54 	default:
55 		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
56 				xfs_dir2_da_to_db(ds->dargs.geo,
57 					ds->state->path.blk[level].blkno),
58 				*error, __return_address);
59 		break;
60 	}
61 	return false;
62 }
63 
64 /*
65  * Check for da btree corruption.  See the section about handling
66  * operational errors in common.c.
67  */
68 void
69 xchk_da_set_corrupt(
70 	struct xchk_da_btree	*ds,
71 	int			level)
72 {
73 	struct xfs_scrub	*sc = ds->sc;
74 
75 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
76 
77 	trace_xchk_fblock_error(sc, ds->dargs.whichfork,
78 			xfs_dir2_da_to_db(ds->dargs.geo,
79 				ds->state->path.blk[level].blkno),
80 			__return_address);
81 }
82 
83 /* Flag a da btree node in need of optimization. */
84 void
85 xchk_da_set_preen(
86 	struct xchk_da_btree	*ds,
87 	int			level)
88 {
89 	struct xfs_scrub	*sc = ds->sc;
90 
91 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
92 	trace_xchk_fblock_preen(sc, ds->dargs.whichfork,
93 			xfs_dir2_da_to_db(ds->dargs.geo,
94 				ds->state->path.blk[level].blkno),
95 			__return_address);
96 }
97 
98 /* Find an entry at a certain level in a da btree. */
99 static struct xfs_da_node_entry *
100 xchk_da_btree_node_entry(
101 	struct xchk_da_btree		*ds,
102 	int				level)
103 {
104 	struct xfs_da_state_blk		*blk = &ds->state->path.blk[level];
105 	struct xfs_da3_icnode_hdr	hdr;
106 
107 	ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
108 
109 	xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
110 	return hdr.btree + blk->index;
111 }
112 
113 /* Scrub a da btree hash (key). */
114 int
115 xchk_da_btree_hash(
116 	struct xchk_da_btree		*ds,
117 	int				level,
118 	__be32				*hashp)
119 {
120 	struct xfs_da_node_entry	*entry;
121 	xfs_dahash_t			hash;
122 	xfs_dahash_t			parent_hash;
123 
124 	/* Is this hash in order? */
125 	hash = be32_to_cpu(*hashp);
126 	if (hash < ds->hashes[level])
127 		xchk_da_set_corrupt(ds, level);
128 	ds->hashes[level] = hash;
129 
130 	if (level == 0)
131 		return 0;
132 
133 	/* Is this hash no larger than the parent hash? */
134 	entry = xchk_da_btree_node_entry(ds, level - 1);
135 	parent_hash = be32_to_cpu(entry->hashval);
136 	if (parent_hash < hash)
137 		xchk_da_set_corrupt(ds, level);
138 
139 	return 0;
140 }
141 
142 /*
143  * Check a da btree pointer.  Returns true if it's ok to use this
144  * pointer.
145  */
146 STATIC bool
147 xchk_da_btree_ptr_ok(
148 	struct xchk_da_btree	*ds,
149 	int			level,
150 	xfs_dablk_t		blkno)
151 {
152 	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
153 		xchk_da_set_corrupt(ds, level);
154 		return false;
155 	}
156 
157 	return true;
158 }
159 
160 /*
161  * The da btree scrubber can handle leaf1 blocks as a degenerate
162  * form of leafn blocks.  Since the regular da code doesn't handle
163  * leaf1, we must multiplex the verifiers.
164  */
165 static void
166 xchk_da_btree_read_verify(
167 	struct xfs_buf		*bp)
168 {
169 	struct xfs_da_blkinfo	*info = bp->b_addr;
170 
171 	switch (be16_to_cpu(info->magic)) {
172 	case XFS_DIR2_LEAF1_MAGIC:
173 	case XFS_DIR3_LEAF1_MAGIC:
174 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
175 		bp->b_ops->verify_read(bp);
176 		return;
177 	default:
178 		/*
179 		 * xfs_da3_node_buf_ops already know how to handle
180 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
181 		 */
182 		bp->b_ops = &xfs_da3_node_buf_ops;
183 		bp->b_ops->verify_read(bp);
184 		return;
185 	}
186 }
187 static void
188 xchk_da_btree_write_verify(
189 	struct xfs_buf		*bp)
190 {
191 	struct xfs_da_blkinfo	*info = bp->b_addr;
192 
193 	switch (be16_to_cpu(info->magic)) {
194 	case XFS_DIR2_LEAF1_MAGIC:
195 	case XFS_DIR3_LEAF1_MAGIC:
196 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
197 		bp->b_ops->verify_write(bp);
198 		return;
199 	default:
200 		/*
201 		 * xfs_da3_node_buf_ops already know how to handle
202 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
203 		 */
204 		bp->b_ops = &xfs_da3_node_buf_ops;
205 		bp->b_ops->verify_write(bp);
206 		return;
207 	}
208 }
209 static void *
210 xchk_da_btree_verify(
211 	struct xfs_buf		*bp)
212 {
213 	struct xfs_da_blkinfo	*info = bp->b_addr;
214 
215 	switch (be16_to_cpu(info->magic)) {
216 	case XFS_DIR2_LEAF1_MAGIC:
217 	case XFS_DIR3_LEAF1_MAGIC:
218 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
219 		return bp->b_ops->verify_struct(bp);
220 	default:
221 		bp->b_ops = &xfs_da3_node_buf_ops;
222 		return bp->b_ops->verify_struct(bp);
223 	}
224 }
225 
226 static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
227 	.name = "xchk_da_btree",
228 	.verify_read = xchk_da_btree_read_verify,
229 	.verify_write = xchk_da_btree_write_verify,
230 	.verify_struct = xchk_da_btree_verify,
231 };
232 
233 /* Check a block's sibling. */
234 STATIC int
235 xchk_da_btree_block_check_sibling(
236 	struct xchk_da_btree	*ds,
237 	int			level,
238 	int			direction,
239 	xfs_dablk_t		sibling)
240 {
241 	struct xfs_da_state_path *path = &ds->state->path;
242 	struct xfs_da_state_path *altpath = &ds->state->altpath;
243 	int			retval;
244 	int			plevel;
245 	int			error;
246 
247 	memcpy(altpath, path, sizeof(ds->state->altpath));
248 
249 	/*
250 	 * If the pointer is null, we shouldn't be able to move the upper
251 	 * level pointer anywhere.
252 	 */
253 	if (sibling == 0) {
254 		error = xfs_da3_path_shift(ds->state, altpath, direction,
255 				false, &retval);
256 		if (error == 0 && retval == 0)
257 			xchk_da_set_corrupt(ds, level);
258 		error = 0;
259 		goto out;
260 	}
261 
262 	/* Move the alternate cursor one block in the direction given. */
263 	error = xfs_da3_path_shift(ds->state, altpath, direction, false,
264 			&retval);
265 	if (!xchk_da_process_error(ds, level, &error))
266 		goto out;
267 	if (retval) {
268 		xchk_da_set_corrupt(ds, level);
269 		goto out;
270 	}
271 	if (altpath->blk[level].bp)
272 		xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);
273 
274 	/* Compare upper level pointer to sibling pointer. */
275 	if (altpath->blk[level].blkno != sibling)
276 		xchk_da_set_corrupt(ds, level);
277 
278 out:
279 	/* Free all buffers in the altpath that aren't referenced from path. */
280 	for (plevel = 0; plevel < altpath->active; plevel++) {
281 		if (altpath->blk[plevel].bp == NULL ||
282 		    (plevel < path->active &&
283 		     altpath->blk[plevel].bp == path->blk[plevel].bp))
284 			continue;
285 
286 		xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
287 		altpath->blk[plevel].bp = NULL;
288 	}
289 
290 	return error;
291 }
292 
293 /* Check a block's sibling pointers. */
294 STATIC int
295 xchk_da_btree_block_check_siblings(
296 	struct xchk_da_btree	*ds,
297 	int			level,
298 	struct xfs_da_blkinfo	*hdr)
299 {
300 	xfs_dablk_t		forw;
301 	xfs_dablk_t		back;
302 	int			error = 0;
303 
304 	forw = be32_to_cpu(hdr->forw);
305 	back = be32_to_cpu(hdr->back);
306 
307 	/* Top level blocks should not have sibling pointers. */
308 	if (level == 0) {
309 		if (forw != 0 || back != 0)
310 			xchk_da_set_corrupt(ds, level);
311 		return 0;
312 	}
313 
314 	/*
315 	 * Check back (left) and forw (right) pointers.  These functions
316 	 * absorb error codes for us.
317 	 */
318 	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
319 	if (error)
320 		goto out;
321 	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
322 
323 out:
324 	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
325 	return error;
326 }
327 
/*
 * Load a dir/attribute block from a btree.
 *
 * Reads da block @blkno into slot @level of the cursor path (releasing any
 * buffer that slot held before), makes that level the deepest active one,
 * and cross-checks the block: pointer range, header padding and owner on
 * CRC-enabled filesystems, sibling pointers, block type versus tree level,
 * and the last hash versus the key stored in the parent node entry.
 *
 * Problems are reported by setting XFS_SCRUB_OFLAG_CORRUPT rather than by
 * the return value.  Checks that exit through out_nobuf/out_freebp leave the
 * slot with a NULL buffer and a zeroed blkno, which is how the caller learns
 * that it cannot keep walking from here.
 *
 * Returns 0, or a negative errno that xchk_da_process_error or the sibling
 * check did not absorb.
 */
STATIC int
xchk_da_btree_block(
	struct xchk_da_btree		*ds,
	int				level,
	xfs_dablk_t			blkno)
{
	struct xfs_da_state_blk		*blk;
	struct xfs_da_intnode		*node;
	struct xfs_da_node_entry	*btree;
	struct xfs_da3_blkinfo		*hdr3;
	struct xfs_da_args		*dargs = &ds->dargs;
	struct xfs_inode		*ip = ds->dargs.dp;
	xfs_failaddr_t			fa;
	xfs_ino_t			owner;
	int				*pmaxrecs;
	struct xfs_da3_icnode_hdr	nodehdr;
	int				error = 0;

	blk = &ds->state->path.blk[level];
	ds->state->path.active = level + 1;

	/* Release old block. */
	if (blk->bp) {
		xfs_trans_brelse(dargs->trans, blk->bp);
		blk->bp = NULL;
	}

	/* Check the pointer. */
	blk->blkno = blkno;
	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
		goto out_nobuf;

	/* Read the buffer. */
	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
			XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
			&xchk_da_btree_buf_ops);
	if (!xchk_da_process_error(ds, level, &error))
		goto out_nobuf;
	if (blk->bp)
		xchk_buffer_recheck(ds->sc, blk->bp);

	/*
	 * We didn't find a dir btree root block, which means that
	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
	 * to be), so jump out now.
	 */
	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
			blk->bp == NULL)
		goto out_nobuf;

	/* It's /not/ ok for attr trees not to have a da btree. */
	if (blk->bp == NULL) {
		xchk_da_set_corrupt(ds, level);
		goto out_nobuf;
	}

	hdr3 = blk->bp->b_addr;
	blk->magic = be16_to_cpu(hdr3->hdr.magic);
	/* Each case below stores this block's record count through pmaxrecs. */
	pmaxrecs = &ds->maxrecs[level];

	/* We only started zeroing the header on v5 filesystems. */
	if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
		xchk_da_set_corrupt(ds, level);

	/* Check the owner. */
	if (xfs_has_crc(ip->i_mount)) {
		owner = be64_to_cpu(hdr3->owner);
		if (owner != ip->i_ino)
			xchk_da_set_corrupt(ds, level);
	}

	/* Check the siblings. */
	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
	if (error)
		goto out;

	/*
	 * Interpret the buffer.  blk->magic is normalized to the non-CRC
	 * magic for the block type, so later code only has to compare
	 * against one value per type.
	 */
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_ATTR_LEAF_BUF);
		blk->magic = XFS_ATTR_LEAF_MAGIC;
		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
		/* A leaf is only expected once tree_level has reached zero. */
		if (ds->tree_level != 0)
			xchk_da_set_corrupt(ds, level);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_DIR_LEAFN_BUF);
		blk->magic = XFS_DIR2_LEAFN_MAGIC;
		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
		if (ds->tree_level != 0)
			xchk_da_set_corrupt(ds, level);
		break;
	case XFS_DIR2_LEAF1_MAGIC:
	case XFS_DIR3_LEAF1_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_DIR_LEAF1_BUF);
		blk->magic = XFS_DIR2_LEAF1_MAGIC;
		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
		if (ds->tree_level != 0)
			xchk_da_set_corrupt(ds, level);
		break;
	case XFS_DA_NODE_MAGIC:
	case XFS_DA3_NODE_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_DA_NODE_BUF);
		blk->magic = XFS_DA_NODE_MAGIC;
		node = blk->bp->b_addr;
		xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
		btree = nodehdr.btree;
		*pmaxrecs = nodehdr.count;
		/*
		 * The last entry's hash stands for the whole node.
		 * NOTE(review): this indexes count - 1, so it relies on the
		 * count being nonzero; presumably the read verifier rejects
		 * empty nodes -- confirm.
		 */
		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
		if (level == 0) {
			/* The root's level field seeds the expected depth. */
			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
				xchk_da_set_corrupt(ds, level);
				goto out_freebp;
			}
			ds->tree_level = nodehdr.level;
		} else {
			/* Interior nodes must sit at the depth we expect. */
			if (ds->tree_level != nodehdr.level) {
				xchk_da_set_corrupt(ds, level);
				goto out_freebp;
			}
		}

		/* XXX: Check hdr3.pad32 once we know how to fix it. */
		break;
	default:
		/* Unrecognized magic number. */
		xchk_da_set_corrupt(ds, level);
		goto out_freebp;
	}

	/* A non-NULL failure address means the header check failed. */
	fa = xfs_da3_header_check(blk->bp, dargs->owner);
	if (fa) {
		xchk_da_set_corrupt(ds, level);
		goto out_freebp;
	}

	/*
	 * If we've been handed a block that is below the dabtree root, does
	 * its hashval match what the parent block expected to see?
	 */
	if (level > 0) {
		struct xfs_da_node_entry	*key;

		key = xchk_da_btree_node_entry(ds, level - 1);
		if (be32_to_cpu(key->hashval) != blk->hashval) {
			xchk_da_set_corrupt(ds, level);
			goto out_freebp;
		}
	}

out:
	/* Normal exit; the slot keeps whatever buffer it holds. */
	return error;
out_freebp:
	/* Drop the buffer so the caller sees a NULL bp and stops descending. */
	xfs_trans_brelse(dargs->trans, blk->bp);
	blk->bp = NULL;
out_nobuf:
	blk->blkno = 0;
	return error;
}
493 
/*
 * Visit all nodes and leaves of a da btree.
 *
 * Walks the da btree in fork @whichfork of sc->ip depth-first, starting at
 * ds->lowest (0 for the attr fork, the directory geometry's leafblk for the
 * data fork).  Node blocks have each key checked with xchk_da_btree_hash
 * before the walk descends through it; for every other block type @scrub_fn
 * is called once per record index with the cursor positioned on that record.
 * @private is stored in ds->private for @scrub_fn to use.
 *
 * The walk stops early if @scrub_fn returns an error, if
 * xchk_should_terminate() says so, once XFS_SCRUB_OFLAG_CORRUPT has been set
 * after scrubbing a record, or if a block cannot be loaded.
 *
 * Returns 0 if the fork fails the xfs_ifork_has_extents() test (nothing to
 * scan) or the walk finished, -ENOMEM if the scrub context cannot be
 * allocated, or the negative errno that stopped the walk.
 */
int
xchk_da_btree(
	struct xfs_scrub		*sc,
	int				whichfork,
	xchk_da_btree_rec_fn		scrub_fn,
	void				*private)
{
	struct xchk_da_btree		*ds;
	struct xfs_mount		*mp = sc->mp;
	struct xfs_da_state_blk		*blks;
	struct xfs_da_node_entry	*key;
	xfs_dablk_t			blkno;
	int				level;
	int				error;

	/* Skip short format data structures; no btree to scan. */
	if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork)))
		return 0;

	/* Set up initial da state. */
	ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS);
	if (!ds)
		return -ENOMEM;
	ds->dargs.dp = sc->ip;
	ds->dargs.whichfork = whichfork;
	ds->dargs.trans = sc->tp;
	ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
	ds->dargs.owner = sc->ip->i_ino;
	ds->state = xfs_da_state_alloc(&ds->dargs);
	ds->sc = sc;
	ds->private = private;
	/*
	 * Pick the geometry and the range of da block numbers that tree
	 * pointers may legally take; highest == 0 means no upper bound.
	 */
	if (whichfork == XFS_ATTR_FORK) {
		ds->dargs.geo = mp->m_attr_geo;
		ds->lowest = 0;
		ds->highest = 0;
	} else {
		ds->dargs.geo = mp->m_dir_geo;
		ds->lowest = ds->dargs.geo->leafblk;
		ds->highest = ds->dargs.geo->freeblk;
	}
	blkno = ds->lowest;
	level = 0;

	/* Find the root of the da tree, if present. */
	blks = ds->state->path.blk;
	error = xchk_da_btree_block(ds, level, blkno);
	if (error)
		goto out_state;
	/*
	 * We didn't find a block at ds->lowest, which means that there's
	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
	 * so jump out now.
	 */
	if (blks[level].bp == NULL)
		goto out_state;

	blks[level].index = 0;
	/*
	 * Depth-first walk.  level indexes the cursor path (0 is the root);
	 * the loop ends when level drops below zero after the root has been
	 * fully consumed.
	 */
	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
		/* Handle leaf block. */
		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
			/* End of leaf, pop back towards the root. */
			if (blks[level].index >= ds->maxrecs[level]) {
				/* Advance the parent to its next child. */
				if (level > 0)
					blks[level - 1].index++;
				/* Undo the decrement made on the way down. */
				ds->tree_level++;
				level--;
				continue;
			}

			/* Dispatch record scrubbing. */
			error = scrub_fn(ds, level);
			if (error)
				break;
			if (xchk_should_terminate(sc, &error) ||
			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
				break;

			blks[level].index++;
			continue;
		}


		/* End of node, pop back towards the root. */
		if (blks[level].index >= ds->maxrecs[level]) {
			if (level > 0)
				blks[level - 1].index++;
			ds->tree_level++;
			level--;
			continue;
		}

		/* Hashes in order for scrub? */
		key = xchk_da_btree_node_entry(ds, level);
		error = xchk_da_btree_hash(ds, level, &key->hashval);
		if (error)
			goto out;

		/* Drill another level deeper. */
		blkno = be32_to_cpu(key->before);
		level++;
		if (level >= XFS_DA_NODE_MAXDEPTH) {
			/* Too deep! */
			xchk_da_set_corrupt(ds, level - 1);
			break;
		}
		/* The child is expected to be one level closer to the leaves. */
		ds->tree_level--;
		error = xchk_da_btree_block(ds, level, blkno);
		if (error)
			goto out;
		/* No buffer means the child could not be used; stop walking. */
		if (blks[level].bp == NULL)
			goto out;

		blks[level].index = 0;
	}

out:
	/* Release all the buffers we're tracking. */
	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
		if (blks[level].bp == NULL)
			continue;
		xfs_trans_brelse(sc->tp, blks[level].bp);
		blks[level].bp = NULL;
	}

out_state:
	xfs_da_state_free(ds->state);
	kfree(ds);
	return error;
}
624