xref: /linux/fs/xfs/scrub/dabtree.c (revision 4e4d9c72c946b77f0278988d0bf1207fa1b2cd0f)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_dir2.h"
16 #include "xfs_dir2_priv.h"
17 #include "xfs_attr_leaf.h"
18 #include "scrub/scrub.h"
19 #include "scrub/common.h"
20 #include "scrub/trace.h"
21 #include "scrub/dabtree.h"
22 
23 /* Directory/Attribute Btree */
24 
25 /*
26  * Check for da btree operation errors.  See the section about handling
27  * operational errors in common.c.
28  */
29 bool
30 xchk_da_process_error(
31 	struct xchk_da_btree	*ds,
32 	int			level,
33 	int			*error)
34 {
35 	struct xfs_scrub	*sc = ds->sc;
36 
37 	if (*error == 0)
38 		return true;
39 
40 	switch (*error) {
41 	case -EDEADLOCK:
42 	case -ECHRNG:
43 		/* Used to restart an op with deadlock avoidance. */
44 		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
45 		break;
46 	case -EFSBADCRC:
47 	case -EFSCORRUPTED:
48 		/* Note the badness but don't abort. */
49 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
50 		*error = 0;
51 		fallthrough;
52 	default:
53 		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
54 				xfs_dir2_da_to_db(ds->dargs.geo,
55 					ds->state->path.blk[level].blkno),
56 				*error, __return_address);
57 		break;
58 	}
59 	return false;
60 }
61 
62 /*
63  * Check for da btree corruption.  See the section about handling
64  * operational errors in common.c.
65  */
66 void
67 xchk_da_set_corrupt(
68 	struct xchk_da_btree	*ds,
69 	int			level)
70 {
71 	struct xfs_scrub	*sc = ds->sc;
72 
73 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
74 
75 	trace_xchk_fblock_error(sc, ds->dargs.whichfork,
76 			xfs_dir2_da_to_db(ds->dargs.geo,
77 				ds->state->path.blk[level].blkno),
78 			__return_address);
79 }
80 
81 /* Flag a da btree node in need of optimization. */
82 void
83 xchk_da_set_preen(
84 	struct xchk_da_btree	*ds,
85 	int			level)
86 {
87 	struct xfs_scrub	*sc = ds->sc;
88 
89 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
90 	trace_xchk_fblock_preen(sc, ds->dargs.whichfork,
91 			xfs_dir2_da_to_db(ds->dargs.geo,
92 				ds->state->path.blk[level].blkno),
93 			__return_address);
94 }
95 
96 /* Find an entry at a certain level in a da btree. */
97 static struct xfs_da_node_entry *
98 xchk_da_btree_node_entry(
99 	struct xchk_da_btree		*ds,
100 	int				level)
101 {
102 	struct xfs_da_state_blk		*blk = &ds->state->path.blk[level];
103 	struct xfs_da3_icnode_hdr	hdr;
104 
105 	ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
106 
107 	xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
108 	return hdr.btree + blk->index;
109 }
110 
111 /* Scrub a da btree hash (key). */
112 int
113 xchk_da_btree_hash(
114 	struct xchk_da_btree		*ds,
115 	int				level,
116 	__be32				*hashp)
117 {
118 	struct xfs_da_node_entry	*entry;
119 	xfs_dahash_t			hash;
120 	xfs_dahash_t			parent_hash;
121 
122 	/* Is this hash in order? */
123 	hash = be32_to_cpu(*hashp);
124 	if (hash < ds->hashes[level])
125 		xchk_da_set_corrupt(ds, level);
126 	ds->hashes[level] = hash;
127 
128 	if (level == 0)
129 		return 0;
130 
131 	/* Is this hash no larger than the parent hash? */
132 	entry = xchk_da_btree_node_entry(ds, level - 1);
133 	parent_hash = be32_to_cpu(entry->hashval);
134 	if (parent_hash < hash)
135 		xchk_da_set_corrupt(ds, level);
136 
137 	return 0;
138 }
139 
140 /*
141  * Check a da btree pointer.  Returns true if it's ok to use this
142  * pointer.
143  */
144 STATIC bool
145 xchk_da_btree_ptr_ok(
146 	struct xchk_da_btree	*ds,
147 	int			level,
148 	xfs_dablk_t		blkno)
149 {
150 	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
151 		xchk_da_set_corrupt(ds, level);
152 		return false;
153 	}
154 
155 	return true;
156 }
157 
158 /*
159  * The da btree scrubber can handle leaf1 blocks as a degenerate
160  * form of leafn blocks.  Since the regular da code doesn't handle
161  * leaf1, we must multiplex the verifiers.
162  */
163 static void
164 xchk_da_btree_read_verify(
165 	struct xfs_buf		*bp)
166 {
167 	struct xfs_da_blkinfo	*info = bp->b_addr;
168 
169 	switch (be16_to_cpu(info->magic)) {
170 	case XFS_DIR2_LEAF1_MAGIC:
171 	case XFS_DIR3_LEAF1_MAGIC:
172 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
173 		bp->b_ops->verify_read(bp);
174 		return;
175 	default:
176 		/*
177 		 * xfs_da3_node_buf_ops already know how to handle
178 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
179 		 */
180 		bp->b_ops = &xfs_da3_node_buf_ops;
181 		bp->b_ops->verify_read(bp);
182 		return;
183 	}
184 }
185 static void
186 xchk_da_btree_write_verify(
187 	struct xfs_buf		*bp)
188 {
189 	struct xfs_da_blkinfo	*info = bp->b_addr;
190 
191 	switch (be16_to_cpu(info->magic)) {
192 	case XFS_DIR2_LEAF1_MAGIC:
193 	case XFS_DIR3_LEAF1_MAGIC:
194 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
195 		bp->b_ops->verify_write(bp);
196 		return;
197 	default:
198 		/*
199 		 * xfs_da3_node_buf_ops already know how to handle
200 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
201 		 */
202 		bp->b_ops = &xfs_da3_node_buf_ops;
203 		bp->b_ops->verify_write(bp);
204 		return;
205 	}
206 }
207 static void *
208 xchk_da_btree_verify(
209 	struct xfs_buf		*bp)
210 {
211 	struct xfs_da_blkinfo	*info = bp->b_addr;
212 
213 	switch (be16_to_cpu(info->magic)) {
214 	case XFS_DIR2_LEAF1_MAGIC:
215 	case XFS_DIR3_LEAF1_MAGIC:
216 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
217 		return bp->b_ops->verify_struct(bp);
218 	default:
219 		bp->b_ops = &xfs_da3_node_buf_ops;
220 		return bp->b_ops->verify_struct(bp);
221 	}
222 }
223 
224 static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
225 	.name = "xchk_da_btree",
226 	.verify_read = xchk_da_btree_read_verify,
227 	.verify_write = xchk_da_btree_write_verify,
228 	.verify_struct = xchk_da_btree_verify,
229 };
230 
231 /* Check a block's sibling. */
232 STATIC int
233 xchk_da_btree_block_check_sibling(
234 	struct xchk_da_btree	*ds,
235 	int			level,
236 	int			direction,
237 	xfs_dablk_t		sibling)
238 {
239 	struct xfs_da_state_path *path = &ds->state->path;
240 	struct xfs_da_state_path *altpath = &ds->state->altpath;
241 	int			retval;
242 	int			plevel;
243 	int			error;
244 
245 	memcpy(altpath, path, sizeof(ds->state->altpath));
246 
247 	/*
248 	 * If the pointer is null, we shouldn't be able to move the upper
249 	 * level pointer anywhere.
250 	 */
251 	if (sibling == 0) {
252 		error = xfs_da3_path_shift(ds->state, altpath, direction,
253 				false, &retval);
254 		if (error == 0 && retval == 0)
255 			xchk_da_set_corrupt(ds, level);
256 		error = 0;
257 		goto out;
258 	}
259 
260 	/* Move the alternate cursor one block in the direction given. */
261 	error = xfs_da3_path_shift(ds->state, altpath, direction, false,
262 			&retval);
263 	if (!xchk_da_process_error(ds, level, &error))
264 		goto out;
265 	if (retval) {
266 		xchk_da_set_corrupt(ds, level);
267 		goto out;
268 	}
269 	if (altpath->blk[level].bp)
270 		xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);
271 
272 	/* Compare upper level pointer to sibling pointer. */
273 	if (altpath->blk[level].blkno != sibling)
274 		xchk_da_set_corrupt(ds, level);
275 
276 out:
277 	/* Free all buffers in the altpath that aren't referenced from path. */
278 	for (plevel = 0; plevel < altpath->active; plevel++) {
279 		if (altpath->blk[plevel].bp == NULL ||
280 		    (plevel < path->active &&
281 		     altpath->blk[plevel].bp == path->blk[plevel].bp))
282 			continue;
283 
284 		xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
285 		altpath->blk[plevel].bp = NULL;
286 	}
287 
288 	return error;
289 }
290 
291 /* Check a block's sibling pointers. */
292 STATIC int
293 xchk_da_btree_block_check_siblings(
294 	struct xchk_da_btree	*ds,
295 	int			level,
296 	struct xfs_da_blkinfo	*hdr)
297 {
298 	xfs_dablk_t		forw;
299 	xfs_dablk_t		back;
300 	int			error = 0;
301 
302 	forw = be32_to_cpu(hdr->forw);
303 	back = be32_to_cpu(hdr->back);
304 
305 	/* Top level blocks should not have sibling pointers. */
306 	if (level == 0) {
307 		if (forw != 0 || back != 0)
308 			xchk_da_set_corrupt(ds, level);
309 		return 0;
310 	}
311 
312 	/*
313 	 * Check back (left) and forw (right) pointers.  These functions
314 	 * absorb error codes for us.
315 	 */
316 	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
317 	if (error)
318 		goto out;
319 	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
320 
321 out:
322 	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
323 	return error;
324 }
325 
326 /* Load a dir/attribute block from a btree. */
327 STATIC int
328 xchk_da_btree_block(
329 	struct xchk_da_btree		*ds,
330 	int				level,
331 	xfs_dablk_t			blkno)
332 {
333 	struct xfs_da_state_blk		*blk;
334 	struct xfs_da_intnode		*node;
335 	struct xfs_da_node_entry	*btree;
336 	struct xfs_da3_blkinfo		*hdr3;
337 	struct xfs_da_args		*dargs = &ds->dargs;
338 	struct xfs_inode		*ip = ds->dargs.dp;
339 	xfs_failaddr_t			fa;
340 	xfs_ino_t			owner;
341 	int				*pmaxrecs;
342 	struct xfs_da3_icnode_hdr	nodehdr;
343 	int				error = 0;
344 
345 	blk = &ds->state->path.blk[level];
346 	ds->state->path.active = level + 1;
347 
348 	/* Release old block. */
349 	if (blk->bp) {
350 		xfs_trans_brelse(dargs->trans, blk->bp);
351 		blk->bp = NULL;
352 	}
353 
354 	/* Check the pointer. */
355 	blk->blkno = blkno;
356 	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
357 		goto out_nobuf;
358 
359 	/* Read the buffer. */
360 	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
361 			XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
362 			&xchk_da_btree_buf_ops);
363 	if (!xchk_da_process_error(ds, level, &error))
364 		goto out_nobuf;
365 	if (blk->bp)
366 		xchk_buffer_recheck(ds->sc, blk->bp);
367 
368 	/*
369 	 * We didn't find a dir btree root block, which means that
370 	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
371 	 * to be), so jump out now.
372 	 */
373 	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
374 			blk->bp == NULL)
375 		goto out_nobuf;
376 
377 	/* It's /not/ ok for attr trees not to have a da btree. */
378 	if (blk->bp == NULL) {
379 		xchk_da_set_corrupt(ds, level);
380 		goto out_nobuf;
381 	}
382 
383 	hdr3 = blk->bp->b_addr;
384 	blk->magic = be16_to_cpu(hdr3->hdr.magic);
385 	pmaxrecs = &ds->maxrecs[level];
386 
387 	/* We only started zeroing the header on v5 filesystems. */
388 	if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
389 		xchk_da_set_corrupt(ds, level);
390 
391 	/* Check the owner. */
392 	if (xfs_has_crc(ip->i_mount)) {
393 		owner = be64_to_cpu(hdr3->owner);
394 		if (owner != ip->i_ino)
395 			xchk_da_set_corrupt(ds, level);
396 	}
397 
398 	/* Check the siblings. */
399 	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
400 	if (error)
401 		goto out;
402 
403 	/* Interpret the buffer. */
404 	switch (blk->magic) {
405 	case XFS_ATTR_LEAF_MAGIC:
406 	case XFS_ATTR3_LEAF_MAGIC:
407 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
408 				XFS_BLFT_ATTR_LEAF_BUF);
409 		blk->magic = XFS_ATTR_LEAF_MAGIC;
410 		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
411 		if (ds->tree_level != 0)
412 			xchk_da_set_corrupt(ds, level);
413 		break;
414 	case XFS_DIR2_LEAFN_MAGIC:
415 	case XFS_DIR3_LEAFN_MAGIC:
416 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
417 				XFS_BLFT_DIR_LEAFN_BUF);
418 		blk->magic = XFS_DIR2_LEAFN_MAGIC;
419 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
420 		if (ds->tree_level != 0)
421 			xchk_da_set_corrupt(ds, level);
422 		break;
423 	case XFS_DIR2_LEAF1_MAGIC:
424 	case XFS_DIR3_LEAF1_MAGIC:
425 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
426 				XFS_BLFT_DIR_LEAF1_BUF);
427 		blk->magic = XFS_DIR2_LEAF1_MAGIC;
428 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
429 		if (ds->tree_level != 0)
430 			xchk_da_set_corrupt(ds, level);
431 		break;
432 	case XFS_DA_NODE_MAGIC:
433 	case XFS_DA3_NODE_MAGIC:
434 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
435 				XFS_BLFT_DA_NODE_BUF);
436 		blk->magic = XFS_DA_NODE_MAGIC;
437 		node = blk->bp->b_addr;
438 		xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
439 		btree = nodehdr.btree;
440 		*pmaxrecs = nodehdr.count;
441 		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
442 		if (level == 0) {
443 			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
444 				xchk_da_set_corrupt(ds, level);
445 				goto out_freebp;
446 			}
447 			ds->tree_level = nodehdr.level;
448 		} else {
449 			if (ds->tree_level != nodehdr.level) {
450 				xchk_da_set_corrupt(ds, level);
451 				goto out_freebp;
452 			}
453 		}
454 
455 		/* XXX: Check hdr3.pad32 once we know how to fix it. */
456 		break;
457 	default:
458 		xchk_da_set_corrupt(ds, level);
459 		goto out_freebp;
460 	}
461 
462 	fa = xfs_da3_header_check(blk->bp, dargs->owner);
463 	if (fa) {
464 		xchk_da_set_corrupt(ds, level);
465 		goto out_freebp;
466 	}
467 
468 	/*
469 	 * If we've been handed a block that is below the dabtree root, does
470 	 * its hashval match what the parent block expected to see?
471 	 */
472 	if (level > 0) {
473 		struct xfs_da_node_entry	*key;
474 
475 		key = xchk_da_btree_node_entry(ds, level - 1);
476 		if (be32_to_cpu(key->hashval) != blk->hashval) {
477 			xchk_da_set_corrupt(ds, level);
478 			goto out_freebp;
479 		}
480 	}
481 
482 out:
483 	return error;
484 out_freebp:
485 	xfs_trans_brelse(dargs->trans, blk->bp);
486 	blk->bp = NULL;
487 out_nobuf:
488 	blk->blkno = 0;
489 	return error;
490 }
491 
492 /* Visit all nodes and leaves of a da btree. */
493 int
494 xchk_da_btree(
495 	struct xfs_scrub		*sc,
496 	int				whichfork,
497 	xchk_da_btree_rec_fn		scrub_fn,
498 	void				*private)
499 {
500 	struct xchk_da_btree		*ds;
501 	struct xfs_mount		*mp = sc->mp;
502 	struct xfs_da_state_blk		*blks;
503 	struct xfs_da_node_entry	*key;
504 	xfs_dablk_t			blkno;
505 	int				level;
506 	int				error;
507 
508 	/* Skip short format data structures; no btree to scan. */
509 	if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork)))
510 		return 0;
511 
512 	/* Set up initial da state. */
513 	ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS);
514 	if (!ds)
515 		return -ENOMEM;
516 	ds->dargs.dp = sc->ip;
517 	ds->dargs.whichfork = whichfork;
518 	ds->dargs.trans = sc->tp;
519 	ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
520 	ds->dargs.owner = sc->ip->i_ino;
521 	ds->state = xfs_da_state_alloc(&ds->dargs);
522 	ds->sc = sc;
523 	ds->private = private;
524 	if (whichfork == XFS_ATTR_FORK) {
525 		ds->dargs.geo = mp->m_attr_geo;
526 		ds->lowest = 0;
527 		ds->highest = 0;
528 	} else {
529 		ds->dargs.geo = mp->m_dir_geo;
530 		ds->lowest = ds->dargs.geo->leafblk;
531 		ds->highest = ds->dargs.geo->freeblk;
532 	}
533 	blkno = ds->lowest;
534 	level = 0;
535 
536 	/* Find the root of the da tree, if present. */
537 	blks = ds->state->path.blk;
538 	error = xchk_da_btree_block(ds, level, blkno);
539 	if (error)
540 		goto out_state;
541 	/*
542 	 * We didn't find a block at ds->lowest, which means that there's
543 	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
544 	 * so jump out now.
545 	 */
546 	if (blks[level].bp == NULL)
547 		goto out_state;
548 
549 	blks[level].index = 0;
550 	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
551 		/* Handle leaf block. */
552 		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
553 			/* End of leaf, pop back towards the root. */
554 			if (blks[level].index >= ds->maxrecs[level]) {
555 				if (level > 0)
556 					blks[level - 1].index++;
557 				ds->tree_level++;
558 				level--;
559 				continue;
560 			}
561 
562 			/* Dispatch record scrubbing. */
563 			error = scrub_fn(ds, level);
564 			if (error)
565 				break;
566 			if (xchk_should_terminate(sc, &error) ||
567 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
568 				break;
569 
570 			blks[level].index++;
571 			continue;
572 		}
573 
574 
575 		/* End of node, pop back towards the root. */
576 		if (blks[level].index >= ds->maxrecs[level]) {
577 			if (level > 0)
578 				blks[level - 1].index++;
579 			ds->tree_level++;
580 			level--;
581 			continue;
582 		}
583 
584 		/* Hashes in order for scrub? */
585 		key = xchk_da_btree_node_entry(ds, level);
586 		error = xchk_da_btree_hash(ds, level, &key->hashval);
587 		if (error)
588 			goto out;
589 
590 		/* Drill another level deeper. */
591 		blkno = be32_to_cpu(key->before);
592 		level++;
593 		if (level >= XFS_DA_NODE_MAXDEPTH) {
594 			/* Too deep! */
595 			xchk_da_set_corrupt(ds, level - 1);
596 			break;
597 		}
598 		ds->tree_level--;
599 		error = xchk_da_btree_block(ds, level, blkno);
600 		if (error)
601 			goto out;
602 		if (blks[level].bp == NULL)
603 			goto out;
604 
605 		blks[level].index = 0;
606 	}
607 
608 out:
609 	/* Release all the buffers we're tracking. */
610 	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
611 		if (blks[level].bp == NULL)
612 			continue;
613 		xfs_trans_brelse(sc->tp, blks[level].bp);
614 		blks[level].bp = NULL;
615 	}
616 
617 out_state:
618 	xfs_da_state_free(ds->state);
619 	kfree(ds);
620 	return error;
621 }
622