xref: /linux/fs/xfs/scrub/dabtree.c (revision 1b0975ee3bdd3eb19a47371c26fd7ef8f7f6b599)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_dir2.h"
16 #include "xfs_dir2_priv.h"
17 #include "xfs_attr_leaf.h"
18 #include "scrub/scrub.h"
19 #include "scrub/common.h"
20 #include "scrub/trace.h"
21 #include "scrub/dabtree.h"
22 
23 /* Directory/Attribute Btree */
24 
25 /*
26  * Check for da btree operation errors.  See the section about handling
27  * operational errors in common.c.
28  */
29 bool
30 xchk_da_process_error(
31 	struct xchk_da_btree	*ds,
32 	int			level,
33 	int			*error)
34 {
35 	struct xfs_scrub	*sc = ds->sc;
36 
37 	if (*error == 0)
38 		return true;
39 
40 	switch (*error) {
41 	case -EDEADLOCK:
42 	case -ECHRNG:
43 		/* Used to restart an op with deadlock avoidance. */
44 		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
45 		break;
46 	case -EFSBADCRC:
47 	case -EFSCORRUPTED:
48 		/* Note the badness but don't abort. */
49 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
50 		*error = 0;
51 		fallthrough;
52 	default:
53 		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
54 				xfs_dir2_da_to_db(ds->dargs.geo,
55 					ds->state->path.blk[level].blkno),
56 				*error, __return_address);
57 		break;
58 	}
59 	return false;
60 }
61 
62 /*
63  * Check for da btree corruption.  See the section about handling
64  * operational errors in common.c.
65  */
66 void
67 xchk_da_set_corrupt(
68 	struct xchk_da_btree	*ds,
69 	int			level)
70 {
71 	struct xfs_scrub	*sc = ds->sc;
72 
73 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
74 
75 	trace_xchk_fblock_error(sc, ds->dargs.whichfork,
76 			xfs_dir2_da_to_db(ds->dargs.geo,
77 				ds->state->path.blk[level].blkno),
78 			__return_address);
79 }
80 
81 static struct xfs_da_node_entry *
82 xchk_da_btree_node_entry(
83 	struct xchk_da_btree		*ds,
84 	int				level)
85 {
86 	struct xfs_da_state_blk		*blk = &ds->state->path.blk[level];
87 	struct xfs_da3_icnode_hdr	hdr;
88 
89 	ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
90 
91 	xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
92 	return hdr.btree + blk->index;
93 }
94 
95 /* Scrub a da btree hash (key). */
96 int
97 xchk_da_btree_hash(
98 	struct xchk_da_btree		*ds,
99 	int				level,
100 	__be32				*hashp)
101 {
102 	struct xfs_da_node_entry	*entry;
103 	xfs_dahash_t			hash;
104 	xfs_dahash_t			parent_hash;
105 
106 	/* Is this hash in order? */
107 	hash = be32_to_cpu(*hashp);
108 	if (hash < ds->hashes[level])
109 		xchk_da_set_corrupt(ds, level);
110 	ds->hashes[level] = hash;
111 
112 	if (level == 0)
113 		return 0;
114 
115 	/* Is this hash no larger than the parent hash? */
116 	entry = xchk_da_btree_node_entry(ds, level - 1);
117 	parent_hash = be32_to_cpu(entry->hashval);
118 	if (parent_hash < hash)
119 		xchk_da_set_corrupt(ds, level);
120 
121 	return 0;
122 }
123 
124 /*
125  * Check a da btree pointer.  Returns true if it's ok to use this
126  * pointer.
127  */
128 STATIC bool
129 xchk_da_btree_ptr_ok(
130 	struct xchk_da_btree	*ds,
131 	int			level,
132 	xfs_dablk_t		blkno)
133 {
134 	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
135 		xchk_da_set_corrupt(ds, level);
136 		return false;
137 	}
138 
139 	return true;
140 }
141 
142 /*
143  * The da btree scrubber can handle leaf1 blocks as a degenerate
144  * form of leafn blocks.  Since the regular da code doesn't handle
145  * leaf1, we must multiplex the verifiers.
146  */
147 static void
148 xchk_da_btree_read_verify(
149 	struct xfs_buf		*bp)
150 {
151 	struct xfs_da_blkinfo	*info = bp->b_addr;
152 
153 	switch (be16_to_cpu(info->magic)) {
154 	case XFS_DIR2_LEAF1_MAGIC:
155 	case XFS_DIR3_LEAF1_MAGIC:
156 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
157 		bp->b_ops->verify_read(bp);
158 		return;
159 	default:
160 		/*
161 		 * xfs_da3_node_buf_ops already know how to handle
162 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
163 		 */
164 		bp->b_ops = &xfs_da3_node_buf_ops;
165 		bp->b_ops->verify_read(bp);
166 		return;
167 	}
168 }
169 static void
170 xchk_da_btree_write_verify(
171 	struct xfs_buf		*bp)
172 {
173 	struct xfs_da_blkinfo	*info = bp->b_addr;
174 
175 	switch (be16_to_cpu(info->magic)) {
176 	case XFS_DIR2_LEAF1_MAGIC:
177 	case XFS_DIR3_LEAF1_MAGIC:
178 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
179 		bp->b_ops->verify_write(bp);
180 		return;
181 	default:
182 		/*
183 		 * xfs_da3_node_buf_ops already know how to handle
184 		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
185 		 */
186 		bp->b_ops = &xfs_da3_node_buf_ops;
187 		bp->b_ops->verify_write(bp);
188 		return;
189 	}
190 }
191 static void *
192 xchk_da_btree_verify(
193 	struct xfs_buf		*bp)
194 {
195 	struct xfs_da_blkinfo	*info = bp->b_addr;
196 
197 	switch (be16_to_cpu(info->magic)) {
198 	case XFS_DIR2_LEAF1_MAGIC:
199 	case XFS_DIR3_LEAF1_MAGIC:
200 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
201 		return bp->b_ops->verify_struct(bp);
202 	default:
203 		bp->b_ops = &xfs_da3_node_buf_ops;
204 		return bp->b_ops->verify_struct(bp);
205 	}
206 }
207 
208 static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
209 	.name = "xchk_da_btree",
210 	.verify_read = xchk_da_btree_read_verify,
211 	.verify_write = xchk_da_btree_write_verify,
212 	.verify_struct = xchk_da_btree_verify,
213 };
214 
215 /* Check a block's sibling. */
216 STATIC int
217 xchk_da_btree_block_check_sibling(
218 	struct xchk_da_btree	*ds,
219 	int			level,
220 	int			direction,
221 	xfs_dablk_t		sibling)
222 {
223 	struct xfs_da_state_path *path = &ds->state->path;
224 	struct xfs_da_state_path *altpath = &ds->state->altpath;
225 	int			retval;
226 	int			plevel;
227 	int			error;
228 
229 	memcpy(altpath, path, sizeof(ds->state->altpath));
230 
231 	/*
232 	 * If the pointer is null, we shouldn't be able to move the upper
233 	 * level pointer anywhere.
234 	 */
235 	if (sibling == 0) {
236 		error = xfs_da3_path_shift(ds->state, altpath, direction,
237 				false, &retval);
238 		if (error == 0 && retval == 0)
239 			xchk_da_set_corrupt(ds, level);
240 		error = 0;
241 		goto out;
242 	}
243 
244 	/* Move the alternate cursor one block in the direction given. */
245 	error = xfs_da3_path_shift(ds->state, altpath, direction, false,
246 			&retval);
247 	if (!xchk_da_process_error(ds, level, &error))
248 		goto out;
249 	if (retval) {
250 		xchk_da_set_corrupt(ds, level);
251 		goto out;
252 	}
253 	if (altpath->blk[level].bp)
254 		xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);
255 
256 	/* Compare upper level pointer to sibling pointer. */
257 	if (altpath->blk[level].blkno != sibling)
258 		xchk_da_set_corrupt(ds, level);
259 
260 out:
261 	/* Free all buffers in the altpath that aren't referenced from path. */
262 	for (plevel = 0; plevel < altpath->active; plevel++) {
263 		if (altpath->blk[plevel].bp == NULL ||
264 		    (plevel < path->active &&
265 		     altpath->blk[plevel].bp == path->blk[plevel].bp))
266 			continue;
267 
268 		xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
269 		altpath->blk[plevel].bp = NULL;
270 	}
271 
272 	return error;
273 }
274 
275 /* Check a block's sibling pointers. */
276 STATIC int
277 xchk_da_btree_block_check_siblings(
278 	struct xchk_da_btree	*ds,
279 	int			level,
280 	struct xfs_da_blkinfo	*hdr)
281 {
282 	xfs_dablk_t		forw;
283 	xfs_dablk_t		back;
284 	int			error = 0;
285 
286 	forw = be32_to_cpu(hdr->forw);
287 	back = be32_to_cpu(hdr->back);
288 
289 	/* Top level blocks should not have sibling pointers. */
290 	if (level == 0) {
291 		if (forw != 0 || back != 0)
292 			xchk_da_set_corrupt(ds, level);
293 		return 0;
294 	}
295 
296 	/*
297 	 * Check back (left) and forw (right) pointers.  These functions
298 	 * absorb error codes for us.
299 	 */
300 	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
301 	if (error)
302 		goto out;
303 	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
304 
305 out:
306 	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
307 	return error;
308 }
309 
310 /* Load a dir/attribute block from a btree. */
311 STATIC int
312 xchk_da_btree_block(
313 	struct xchk_da_btree		*ds,
314 	int				level,
315 	xfs_dablk_t			blkno)
316 {
317 	struct xfs_da_state_blk		*blk;
318 	struct xfs_da_intnode		*node;
319 	struct xfs_da_node_entry	*btree;
320 	struct xfs_da3_blkinfo		*hdr3;
321 	struct xfs_da_args		*dargs = &ds->dargs;
322 	struct xfs_inode		*ip = ds->dargs.dp;
323 	xfs_ino_t			owner;
324 	int				*pmaxrecs;
325 	struct xfs_da3_icnode_hdr	nodehdr;
326 	int				error = 0;
327 
328 	blk = &ds->state->path.blk[level];
329 	ds->state->path.active = level + 1;
330 
331 	/* Release old block. */
332 	if (blk->bp) {
333 		xfs_trans_brelse(dargs->trans, blk->bp);
334 		blk->bp = NULL;
335 	}
336 
337 	/* Check the pointer. */
338 	blk->blkno = blkno;
339 	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
340 		goto out_nobuf;
341 
342 	/* Read the buffer. */
343 	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
344 			XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
345 			&xchk_da_btree_buf_ops);
346 	if (!xchk_da_process_error(ds, level, &error))
347 		goto out_nobuf;
348 	if (blk->bp)
349 		xchk_buffer_recheck(ds->sc, blk->bp);
350 
351 	/*
352 	 * We didn't find a dir btree root block, which means that
353 	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
354 	 * to be), so jump out now.
355 	 */
356 	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
357 			blk->bp == NULL)
358 		goto out_nobuf;
359 
360 	/* It's /not/ ok for attr trees not to have a da btree. */
361 	if (blk->bp == NULL) {
362 		xchk_da_set_corrupt(ds, level);
363 		goto out_nobuf;
364 	}
365 
366 	hdr3 = blk->bp->b_addr;
367 	blk->magic = be16_to_cpu(hdr3->hdr.magic);
368 	pmaxrecs = &ds->maxrecs[level];
369 
370 	/* We only started zeroing the header on v5 filesystems. */
371 	if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
372 		xchk_da_set_corrupt(ds, level);
373 
374 	/* Check the owner. */
375 	if (xfs_has_crc(ip->i_mount)) {
376 		owner = be64_to_cpu(hdr3->owner);
377 		if (owner != ip->i_ino)
378 			xchk_da_set_corrupt(ds, level);
379 	}
380 
381 	/* Check the siblings. */
382 	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
383 	if (error)
384 		goto out;
385 
386 	/* Interpret the buffer. */
387 	switch (blk->magic) {
388 	case XFS_ATTR_LEAF_MAGIC:
389 	case XFS_ATTR3_LEAF_MAGIC:
390 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
391 				XFS_BLFT_ATTR_LEAF_BUF);
392 		blk->magic = XFS_ATTR_LEAF_MAGIC;
393 		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
394 		if (ds->tree_level != 0)
395 			xchk_da_set_corrupt(ds, level);
396 		break;
397 	case XFS_DIR2_LEAFN_MAGIC:
398 	case XFS_DIR3_LEAFN_MAGIC:
399 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
400 				XFS_BLFT_DIR_LEAFN_BUF);
401 		blk->magic = XFS_DIR2_LEAFN_MAGIC;
402 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
403 		if (ds->tree_level != 0)
404 			xchk_da_set_corrupt(ds, level);
405 		break;
406 	case XFS_DIR2_LEAF1_MAGIC:
407 	case XFS_DIR3_LEAF1_MAGIC:
408 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
409 				XFS_BLFT_DIR_LEAF1_BUF);
410 		blk->magic = XFS_DIR2_LEAF1_MAGIC;
411 		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
412 		if (ds->tree_level != 0)
413 			xchk_da_set_corrupt(ds, level);
414 		break;
415 	case XFS_DA_NODE_MAGIC:
416 	case XFS_DA3_NODE_MAGIC:
417 		xfs_trans_buf_set_type(dargs->trans, blk->bp,
418 				XFS_BLFT_DA_NODE_BUF);
419 		blk->magic = XFS_DA_NODE_MAGIC;
420 		node = blk->bp->b_addr;
421 		xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
422 		btree = nodehdr.btree;
423 		*pmaxrecs = nodehdr.count;
424 		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
425 		if (level == 0) {
426 			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
427 				xchk_da_set_corrupt(ds, level);
428 				goto out_freebp;
429 			}
430 			ds->tree_level = nodehdr.level;
431 		} else {
432 			if (ds->tree_level != nodehdr.level) {
433 				xchk_da_set_corrupt(ds, level);
434 				goto out_freebp;
435 			}
436 		}
437 
438 		/* XXX: Check hdr3.pad32 once we know how to fix it. */
439 		break;
440 	default:
441 		xchk_da_set_corrupt(ds, level);
442 		goto out_freebp;
443 	}
444 
445 	/*
446 	 * If we've been handed a block that is below the dabtree root, does
447 	 * its hashval match what the parent block expected to see?
448 	 */
449 	if (level > 0) {
450 		struct xfs_da_node_entry	*key;
451 
452 		key = xchk_da_btree_node_entry(ds, level - 1);
453 		if (be32_to_cpu(key->hashval) != blk->hashval) {
454 			xchk_da_set_corrupt(ds, level);
455 			goto out_freebp;
456 		}
457 	}
458 
459 out:
460 	return error;
461 out_freebp:
462 	xfs_trans_brelse(dargs->trans, blk->bp);
463 	blk->bp = NULL;
464 out_nobuf:
465 	blk->blkno = 0;
466 	return error;
467 }
468 
469 /* Visit all nodes and leaves of a da btree. */
470 int
471 xchk_da_btree(
472 	struct xfs_scrub		*sc,
473 	int				whichfork,
474 	xchk_da_btree_rec_fn		scrub_fn,
475 	void				*private)
476 {
477 	struct xchk_da_btree		*ds;
478 	struct xfs_mount		*mp = sc->mp;
479 	struct xfs_da_state_blk		*blks;
480 	struct xfs_da_node_entry	*key;
481 	xfs_dablk_t			blkno;
482 	int				level;
483 	int				error;
484 
485 	/* Skip short format data structures; no btree to scan. */
486 	if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork)))
487 		return 0;
488 
489 	/* Set up initial da state. */
490 	ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS);
491 	if (!ds)
492 		return -ENOMEM;
493 	ds->dargs.dp = sc->ip;
494 	ds->dargs.whichfork = whichfork;
495 	ds->dargs.trans = sc->tp;
496 	ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
497 	ds->state = xfs_da_state_alloc(&ds->dargs);
498 	ds->sc = sc;
499 	ds->private = private;
500 	if (whichfork == XFS_ATTR_FORK) {
501 		ds->dargs.geo = mp->m_attr_geo;
502 		ds->lowest = 0;
503 		ds->highest = 0;
504 	} else {
505 		ds->dargs.geo = mp->m_dir_geo;
506 		ds->lowest = ds->dargs.geo->leafblk;
507 		ds->highest = ds->dargs.geo->freeblk;
508 	}
509 	blkno = ds->lowest;
510 	level = 0;
511 
512 	/* Find the root of the da tree, if present. */
513 	blks = ds->state->path.blk;
514 	error = xchk_da_btree_block(ds, level, blkno);
515 	if (error)
516 		goto out_state;
517 	/*
518 	 * We didn't find a block at ds->lowest, which means that there's
519 	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
520 	 * so jump out now.
521 	 */
522 	if (blks[level].bp == NULL)
523 		goto out_state;
524 
525 	blks[level].index = 0;
526 	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
527 		/* Handle leaf block. */
528 		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
529 			/* End of leaf, pop back towards the root. */
530 			if (blks[level].index >= ds->maxrecs[level]) {
531 				if (level > 0)
532 					blks[level - 1].index++;
533 				ds->tree_level++;
534 				level--;
535 				continue;
536 			}
537 
538 			/* Dispatch record scrubbing. */
539 			error = scrub_fn(ds, level);
540 			if (error)
541 				break;
542 			if (xchk_should_terminate(sc, &error) ||
543 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
544 				break;
545 
546 			blks[level].index++;
547 			continue;
548 		}
549 
550 
551 		/* End of node, pop back towards the root. */
552 		if (blks[level].index >= ds->maxrecs[level]) {
553 			if (level > 0)
554 				blks[level - 1].index++;
555 			ds->tree_level++;
556 			level--;
557 			continue;
558 		}
559 
560 		/* Hashes in order for scrub? */
561 		key = xchk_da_btree_node_entry(ds, level);
562 		error = xchk_da_btree_hash(ds, level, &key->hashval);
563 		if (error)
564 			goto out;
565 
566 		/* Drill another level deeper. */
567 		blkno = be32_to_cpu(key->before);
568 		level++;
569 		if (level >= XFS_DA_NODE_MAXDEPTH) {
570 			/* Too deep! */
571 			xchk_da_set_corrupt(ds, level - 1);
572 			break;
573 		}
574 		ds->tree_level--;
575 		error = xchk_da_btree_block(ds, level, blkno);
576 		if (error)
577 			goto out;
578 		if (blks[level].bp == NULL)
579 			goto out;
580 
581 		blks[level].index = 0;
582 	}
583 
584 out:
585 	/* Release all the buffers we're tracking. */
586 	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
587 		if (blks[level].bp == NULL)
588 			continue;
589 		xfs_trans_brelse(sc->tp, blks[level].bp);
590 		blks[level].bp = NULL;
591 	}
592 
593 out_state:
594 	xfs_da_state_free(ds->state);
595 	kfree(ds);
596 	return error;
597 }
598