1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_inode.h"
13 #include "xfs_btree.h"
14 #include "scrub/scrub.h"
15 #include "scrub/common.h"
16 #include "scrub/btree.h"
17 #include "scrub/trace.h"
18
19 /* btree scrubbing */
20
21 /*
22 * Check for btree operation errors. See the section about handling
23 * operational errors in common.c.
24 */
static bool
__xchk_btree_process_error(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	int			level,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	/* No error?  Caller may keep scrubbing. */
	if (*error == 0)
		return true;

	switch (*error) {
	case -EDEADLOCK:
	case -ECHRNG:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
	case -EIO:
	case -ENODATA:
		/*
		 * Note the badness but don't abort: record errflag in the
		 * scrub state and clear *error so the caller's error path
		 * doesn't fire.  Because of the fallthrough, the op-error
		 * tracepoint below logs the (now zeroed) error code for
		 * these absorbed cases.
		 */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		fallthrough;
	default:
		/* Trace against the correct btree flavor. */
		if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
			trace_xchk_ifork_btree_op_error(sc, cur, level,
					*error, ret_ip);
		else
			trace_xchk_btree_op_error(sc, cur, level,
					*error, ret_ip);
		break;
	}
	/* Tell the caller to stop whatever it was doing. */
	return false;
}
62
63 bool
xchk_btree_process_error(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,int * error)64 xchk_btree_process_error(
65 struct xfs_scrub *sc,
66 struct xfs_btree_cur *cur,
67 int level,
68 int *error)
69 {
70 return __xchk_btree_process_error(sc, cur, level, error,
71 XFS_SCRUB_OFLAG_CORRUPT, __return_address);
72 }
73
74 bool
xchk_btree_xref_process_error(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,int * error)75 xchk_btree_xref_process_error(
76 struct xfs_scrub *sc,
77 struct xfs_btree_cur *cur,
78 int level,
79 int *error)
80 {
81 return __xchk_btree_process_error(sc, cur, level, error,
82 XFS_SCRUB_OFLAG_XFAIL, __return_address);
83 }
84
85 /* Record btree block corruption. */
86 static void
__xchk_btree_set_corrupt(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,__u32 errflag,void * ret_ip)87 __xchk_btree_set_corrupt(
88 struct xfs_scrub *sc,
89 struct xfs_btree_cur *cur,
90 int level,
91 __u32 errflag,
92 void *ret_ip)
93 {
94 sc->sm->sm_flags |= errflag;
95
96 if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
97 trace_xchk_ifork_btree_error(sc, cur, level,
98 ret_ip);
99 else
100 trace_xchk_btree_error(sc, cur, level,
101 ret_ip);
102 }
103
104 void
xchk_btree_set_corrupt(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level)105 xchk_btree_set_corrupt(
106 struct xfs_scrub *sc,
107 struct xfs_btree_cur *cur,
108 int level)
109 {
110 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
111 __return_address);
112 }
113
114 void
xchk_btree_xref_set_corrupt(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level)115 xchk_btree_xref_set_corrupt(
116 struct xfs_scrub *sc,
117 struct xfs_btree_cur *cur,
118 int level)
119 {
120 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
121 __return_address);
122 }
123
124 void
xchk_btree_set_preen(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level)125 xchk_btree_set_preen(
126 struct xfs_scrub *sc,
127 struct xfs_btree_cur *cur,
128 int level)
129 {
130 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN,
131 __return_address);
132 }
133
134 /*
135 * Make sure this record is in order and doesn't stray outside of the parent
136 * keys.
137 */
STATIC void
xchk_btree_rec(
	struct xchk_btree	*bs)
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_rec	*rec;
	union xfs_btree_key	key;
	union xfs_btree_key	hkey;
	union xfs_btree_key	*keyp;
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*keyblock;
	struct xfs_buf		*bp;

	/* The record under scrutiny is the one the level-0 cursor points at. */
	block = xfs_btree_get_block(cur, 0, &bp);
	rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);

	trace_xchk_btree_rec(bs->sc, cur, 0);

	/* Are all records across all record blocks in order? */
	if (bs->lastrec_valid &&
	    !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
		xchk_btree_set_corrupt(bs->sc, cur, 0);
	/* Remember this record for the next ordering comparison. */
	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
	bs->lastrec_valid = true;

	/* A single-level tree has no parent keys to check against. */
	if (cur->bc_nlevels == 1)
		return;

	/* Is low_key(rec) at least as large as the parent low key? */
	cur->bc_ops->init_key_from_rec(&key, rec);
	keyblock = xfs_btree_get_block(cur, 1, &bp);
	keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
	if (xfs_btree_keycmp_lt(cur, &key, keyp))
		xchk_btree_set_corrupt(bs->sc, cur, 1);

	/* High-key checks only apply to overlapping-keyed btrees. */
	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
		return;

	/* Is high_key(rec) no larger than the parent high key? */
	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
	keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
	if (xfs_btree_keycmp_lt(cur, keyp, &hkey))
		xchk_btree_set_corrupt(bs->sc, cur, 1);
}
182
183 /*
184 * Make sure this key is in order and doesn't stray outside of the parent
185 * keys.
186 */
STATIC void
xchk_btree_key(
	struct xchk_btree	*bs,
	int			level)
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_key	*key;
	union xfs_btree_key	*keyp;
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*keyblock;
	struct xfs_buf		*bp;

	/* The key under scrutiny is the one the cursor points at. */
	block = xfs_btree_get_block(cur, level, &bp);
	key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);

	trace_xchk_btree_key(bs->sc, cur, level);

	/*
	 * Are all low keys across all node blocks in order?  lastkey[] is
	 * indexed by level - 1 because level 0 (records) is tracked
	 * separately via lastrec.
	 */
	if (bs->lastkey[level - 1].valid &&
	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key))
		xchk_btree_set_corrupt(bs->sc, cur, level);
	memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len);
	bs->lastkey[level - 1].valid = true;

	/* The root level has no parent keys to check against. */
	if (level + 1 >= cur->bc_nlevels)
		return;

	/* Is this block's low key at least as large as the parent low key? */
	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
	keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
	if (xfs_btree_keycmp_lt(cur, key, keyp))
		xchk_btree_set_corrupt(bs->sc, cur, level);

	/* High-key checks only apply to overlapping-keyed btrees. */
	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
		return;

	/* Is this block's high key no larger than the parent high key? */
	key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
	keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
			keyblock);
	if (xfs_btree_keycmp_lt(cur, keyp, key))
		xchk_btree_set_corrupt(bs->sc, cur, level);
}
230
231 /*
232 * Check a btree pointer. Returns true if it's ok to use this pointer.
233 * Callers do not need to set the corrupt flag.
234 */
235 static bool
xchk_btree_ptr_ok(struct xchk_btree * bs,int level,union xfs_btree_ptr * ptr)236 xchk_btree_ptr_ok(
237 struct xchk_btree *bs,
238 int level,
239 union xfs_btree_ptr *ptr)
240 {
241 /* A btree rooted in an inode has no block pointer to the root. */
242 if (bs->cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
243 level == bs->cur->bc_nlevels)
244 return true;
245
246 /* Otherwise, check the pointers. */
247 if (__xfs_btree_check_ptr(bs->cur, ptr, 0, level)) {
248 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
249 return false;
250 }
251
252 return true;
253 }
254
/* Check that a btree block's sibling matches what we expect of it. */
STATIC int
xchk_btree_block_check_sibling(
	struct xchk_btree	*bs,
	int			level,
	int			direction,
	union xfs_btree_ptr	*sibling)
{
	struct xfs_btree_cur	*cur = bs->cur;
	struct xfs_btree_block	*pblock;
	struct xfs_buf		*pbp;
	struct xfs_btree_cur	*ncur = NULL;
	union xfs_btree_ptr	*pp;
	int			success;
	int			error;

	/*
	 * Duplicate the cursor so we can walk the parent level without
	 * disturbing the main scrub traversal.
	 */
	error = xfs_btree_dup_cursor(cur, &ncur);
	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
	    !ncur)
		return error;

	/*
	 * If the pointer is null, we shouldn't be able to move the upper
	 * level pointer anywhere.
	 */
	if (xfs_btree_ptr_is_null(cur, sibling)) {
		if (direction > 0)
			error = xfs_btree_increment(ncur, level + 1, &success);
		else
			error = xfs_btree_decrement(ncur, level + 1, &success);
		/* Moving past a null sibling means the parent is broken. */
		if (error == 0 && success)
			xchk_btree_set_corrupt(bs->sc, cur, level);
		error = 0;
		goto out;
	}

	/* Increment upper level pointer. */
	if (direction > 0)
		error = xfs_btree_increment(ncur, level + 1, &success);
	else
		error = xfs_btree_decrement(ncur, level + 1, &success);
	if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
		goto out;
	if (!success) {
		/* Non-null sibling but nowhere to move in the parent. */
		xchk_btree_set_corrupt(bs->sc, cur, level + 1);
		goto out;
	}

	/* Compare upper level pointer to sibling pointer. */
	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
	pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
	if (!xchk_btree_ptr_ok(bs, level + 1, pp))
		goto out;
	if (pbp)
		xchk_buffer_recheck(bs->sc, pbp);

	if (xfs_btree_cmp_two_ptrs(cur, pp, sibling))
		xchk_btree_set_corrupt(bs->sc, cur, level);
out:
	/* Always discard the duplicated cursor. */
	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
	return error;
}
317
318 /* Check the siblings of a btree block. */
STATIC int
xchk_btree_block_check_siblings(
	struct xchk_btree	*bs,
	struct xfs_btree_block	*block)
{
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_ptr	leftsib;
	union xfs_btree_ptr	rightsib;
	int			level;
	int			error = 0;

	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
	level = xfs_btree_get_level(block);

	/* Root block should never have siblings. */
	if (level == cur->bc_nlevels - 1) {
		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
		    !xfs_btree_ptr_is_null(cur, &rightsib))
			xchk_btree_set_corrupt(bs->sc, cur, level);
		goto out;
	}

	/*
	 * Do the left & right sibling pointers match the adjacent
	 * parent level pointers?
	 * (These functions absorb error codes for us.)
	 */
	error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
	if (error)
		return error;
	error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
	if (error)
		return error;
out:
	return error;
}
356
/*
 * Deferred btree block ownership check.  bnobt/rmapbt blocks cannot be
 * cross-referenced while those same trees are being scrubbed, so their
 * addresses are stashed on xchk_btree.to_check for checking afterwards.
 */
struct check_owner {
	struct list_head	list;	/* link in xchk_btree.to_check */
	xfs_daddr_t		daddr;	/* daddr of the block to check */
	int			level;	/* btree level of that block */
};
362
363 /*
364 * Make sure this btree block isn't in the free list and that there's
365 * an rmap record for it.
366 */
STATIC int
xchk_btree_check_block_owner(
	struct xchk_btree	*bs,
	int			level,
	xfs_daddr_t		daddr)
{
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	bool			is_bnobt, is_rmapbt;
	bool			init_sa;
	int			error = 0;

	/* A previous check may have shut down further owner checks. */
	if (!bs->cur)
		return 0;

	is_bnobt = xfs_btree_is_bno(bs->cur->bc_ops);
	is_rmapbt = xfs_btree_is_rmap(bs->cur->bc_ops);
	agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
	agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);

	/*
	 * If the btree being examined is not itself a per-AG btree, initialize
	 * sc->sa so that we can check for the presence of an ownership record
	 * in the rmap btree for the AG containing the block.
	 */
	init_sa = bs->cur->bc_ops->type != XFS_BTREE_TYPE_AG;
	if (init_sa) {
		error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
		if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
				level, &error))
			goto out_free;
	}

	xchk_xref_is_used_space(bs->sc, agbno, 1);
	/*
	 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
	 * have to nullify it (to shut down further block owner checks) if
	 * self-xref encounters problems.
	 */
	if (!bs->sc->sa.bno_cur && is_bnobt)
		bs->cur = NULL;

	/* Same trick for the rmapbt scrubbing itself. */
	xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo);
	if (!bs->sc->sa.rmap_cur && is_rmapbt)
		bs->cur = NULL;

out_free:
	/* Only tear down sc->sa if we set it up above. */
	if (init_sa)
		xchk_ag_free(bs->sc, &bs->sc->sa);

	return error;
}
419
420 /* Check the owner of a btree block. */
STATIC int
xchk_btree_check_owner(
	struct xchk_btree	*bs,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_btree_cur	*cur = bs->cur;

	/*
	 * In theory, xfs_btree_get_block should only give us a null buffer
	 * pointer for the root of a root-in-inode btree type, but we need
	 * to check defensively here in case the cursor state is also screwed
	 * up.
	 */
	if (bp == NULL) {
		if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE)
			xchk_btree_set_corrupt(bs->sc, bs->cur, level);
		return 0;
	}

	/*
	 * We want to cross-reference each btree block with the bnobt
	 * and the rmapbt.  We cannot cross-reference the bnobt or
	 * rmapbt while scanning the bnobt or rmapbt, respectively,
	 * because we cannot alter the cursor and we'd prefer not to
	 * duplicate cursors.  Therefore, save the buffer daddr for
	 * later scanning.
	 */
	if (xfs_btree_is_bno(cur->bc_ops) || xfs_btree_is_rmap(cur->bc_ops)) {
		struct check_owner	*co;

		co = kmalloc_obj(struct check_owner, XCHK_GFP_FLAGS);
		if (!co)
			return -ENOMEM;

		/* List entry is freed in xchk_btree after checking. */
		INIT_LIST_HEAD(&co->list);
		co->level = level;
		co->daddr = xfs_buf_daddr(bp);
		list_add_tail(&co->list, &bs->to_check);
		return 0;
	}

	/* Otherwise check the ownership right now. */
	return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
}
465
466 /* Decide if we want to check minrecs of a btree block in the inode root. */
467 static inline bool
xchk_btree_check_iroot_minrecs(struct xchk_btree * bs)468 xchk_btree_check_iroot_minrecs(
469 struct xchk_btree *bs)
470 {
471 /*
472 * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
473 * would miscalculate the space required for the data fork bmbt root
474 * when adding an attr fork, and promote the iroot contents to an
475 * external block unnecessarily. This went unnoticed for many years
476 * until scrub found filesystems in this state. Inode rooted btrees are
477 * not supposed to have immediate child blocks that are small enough
478 * that the contents could fit in the inode root, but we can't fail
479 * existing filesystems, so instead we disable the check for data fork
480 * bmap btrees when there's an attr fork.
481 */
482 if (xfs_btree_is_bmap(bs->cur->bc_ops) &&
483 bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
484 xfs_inode_has_attr_fork(bs->sc->ip))
485 return false;
486
487 return true;
488 }
489
490 /*
491 * Check that this btree block has at least minrecs records or is one of the
492 * special blocks that don't require that.
493 */
STATIC void
xchk_btree_check_minrecs(
	struct xchk_btree	*bs,
	int			level,
	struct xfs_btree_block	*block)
{
	struct xfs_btree_cur	*cur = bs->cur;
	unsigned int		root_level = cur->bc_nlevels - 1;
	unsigned int		numrecs = be16_to_cpu(block->bb_numrecs);

	/* More records than minrecs means the block is ok. */
	if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
		return;

	/*
	 * For btrees rooted in the inode, it's possible that the root block
	 * contents spilled into a regular ondisk block because there wasn't
	 * enough space in the inode root.  The number of records in that
	 * child block might be less than the standard minrecs, but that's ok
	 * provided that there's only one direct child of the root.
	 */
	if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
	    level == cur->bc_nlevels - 2) {
		struct xfs_btree_block	*root_block;
		struct xfs_buf		*root_bp;
		int			root_maxrecs;

		/*
		 * Corrupt if the root has more than one child, or if the
		 * spilled child would actually have fit in the inode root.
		 */
		root_block = xfs_btree_get_block(cur, root_level, &root_bp);
		root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
		if (xchk_btree_check_iroot_minrecs(bs) &&
		    (be16_to_cpu(root_block->bb_numrecs) != 1 ||
		     numrecs <= root_maxrecs))
			xchk_btree_set_corrupt(bs->sc, cur, level);
		return;
	}

	/*
	 * Otherwise, only the root level is allowed to have fewer than minrecs
	 * records or keyptrs.
	 */
	if (level < root_level)
		xchk_btree_set_corrupt(bs->sc, cur, level);
}
537
538 /*
539 * If this btree block has a parent, make sure that the parent's keys capture
540 * the keyspace contained in this block.
541 */
STATIC void
xchk_btree_block_check_keys(
	struct xchk_btree	*bs,
	int			level,
	struct xfs_btree_block	*block)
{
	union xfs_btree_key	block_key;
	union xfs_btree_key	*block_high_key;
	union xfs_btree_key	*parent_low_key, *parent_high_key;
	struct xfs_btree_cur	*cur = bs->cur;
	struct xfs_btree_block	*parent_block;
	struct xfs_buf		*bp;

	/* The root block has no parent to agree with. */
	if (level == cur->bc_nlevels - 1)
		return;

	/* Compute this block's keys from its contents. */
	xfs_btree_get_keys(cur, block, &block_key);

	/* Make sure the low key of this block matches the parent. */
	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
	parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
			parent_block);
	if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
		return;
	}

	/* High-key checks only apply to overlapping-keyed btrees. */
	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
		return;

	/* Make sure the high key of this block matches the parent. */
	parent_high_key = xfs_btree_high_key_addr(cur,
			cur->bc_levels[level + 1].ptr, parent_block);
	block_high_key = xfs_btree_high_key_from_key(cur, &block_key);
	if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key))
		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
}
579
580 /*
581 * Grab and scrub a btree block given a btree pointer. Returns block
582 * and buffer pointers (if applicable) if they're ok to use.
583 */
STATIC int
xchk_btree_get_block(
	struct xchk_btree	*bs,
	int			level,
	union xfs_btree_ptr	*pp,
	struct xfs_btree_block	**pblock,
	struct xfs_buf		**pbp)
{
	int			error;

	/* Don't hand back stale pointers on any failure path. */
	*pblock = NULL;
	*pbp = NULL;

	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
	if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
	    !*pblock)
		return error;

	xfs_btree_get_block(bs->cur, level, pbp);
	if (__xfs_btree_check_block(bs->cur, *pblock, level, *pbp)) {
		/* Structurally bad block; flag it and stop descending. */
		xchk_btree_set_corrupt(bs->sc, bs->cur, level);
		return 0;
	}
	if (*pbp)
		xchk_buffer_recheck(bs->sc, *pbp);

	xchk_btree_check_minrecs(bs, level, *pblock);

	/*
	 * Check the block's owner; this function absorbs error codes
	 * for us.
	 */
	error = xchk_btree_check_owner(bs, level, *pbp);
	if (error)
		return error;

	/*
	 * Check the block's siblings; this function absorbs error codes
	 * for us.
	 */
	error = xchk_btree_block_check_siblings(bs, *pblock);
	if (error)
		return error;

	xchk_btree_block_check_keys(bs, level, *pblock);
	return 0;
}
631
632 /*
633 * Check that the low and high keys of this block match the keys stored
634 * in the parent block.
635 */
STATIC void
xchk_btree_block_keys(
	struct xchk_btree	*bs,
	int			level,
	struct xfs_btree_block	*block)
{
	union xfs_btree_key	block_keys;
	struct xfs_btree_cur	*cur = bs->cur;
	union xfs_btree_key	*high_bk;
	union xfs_btree_key	*parent_keys;
	union xfs_btree_key	*high_pk;
	struct xfs_btree_block	*parent_block;
	struct xfs_buf		*bp;

	/* The root block has no parent to agree with. */
	if (level >= cur->bc_nlevels - 1)
		return;

	/* Calculate the keys for this block. */
	xfs_btree_get_keys(cur, block, &block_keys);

	/* Obtain the parent's copy of the keys for this block. */
	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
	parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
			parent_block);

	/*
	 * NOTE(review): the corruption is reported against literal level 1
	 * here (and below) rather than the mismatching level — confirm this
	 * is intentional, since sibling checks report against 'level'.
	 */
	if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys))
		xchk_btree_set_corrupt(bs->sc, cur, 1);

	/* High-key checks only apply to overlapping-keyed btrees. */
	if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
		return;

	/* Get high keys */
	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
	high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
			parent_block);

	if (xfs_btree_keycmp_ne(cur, high_bk, high_pk))
		xchk_btree_set_corrupt(bs->sc, cur, 1);
}
675
676 /*
677 * Visit all nodes and leaves of a btree. Check that all pointers and
678 * records are in order, that the keys reflect the records, and use a callback
679 * so that the caller can verify individual records.
680 */
int
xchk_btree(
	struct xfs_scrub		*sc,
	struct xfs_btree_cur		*cur,
	xchk_btree_rec_fn		scrub_fn,
	const struct xfs_owner_info	*oinfo,
	void				*private)
{
	union xfs_btree_ptr		ptr;
	struct xchk_btree		*bs;
	union xfs_btree_ptr		*pp;
	union xfs_btree_rec		*recp;
	struct xfs_btree_block		*block;
	struct xfs_buf			*bp;
	struct check_owner		*co;
	struct check_owner		*n;
	size_t				cur_sz;
	int				level;
	int				error = 0;

	/*
	 * Allocate the btree scrub context from the heap, because this
	 * structure can get rather large.  Don't let a caller feed us a
	 * totally absurd size.
	 */
	cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
	if (cur_sz > PAGE_SIZE) {
		/* An absurd nlevels is itself a corruption signal. */
		xchk_btree_set_corrupt(sc, cur, 0);
		return 0;
	}
	bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
	if (!bs)
		return -ENOMEM;
	bs->cur = cur;
	bs->scrub_rec = scrub_fn;
	bs->oinfo = oinfo;
	bs->private = private;
	bs->sc = sc;

	/* Initialize scrub state */
	INIT_LIST_HEAD(&bs->to_check);

	/*
	 * Load the root of the btree.  The helper function absorbs
	 * error codes for us.
	 */
	level = cur->bc_nlevels - 1;
	xfs_btree_init_ptr_from_cur(cur, &ptr);
	if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
		goto out;
	error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
	if (error || !block)
		goto out;

	cur->bc_levels[level].ptr = 1;

	/*
	 * Depth-first walk: bc_levels[].ptr tracks the 1-based position
	 * within each level's current block; level == bc_nlevels means
	 * we've popped above the root and are done.
	 */
	while (level < cur->bc_nlevels) {
		block = xfs_btree_get_block(cur, level, &bp);

		if (level == 0) {
			/* End of leaf, pop back towards the root. */
			if (cur->bc_levels[level].ptr >
			    be16_to_cpu(block->bb_numrecs)) {
				xchk_btree_block_keys(bs, level, block);
				if (level < cur->bc_nlevels - 1)
					cur->bc_levels[level + 1].ptr++;
				level++;
				continue;
			}

			/* Records in order for scrub? */
			xchk_btree_rec(bs);

			/* Call out to the record checker. */
			recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
					block);
			error = bs->scrub_rec(bs, recp);
			if (error)
				break;
			/* Stop early on fatal signal or found corruption. */
			if (xchk_should_terminate(sc, &error) ||
			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
				break;

			cur->bc_levels[level].ptr++;
			continue;
		}

		/* End of node, pop back towards the root. */
		if (cur->bc_levels[level].ptr >
		    be16_to_cpu(block->bb_numrecs)) {
			xchk_btree_block_keys(bs, level, block);
			if (level < cur->bc_nlevels - 1)
				cur->bc_levels[level + 1].ptr++;
			level++;
			continue;
		}

		/* Keys in order for scrub? */
		xchk_btree_key(bs, level);

		/* Drill another level deeper. */
		pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
		if (!xchk_btree_ptr_ok(bs, level, pp)) {
			/* Bad pointer: skip this subtree, try the next. */
			cur->bc_levels[level].ptr++;
			continue;
		}
		level--;
		error = xchk_btree_get_block(bs, level, pp, &block, &bp);
		if (error || !block)
			goto out;

		cur->bc_levels[level].ptr = 1;
	}

out:
	/* Process deferred owner checks on btree blocks. */
	list_for_each_entry_safe(co, n, &bs->to_check, list) {
		/* Even on error, keep walking the list to free entries. */
		if (!error && bs->cur)
			error = xchk_btree_check_block_owner(bs, co->level,
					co->daddr);
		list_del(&co->list);
		kfree(co);
	}
	kfree(bs);

	return error;
}
808