1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_inode.h"
13 #include "xfs_btree.h"
14 #include "scrub/scrub.h"
15 #include "scrub/common.h"
16 #include "scrub/btree.h"
17 #include "scrub/trace.h"
18
19 /* btree scrubbing */
20
21 /*
22 * Check for btree operation errors. See the section about handling
23 * operational errors in common.c.
24 */
25 static bool
__xchk_btree_process_error(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,int * error,__u32 errflag,void * ret_ip)26 __xchk_btree_process_error(
27 struct xfs_scrub *sc,
28 struct xfs_btree_cur *cur,
29 int level,
30 int *error,
31 __u32 errflag,
32 void *ret_ip)
33 {
34 if (*error == 0)
35 return true;
36
37 switch (*error) {
38 case -EDEADLOCK:
39 case -ECHRNG:
40 /* Used to restart an op with deadlock avoidance. */
41 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
42 break;
43 case -EFSBADCRC:
44 case -EFSCORRUPTED:
45 /* Note the badness but don't abort. */
46 sc->sm->sm_flags |= errflag;
47 *error = 0;
48 fallthrough;
49 default:
50 if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
51 trace_xchk_ifork_btree_op_error(sc, cur, level,
52 *error, ret_ip);
53 else
54 trace_xchk_btree_op_error(sc, cur, level,
55 *error, ret_ip);
56 break;
57 }
58 return false;
59 }
60
61 bool
xchk_btree_process_error(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,int * error)62 xchk_btree_process_error(
63 struct xfs_scrub *sc,
64 struct xfs_btree_cur *cur,
65 int level,
66 int *error)
67 {
68 return __xchk_btree_process_error(sc, cur, level, error,
69 XFS_SCRUB_OFLAG_CORRUPT, __return_address);
70 }
71
72 bool
xchk_btree_xref_process_error(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,int * error)73 xchk_btree_xref_process_error(
74 struct xfs_scrub *sc,
75 struct xfs_btree_cur *cur,
76 int level,
77 int *error)
78 {
79 return __xchk_btree_process_error(sc, cur, level, error,
80 XFS_SCRUB_OFLAG_XFAIL, __return_address);
81 }
82
83 /* Record btree block corruption. */
84 static void
__xchk_btree_set_corrupt(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,__u32 errflag,void * ret_ip)85 __xchk_btree_set_corrupt(
86 struct xfs_scrub *sc,
87 struct xfs_btree_cur *cur,
88 int level,
89 __u32 errflag,
90 void *ret_ip)
91 {
92 sc->sm->sm_flags |= errflag;
93
94 if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
95 trace_xchk_ifork_btree_error(sc, cur, level,
96 ret_ip);
97 else
98 trace_xchk_btree_error(sc, cur, level,
99 ret_ip);
100 }
101
102 void
xchk_btree_set_corrupt(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level)103 xchk_btree_set_corrupt(
104 struct xfs_scrub *sc,
105 struct xfs_btree_cur *cur,
106 int level)
107 {
108 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
109 __return_address);
110 }
111
112 void
xchk_btree_xref_set_corrupt(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level)113 xchk_btree_xref_set_corrupt(
114 struct xfs_scrub *sc,
115 struct xfs_btree_cur *cur,
116 int level)
117 {
118 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
119 __return_address);
120 }
121
122 void
xchk_btree_set_preen(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level)123 xchk_btree_set_preen(
124 struct xfs_scrub *sc,
125 struct xfs_btree_cur *cur,
126 int level)
127 {
128 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN,
129 __return_address);
130 }
131
132 /*
133 * Make sure this record is in order and doesn't stray outside of the parent
134 * keys.
135 */
136 STATIC void
xchk_btree_rec(struct xchk_btree * bs)137 xchk_btree_rec(
138 struct xchk_btree *bs)
139 {
140 struct xfs_btree_cur *cur = bs->cur;
141 union xfs_btree_rec *rec;
142 union xfs_btree_key key;
143 union xfs_btree_key hkey;
144 union xfs_btree_key *keyp;
145 struct xfs_btree_block *block;
146 struct xfs_btree_block *keyblock;
147 struct xfs_buf *bp;
148
149 block = xfs_btree_get_block(cur, 0, &bp);
150 rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);
151
152 trace_xchk_btree_rec(bs->sc, cur, 0);
153
154 /* Are all records across all record blocks in order? */
155 if (bs->lastrec_valid &&
156 !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
157 xchk_btree_set_corrupt(bs->sc, cur, 0);
158 memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
159 bs->lastrec_valid = true;
160
161 if (cur->bc_nlevels == 1)
162 return;
163
164 /* Is low_key(rec) at least as large as the parent low key? */
165 cur->bc_ops->init_key_from_rec(&key, rec);
166 keyblock = xfs_btree_get_block(cur, 1, &bp);
167 keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
168 if (xfs_btree_keycmp_lt(cur, &key, keyp))
169 xchk_btree_set_corrupt(bs->sc, cur, 1);
170
171 if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
172 return;
173
174 /* Is high_key(rec) no larger than the parent high key? */
175 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
176 keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
177 if (xfs_btree_keycmp_lt(cur, keyp, &hkey))
178 xchk_btree_set_corrupt(bs->sc, cur, 1);
179 }
180
181 /*
182 * Make sure this key is in order and doesn't stray outside of the parent
183 * keys.
184 */
185 STATIC void
xchk_btree_key(struct xchk_btree * bs,int level)186 xchk_btree_key(
187 struct xchk_btree *bs,
188 int level)
189 {
190 struct xfs_btree_cur *cur = bs->cur;
191 union xfs_btree_key *key;
192 union xfs_btree_key *keyp;
193 struct xfs_btree_block *block;
194 struct xfs_btree_block *keyblock;
195 struct xfs_buf *bp;
196
197 block = xfs_btree_get_block(cur, level, &bp);
198 key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
199
200 trace_xchk_btree_key(bs->sc, cur, level);
201
202 /* Are all low keys across all node blocks in order? */
203 if (bs->lastkey[level - 1].valid &&
204 !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key))
205 xchk_btree_set_corrupt(bs->sc, cur, level);
206 memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len);
207 bs->lastkey[level - 1].valid = true;
208
209 if (level + 1 >= cur->bc_nlevels)
210 return;
211
212 /* Is this block's low key at least as large as the parent low key? */
213 keyblock = xfs_btree_get_block(cur, level + 1, &bp);
214 keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
215 if (xfs_btree_keycmp_lt(cur, key, keyp))
216 xchk_btree_set_corrupt(bs->sc, cur, level);
217
218 if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
219 return;
220
221 /* Is this block's high key no larger than the parent high key? */
222 key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
223 keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
224 keyblock);
225 if (xfs_btree_keycmp_lt(cur, keyp, key))
226 xchk_btree_set_corrupt(bs->sc, cur, level);
227 }
228
229 /*
230 * Check a btree pointer. Returns true if it's ok to use this pointer.
231 * Callers do not need to set the corrupt flag.
232 */
233 static bool
xchk_btree_ptr_ok(struct xchk_btree * bs,int level,union xfs_btree_ptr * ptr)234 xchk_btree_ptr_ok(
235 struct xchk_btree *bs,
236 int level,
237 union xfs_btree_ptr *ptr)
238 {
239 /* A btree rooted in an inode has no block pointer to the root. */
240 if (bs->cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
241 level == bs->cur->bc_nlevels)
242 return true;
243
244 /* Otherwise, check the pointers. */
245 if (__xfs_btree_check_ptr(bs->cur, ptr, 0, level)) {
246 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
247 return false;
248 }
249
250 return true;
251 }
252
253 /* Check that a btree block's sibling matches what we expect it. */
254 STATIC int
xchk_btree_block_check_sibling(struct xchk_btree * bs,int level,int direction,union xfs_btree_ptr * sibling)255 xchk_btree_block_check_sibling(
256 struct xchk_btree *bs,
257 int level,
258 int direction,
259 union xfs_btree_ptr *sibling)
260 {
261 struct xfs_btree_cur *cur = bs->cur;
262 struct xfs_btree_block *pblock;
263 struct xfs_buf *pbp;
264 struct xfs_btree_cur *ncur = NULL;
265 union xfs_btree_ptr *pp;
266 int success;
267 int error;
268
269 error = xfs_btree_dup_cursor(cur, &ncur);
270 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
271 !ncur)
272 return error;
273
274 /*
275 * If the pointer is null, we shouldn't be able to move the upper
276 * level pointer anywhere.
277 */
278 if (xfs_btree_ptr_is_null(cur, sibling)) {
279 if (direction > 0)
280 error = xfs_btree_increment(ncur, level + 1, &success);
281 else
282 error = xfs_btree_decrement(ncur, level + 1, &success);
283 if (error == 0 && success)
284 xchk_btree_set_corrupt(bs->sc, cur, level);
285 error = 0;
286 goto out;
287 }
288
289 /* Increment upper level pointer. */
290 if (direction > 0)
291 error = xfs_btree_increment(ncur, level + 1, &success);
292 else
293 error = xfs_btree_decrement(ncur, level + 1, &success);
294 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
295 goto out;
296 if (!success) {
297 xchk_btree_set_corrupt(bs->sc, cur, level + 1);
298 goto out;
299 }
300
301 /* Compare upper level pointer to sibling pointer. */
302 pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
303 pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
304 if (!xchk_btree_ptr_ok(bs, level + 1, pp))
305 goto out;
306 if (pbp)
307 xchk_buffer_recheck(bs->sc, pbp);
308
309 if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
310 xchk_btree_set_corrupt(bs->sc, cur, level);
311 out:
312 xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
313 return error;
314 }
315
316 /* Check the siblings of a btree block. */
317 STATIC int
xchk_btree_block_check_siblings(struct xchk_btree * bs,struct xfs_btree_block * block)318 xchk_btree_block_check_siblings(
319 struct xchk_btree *bs,
320 struct xfs_btree_block *block)
321 {
322 struct xfs_btree_cur *cur = bs->cur;
323 union xfs_btree_ptr leftsib;
324 union xfs_btree_ptr rightsib;
325 int level;
326 int error = 0;
327
328 xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
329 xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
330 level = xfs_btree_get_level(block);
331
332 /* Root block should never have siblings. */
333 if (level == cur->bc_nlevels - 1) {
334 if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
335 !xfs_btree_ptr_is_null(cur, &rightsib))
336 xchk_btree_set_corrupt(bs->sc, cur, level);
337 goto out;
338 }
339
340 /*
341 * Does the left & right sibling pointers match the adjacent
342 * parent level pointers?
343 * (These function absorbs error codes for us.)
344 */
345 error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
346 if (error)
347 return error;
348 error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
349 if (error)
350 return error;
351 out:
352 return error;
353 }
354
355 struct check_owner {
356 struct list_head list;
357 xfs_daddr_t daddr;
358 int level;
359 };
360
361 /*
362 * Make sure this btree block isn't in the free list and that there's
363 * an rmap record for it.
364 */
365 STATIC int
xchk_btree_check_block_owner(struct xchk_btree * bs,int level,xfs_daddr_t daddr)366 xchk_btree_check_block_owner(
367 struct xchk_btree *bs,
368 int level,
369 xfs_daddr_t daddr)
370 {
371 xfs_agnumber_t agno;
372 xfs_agblock_t agbno;
373 bool init_sa;
374 int error = 0;
375
376 if (!bs->cur)
377 return 0;
378
379 agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
380 agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
381
382 /*
383 * If the btree being examined is not itself a per-AG btree, initialize
384 * sc->sa so that we can check for the presence of an ownership record
385 * in the rmap btree for the AG containing the block.
386 */
387 init_sa = bs->cur->bc_ops->type != XFS_BTREE_TYPE_AG;
388 if (init_sa) {
389 error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
390 if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
391 level, &error))
392 goto out_free;
393 }
394
395 xchk_xref_is_used_space(bs->sc, agbno, 1);
396 /*
397 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
398 * have to nullify it (to shut down further block owner checks) if
399 * self-xref encounters problems.
400 */
401 if (!bs->sc->sa.bno_cur && xfs_btree_is_bno(bs->cur->bc_ops))
402 bs->cur = NULL;
403
404 xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo);
405 if (!bs->sc->sa.rmap_cur && xfs_btree_is_rmap(bs->cur->bc_ops))
406 bs->cur = NULL;
407
408 out_free:
409 if (init_sa)
410 xchk_ag_free(bs->sc, &bs->sc->sa);
411
412 return error;
413 }
414
415 /* Check the owner of a btree block. */
416 STATIC int
xchk_btree_check_owner(struct xchk_btree * bs,int level,struct xfs_buf * bp)417 xchk_btree_check_owner(
418 struct xchk_btree *bs,
419 int level,
420 struct xfs_buf *bp)
421 {
422 struct xfs_btree_cur *cur = bs->cur;
423
424 /*
425 * In theory, xfs_btree_get_block should only give us a null buffer
426 * pointer for the root of a root-in-inode btree type, but we need
427 * to check defensively here in case the cursor state is also screwed
428 * up.
429 */
430 if (bp == NULL) {
431 if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE)
432 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
433 return 0;
434 }
435
436 /*
437 * We want to cross-reference each btree block with the bnobt
438 * and the rmapbt. We cannot cross-reference the bnobt or
439 * rmapbt while scanning the bnobt or rmapbt, respectively,
440 * because we cannot alter the cursor and we'd prefer not to
441 * duplicate cursors. Therefore, save the buffer daddr for
442 * later scanning.
443 */
444 if (xfs_btree_is_bno(cur->bc_ops) || xfs_btree_is_rmap(cur->bc_ops)) {
445 struct check_owner *co;
446
447 co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS);
448 if (!co)
449 return -ENOMEM;
450
451 INIT_LIST_HEAD(&co->list);
452 co->level = level;
453 co->daddr = xfs_buf_daddr(bp);
454 list_add_tail(&co->list, &bs->to_check);
455 return 0;
456 }
457
458 return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
459 }
460
461 /* Decide if we want to check minrecs of a btree block in the inode root. */
462 static inline bool
xchk_btree_check_iroot_minrecs(struct xchk_btree * bs)463 xchk_btree_check_iroot_minrecs(
464 struct xchk_btree *bs)
465 {
466 /*
467 * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
468 * would miscalculate the space required for the data fork bmbt root
469 * when adding an attr fork, and promote the iroot contents to an
470 * external block unnecessarily. This went unnoticed for many years
471 * until scrub found filesystems in this state. Inode rooted btrees are
472 * not supposed to have immediate child blocks that are small enough
473 * that the contents could fit in the inode root, but we can't fail
474 * existing filesystems, so instead we disable the check for data fork
475 * bmap btrees when there's an attr fork.
476 */
477 if (xfs_btree_is_bmap(bs->cur->bc_ops) &&
478 bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
479 xfs_inode_has_attr_fork(bs->sc->ip))
480 return false;
481
482 return true;
483 }
484
485 /*
486 * Check that this btree block has at least minrecs records or is one of the
487 * special blocks that don't require that.
488 */
489 STATIC void
xchk_btree_check_minrecs(struct xchk_btree * bs,int level,struct xfs_btree_block * block)490 xchk_btree_check_minrecs(
491 struct xchk_btree *bs,
492 int level,
493 struct xfs_btree_block *block)
494 {
495 struct xfs_btree_cur *cur = bs->cur;
496 unsigned int root_level = cur->bc_nlevels - 1;
497 unsigned int numrecs = be16_to_cpu(block->bb_numrecs);
498
499 /* More records than minrecs means the block is ok. */
500 if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
501 return;
502
503 /*
504 * For btrees rooted in the inode, it's possible that the root block
505 * contents spilled into a regular ondisk block because there wasn't
506 * enough space in the inode root. The number of records in that
507 * child block might be less than the standard minrecs, but that's ok
508 * provided that there's only one direct child of the root.
509 */
510 if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
511 level == cur->bc_nlevels - 2) {
512 struct xfs_btree_block *root_block;
513 struct xfs_buf *root_bp;
514 int root_maxrecs;
515
516 root_block = xfs_btree_get_block(cur, root_level, &root_bp);
517 root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
518 if (xchk_btree_check_iroot_minrecs(bs) &&
519 (be16_to_cpu(root_block->bb_numrecs) != 1 ||
520 numrecs <= root_maxrecs))
521 xchk_btree_set_corrupt(bs->sc, cur, level);
522 return;
523 }
524
525 /*
526 * Otherwise, only the root level is allowed to have fewer than minrecs
527 * records or keyptrs.
528 */
529 if (level < root_level)
530 xchk_btree_set_corrupt(bs->sc, cur, level);
531 }
532
533 /*
534 * If this btree block has a parent, make sure that the parent's keys capture
535 * the keyspace contained in this block.
536 */
537 STATIC void
xchk_btree_block_check_keys(struct xchk_btree * bs,int level,struct xfs_btree_block * block)538 xchk_btree_block_check_keys(
539 struct xchk_btree *bs,
540 int level,
541 struct xfs_btree_block *block)
542 {
543 union xfs_btree_key block_key;
544 union xfs_btree_key *block_high_key;
545 union xfs_btree_key *parent_low_key, *parent_high_key;
546 struct xfs_btree_cur *cur = bs->cur;
547 struct xfs_btree_block *parent_block;
548 struct xfs_buf *bp;
549
550 if (level == cur->bc_nlevels - 1)
551 return;
552
553 xfs_btree_get_keys(cur, block, &block_key);
554
555 /* Make sure the low key of this block matches the parent. */
556 parent_block = xfs_btree_get_block(cur, level + 1, &bp);
557 parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
558 parent_block);
559 if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) {
560 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
561 return;
562 }
563
564 if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
565 return;
566
567 /* Make sure the high key of this block matches the parent. */
568 parent_high_key = xfs_btree_high_key_addr(cur,
569 cur->bc_levels[level + 1].ptr, parent_block);
570 block_high_key = xfs_btree_high_key_from_key(cur, &block_key);
571 if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key))
572 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
573 }
574
575 /*
576 * Grab and scrub a btree block given a btree pointer. Returns block
577 * and buffer pointers (if applicable) if they're ok to use.
578 */
579 STATIC int
xchk_btree_get_block(struct xchk_btree * bs,int level,union xfs_btree_ptr * pp,struct xfs_btree_block ** pblock,struct xfs_buf ** pbp)580 xchk_btree_get_block(
581 struct xchk_btree *bs,
582 int level,
583 union xfs_btree_ptr *pp,
584 struct xfs_btree_block **pblock,
585 struct xfs_buf **pbp)
586 {
587 int error;
588
589 *pblock = NULL;
590 *pbp = NULL;
591
592 error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
593 if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
594 !*pblock)
595 return error;
596
597 xfs_btree_get_block(bs->cur, level, pbp);
598 if (__xfs_btree_check_block(bs->cur, *pblock, level, *pbp)) {
599 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
600 return 0;
601 }
602 if (*pbp)
603 xchk_buffer_recheck(bs->sc, *pbp);
604
605 xchk_btree_check_minrecs(bs, level, *pblock);
606
607 /*
608 * Check the block's owner; this function absorbs error codes
609 * for us.
610 */
611 error = xchk_btree_check_owner(bs, level, *pbp);
612 if (error)
613 return error;
614
615 /*
616 * Check the block's siblings; this function absorbs error codes
617 * for us.
618 */
619 error = xchk_btree_block_check_siblings(bs, *pblock);
620 if (error)
621 return error;
622
623 xchk_btree_block_check_keys(bs, level, *pblock);
624 return 0;
625 }
626
627 /*
628 * Check that the low and high keys of this block match the keys stored
629 * in the parent block.
630 */
631 STATIC void
xchk_btree_block_keys(struct xchk_btree * bs,int level,struct xfs_btree_block * block)632 xchk_btree_block_keys(
633 struct xchk_btree *bs,
634 int level,
635 struct xfs_btree_block *block)
636 {
637 union xfs_btree_key block_keys;
638 struct xfs_btree_cur *cur = bs->cur;
639 union xfs_btree_key *high_bk;
640 union xfs_btree_key *parent_keys;
641 union xfs_btree_key *high_pk;
642 struct xfs_btree_block *parent_block;
643 struct xfs_buf *bp;
644
645 if (level >= cur->bc_nlevels - 1)
646 return;
647
648 /* Calculate the keys for this block. */
649 xfs_btree_get_keys(cur, block, &block_keys);
650
651 /* Obtain the parent's copy of the keys for this block. */
652 parent_block = xfs_btree_get_block(cur, level + 1, &bp);
653 parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
654 parent_block);
655
656 if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys))
657 xchk_btree_set_corrupt(bs->sc, cur, 1);
658
659 if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
660 return;
661
662 /* Get high keys */
663 high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
664 high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
665 parent_block);
666
667 if (xfs_btree_keycmp_ne(cur, high_bk, high_pk))
668 xchk_btree_set_corrupt(bs->sc, cur, 1);
669 }
670
671 /*
672 * Visit all nodes and leaves of a btree. Check that all pointers and
673 * records are in order, that the keys reflect the records, and use a callback
674 * so that the caller can verify individual records.
675 */
676 int
xchk_btree(struct xfs_scrub * sc,struct xfs_btree_cur * cur,xchk_btree_rec_fn scrub_fn,const struct xfs_owner_info * oinfo,void * private)677 xchk_btree(
678 struct xfs_scrub *sc,
679 struct xfs_btree_cur *cur,
680 xchk_btree_rec_fn scrub_fn,
681 const struct xfs_owner_info *oinfo,
682 void *private)
683 {
684 union xfs_btree_ptr ptr;
685 struct xchk_btree *bs;
686 union xfs_btree_ptr *pp;
687 union xfs_btree_rec *recp;
688 struct xfs_btree_block *block;
689 struct xfs_buf *bp;
690 struct check_owner *co;
691 struct check_owner *n;
692 size_t cur_sz;
693 int level;
694 int error = 0;
695
696 /*
697 * Allocate the btree scrub context from the heap, because this
698 * structure can get rather large. Don't let a caller feed us a
699 * totally absurd size.
700 */
701 cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
702 if (cur_sz > PAGE_SIZE) {
703 xchk_btree_set_corrupt(sc, cur, 0);
704 return 0;
705 }
706 bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
707 if (!bs)
708 return -ENOMEM;
709 bs->cur = cur;
710 bs->scrub_rec = scrub_fn;
711 bs->oinfo = oinfo;
712 bs->private = private;
713 bs->sc = sc;
714
715 /* Initialize scrub state */
716 INIT_LIST_HEAD(&bs->to_check);
717
718 /*
719 * Load the root of the btree. The helper function absorbs
720 * error codes for us.
721 */
722 level = cur->bc_nlevels - 1;
723 xfs_btree_init_ptr_from_cur(cur, &ptr);
724 if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
725 goto out;
726 error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
727 if (error || !block)
728 goto out;
729
730 cur->bc_levels[level].ptr = 1;
731
732 while (level < cur->bc_nlevels) {
733 block = xfs_btree_get_block(cur, level, &bp);
734
735 if (level == 0) {
736 /* End of leaf, pop back towards the root. */
737 if (cur->bc_levels[level].ptr >
738 be16_to_cpu(block->bb_numrecs)) {
739 xchk_btree_block_keys(bs, level, block);
740 if (level < cur->bc_nlevels - 1)
741 cur->bc_levels[level + 1].ptr++;
742 level++;
743 continue;
744 }
745
746 /* Records in order for scrub? */
747 xchk_btree_rec(bs);
748
749 /* Call out to the record checker. */
750 recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
751 block);
752 error = bs->scrub_rec(bs, recp);
753 if (error)
754 break;
755 if (xchk_should_terminate(sc, &error) ||
756 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
757 break;
758
759 cur->bc_levels[level].ptr++;
760 continue;
761 }
762
763 /* End of node, pop back towards the root. */
764 if (cur->bc_levels[level].ptr >
765 be16_to_cpu(block->bb_numrecs)) {
766 xchk_btree_block_keys(bs, level, block);
767 if (level < cur->bc_nlevels - 1)
768 cur->bc_levels[level + 1].ptr++;
769 level++;
770 continue;
771 }
772
773 /* Keys in order for scrub? */
774 xchk_btree_key(bs, level);
775
776 /* Drill another level deeper. */
777 pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
778 if (!xchk_btree_ptr_ok(bs, level, pp)) {
779 cur->bc_levels[level].ptr++;
780 continue;
781 }
782 level--;
783 error = xchk_btree_get_block(bs, level, pp, &block, &bp);
784 if (error || !block)
785 goto out;
786
787 cur->bc_levels[level].ptr = 1;
788 }
789
790 out:
791 /* Process deferred owner checks on btree blocks. */
792 list_for_each_entry_safe(co, n, &bs->to_check, list) {
793 if (!error && bs->cur)
794 error = xchk_btree_check_block_owner(bs, co->level,
795 co->daddr);
796 list_del(&co->list);
797 kfree(co);
798 }
799 kfree(bs);
800
801 return error;
802 }
803