xref: /linux/fs/xfs/scrub/btree.c (revision fd639726bf15fca8ee1a00dce8e0096d0ad9bd18)
1 /*
2  * Copyright (C) 2017 Oracle.  All Rights Reserved.
3  *
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it would be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write the Free Software Foundation,
18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
19  */
20 #include "xfs.h"
21 #include "xfs_fs.h"
22 #include "xfs_shared.h"
23 #include "xfs_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_btree.h"
28 #include "xfs_bit.h"
29 #include "xfs_log_format.h"
30 #include "xfs_trans.h"
31 #include "xfs_sb.h"
32 #include "xfs_inode.h"
33 #include "xfs_alloc.h"
34 #include "scrub/scrub.h"
35 #include "scrub/common.h"
36 #include "scrub/btree.h"
37 #include "scrub/trace.h"
38 
39 /* btree scrubbing */
40 
41 /*
42  * Check for btree operation errors.  See the section about handling
43  * operational errors in common.c.
44  */
45 bool
46 xfs_scrub_btree_process_error(
47 	struct xfs_scrub_context	*sc,
48 	struct xfs_btree_cur		*cur,
49 	int				level,
50 	int				*error)
51 {
52 	if (*error == 0)
53 		return true;
54 
55 	switch (*error) {
56 	case -EDEADLOCK:
57 		/* Used to restart an op with deadlock avoidance. */
58 		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
59 		break;
60 	case -EFSBADCRC:
61 	case -EFSCORRUPTED:
62 		/* Note the badness but don't abort. */
63 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
64 		*error = 0;
65 		/* fall through */
66 	default:
67 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
68 			trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
69 					*error, __return_address);
70 		else
71 			trace_xfs_scrub_btree_op_error(sc, cur, level,
72 					*error, __return_address);
73 		break;
74 	}
75 	return false;
76 }
77 
78 /* Record btree block corruption. */
79 void
80 xfs_scrub_btree_set_corrupt(
81 	struct xfs_scrub_context	*sc,
82 	struct xfs_btree_cur		*cur,
83 	int				level)
84 {
85 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
86 
87 	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
88 		trace_xfs_scrub_ifork_btree_error(sc, cur, level,
89 				__return_address);
90 	else
91 		trace_xfs_scrub_btree_error(sc, cur, level,
92 				__return_address);
93 }
94 
95 /*
96  * Make sure this record is in order and doesn't stray outside of the parent
97  * keys.
98  */
99 STATIC void
100 xfs_scrub_btree_rec(
101 	struct xfs_scrub_btree	*bs)
102 {
103 	struct xfs_btree_cur	*cur = bs->cur;
104 	union xfs_btree_rec	*rec;
105 	union xfs_btree_key	key;
106 	union xfs_btree_key	hkey;
107 	union xfs_btree_key	*keyp;
108 	struct xfs_btree_block	*block;
109 	struct xfs_btree_block	*keyblock;
110 	struct xfs_buf		*bp;
111 
112 	block = xfs_btree_get_block(cur, 0, &bp);
113 	rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
114 
115 	trace_xfs_scrub_btree_rec(bs->sc, cur, 0);
116 
117 	/* If this isn't the first record, are they in order? */
118 	if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
119 		xfs_scrub_btree_set_corrupt(bs->sc, cur, 0);
120 	bs->firstrec = false;
121 	memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
122 
123 	if (cur->bc_nlevels == 1)
124 		return;
125 
126 	/* Is this at least as large as the parent low key? */
127 	cur->bc_ops->init_key_from_rec(&key, rec);
128 	keyblock = xfs_btree_get_block(cur, 1, &bp);
129 	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock);
130 	if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0)
131 		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
132 
133 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
134 		return;
135 
136 	/* Is this no larger than the parent high key? */
137 	cur->bc_ops->init_high_key_from_rec(&hkey, rec);
138 	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock);
139 	if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0)
140 		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
141 }
142 
143 /*
144  * Make sure this key is in order and doesn't stray outside of the parent
145  * keys.
146  */
147 STATIC void
148 xfs_scrub_btree_key(
149 	struct xfs_scrub_btree	*bs,
150 	int			level)
151 {
152 	struct xfs_btree_cur	*cur = bs->cur;
153 	union xfs_btree_key	*key;
154 	union xfs_btree_key	*keyp;
155 	struct xfs_btree_block	*block;
156 	struct xfs_btree_block	*keyblock;
157 	struct xfs_buf		*bp;
158 
159 	block = xfs_btree_get_block(cur, level, &bp);
160 	key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
161 
162 	trace_xfs_scrub_btree_key(bs->sc, cur, level);
163 
164 	/* If this isn't the first key, are they in order? */
165 	if (!bs->firstkey[level] &&
166 	    !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key))
167 		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
168 	bs->firstkey[level] = false;
169 	memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len);
170 
171 	if (level + 1 >= cur->bc_nlevels)
172 		return;
173 
174 	/* Is this at least as large as the parent low key? */
175 	keyblock = xfs_btree_get_block(cur, level + 1, &bp);
176 	keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
177 	if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0)
178 		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
179 
180 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
181 		return;
182 
183 	/* Is this no larger than the parent high key? */
184 	key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
185 	keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock);
186 	if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0)
187 		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
188 }
189 
190 /*
191  * Check a btree pointer.  Returns true if it's ok to use this pointer.
192  * Callers do not need to set the corrupt flag.
193  */
194 static bool
195 xfs_scrub_btree_ptr_ok(
196 	struct xfs_scrub_btree		*bs,
197 	int				level,
198 	union xfs_btree_ptr		*ptr)
199 {
200 	bool				res;
201 
202 	/* A btree rooted in an inode has no block pointer to the root. */
203 	if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
204 	    level == bs->cur->bc_nlevels)
205 		return true;
206 
207 	/* Otherwise, check the pointers. */
208 	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
209 		res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
210 	else
211 		res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
212 	if (!res)
213 		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
214 
215 	return res;
216 }
217 
218 /* Check that a btree block's sibling matches what we expect it. */
219 STATIC int
220 xfs_scrub_btree_block_check_sibling(
221 	struct xfs_scrub_btree		*bs,
222 	int				level,
223 	int				direction,
224 	union xfs_btree_ptr		*sibling)
225 {
226 	struct xfs_btree_cur		*cur = bs->cur;
227 	struct xfs_btree_block		*pblock;
228 	struct xfs_buf			*pbp;
229 	struct xfs_btree_cur		*ncur = NULL;
230 	union xfs_btree_ptr		*pp;
231 	int				success;
232 	int				error;
233 
234 	error = xfs_btree_dup_cursor(cur, &ncur);
235 	if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error) ||
236 	    !ncur)
237 		return error;
238 
239 	/*
240 	 * If the pointer is null, we shouldn't be able to move the upper
241 	 * level pointer anywhere.
242 	 */
243 	if (xfs_btree_ptr_is_null(cur, sibling)) {
244 		if (direction > 0)
245 			error = xfs_btree_increment(ncur, level + 1, &success);
246 		else
247 			error = xfs_btree_decrement(ncur, level + 1, &success);
248 		if (error == 0 && success)
249 			xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
250 		error = 0;
251 		goto out;
252 	}
253 
254 	/* Increment upper level pointer. */
255 	if (direction > 0)
256 		error = xfs_btree_increment(ncur, level + 1, &success);
257 	else
258 		error = xfs_btree_decrement(ncur, level + 1, &success);
259 	if (!xfs_scrub_btree_process_error(bs->sc, cur, level + 1, &error))
260 		goto out;
261 	if (!success) {
262 		xfs_scrub_btree_set_corrupt(bs->sc, cur, level + 1);
263 		goto out;
264 	}
265 
266 	/* Compare upper level pointer to sibling pointer. */
267 	pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
268 	pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
269 	if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
270 		goto out;
271 
272 	if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
273 		xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
274 out:
275 	xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
276 	return error;
277 }
278 
279 /* Check the siblings of a btree block. */
280 STATIC int
281 xfs_scrub_btree_block_check_siblings(
282 	struct xfs_scrub_btree		*bs,
283 	struct xfs_btree_block		*block)
284 {
285 	struct xfs_btree_cur		*cur = bs->cur;
286 	union xfs_btree_ptr		leftsib;
287 	union xfs_btree_ptr		rightsib;
288 	int				level;
289 	int				error = 0;
290 
291 	xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
292 	xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
293 	level = xfs_btree_get_level(block);
294 
295 	/* Root block should never have siblings. */
296 	if (level == cur->bc_nlevels - 1) {
297 		if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
298 		    !xfs_btree_ptr_is_null(cur, &rightsib))
299 			xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
300 		goto out;
301 	}
302 
303 	/*
304 	 * Does the left & right sibling pointers match the adjacent
305 	 * parent level pointers?
306 	 * (These function absorbs error codes for us.)
307 	 */
308 	error = xfs_scrub_btree_block_check_sibling(bs, level, -1, &leftsib);
309 	if (error)
310 		return error;
311 	error = xfs_scrub_btree_block_check_sibling(bs, level, 1, &rightsib);
312 	if (error)
313 		return error;
314 out:
315 	return error;
316 }
317 
318 /*
319  * Grab and scrub a btree block given a btree pointer.  Returns block
320  * and buffer pointers (if applicable) if they're ok to use.
321  */
322 STATIC int
323 xfs_scrub_btree_get_block(
324 	struct xfs_scrub_btree		*bs,
325 	int				level,
326 	union xfs_btree_ptr		*pp,
327 	struct xfs_btree_block		**pblock,
328 	struct xfs_buf			**pbp)
329 {
330 	void				*failed_at;
331 	int				error;
332 
333 	*pblock = NULL;
334 	*pbp = NULL;
335 
336 	error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
337 	if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, level, &error) ||
338 	    !*pblock)
339 		return error;
340 
341 	xfs_btree_get_block(bs->cur, level, pbp);
342 	if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
343 		failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
344 				level, *pbp);
345 	else
346 		failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
347 				 level, *pbp);
348 	if (failed_at) {
349 		xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
350 		return 0;
351 	}
352 
353 	/*
354 	 * Check the block's siblings; this function absorbs error codes
355 	 * for us.
356 	 */
357 	return xfs_scrub_btree_block_check_siblings(bs, *pblock);
358 }
359 
360 /*
361  * Check that the low and high keys of this block match the keys stored
362  * in the parent block.
363  */
364 STATIC void
365 xfs_scrub_btree_block_keys(
366 	struct xfs_scrub_btree		*bs,
367 	int				level,
368 	struct xfs_btree_block		*block)
369 {
370 	union xfs_btree_key		block_keys;
371 	struct xfs_btree_cur		*cur = bs->cur;
372 	union xfs_btree_key		*high_bk;
373 	union xfs_btree_key		*parent_keys;
374 	union xfs_btree_key		*high_pk;
375 	struct xfs_btree_block		*parent_block;
376 	struct xfs_buf			*bp;
377 
378 	if (level >= cur->bc_nlevels - 1)
379 		return;
380 
381 	/* Calculate the keys for this block. */
382 	xfs_btree_get_keys(cur, block, &block_keys);
383 
384 	/* Obtain the parent's copy of the keys for this block. */
385 	parent_block = xfs_btree_get_block(cur, level + 1, &bp);
386 	parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1],
387 			parent_block);
388 
389 	if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0)
390 		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
391 
392 	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
393 		return;
394 
395 	/* Get high keys */
396 	high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
397 	high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1],
398 			parent_block);
399 
400 	if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0)
401 		xfs_scrub_btree_set_corrupt(bs->sc, cur, 1);
402 }
403 
404 /*
405  * Visit all nodes and leaves of a btree.  Check that all pointers and
406  * records are in order, that the keys reflect the records, and use a callback
407  * so that the caller can verify individual records.
408  */
409 int
410 xfs_scrub_btree(
411 	struct xfs_scrub_context	*sc,
412 	struct xfs_btree_cur		*cur,
413 	xfs_scrub_btree_rec_fn		scrub_fn,
414 	struct xfs_owner_info		*oinfo,
415 	void				*private)
416 {
417 	struct xfs_scrub_btree		bs = { NULL };
418 	union xfs_btree_ptr		ptr;
419 	union xfs_btree_ptr		*pp;
420 	union xfs_btree_rec		*recp;
421 	struct xfs_btree_block		*block;
422 	int				level;
423 	struct xfs_buf			*bp;
424 	int				i;
425 	int				error = 0;
426 
427 	/* Initialize scrub state */
428 	bs.cur = cur;
429 	bs.scrub_rec = scrub_fn;
430 	bs.oinfo = oinfo;
431 	bs.firstrec = true;
432 	bs.private = private;
433 	bs.sc = sc;
434 	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
435 		bs.firstkey[i] = true;
436 	INIT_LIST_HEAD(&bs.to_check);
437 
438 	/* Don't try to check a tree with a height we can't handle. */
439 	if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
440 		xfs_scrub_btree_set_corrupt(sc, cur, 0);
441 		goto out;
442 	}
443 
444 	/*
445 	 * Load the root of the btree.  The helper function absorbs
446 	 * error codes for us.
447 	 */
448 	level = cur->bc_nlevels - 1;
449 	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
450 	if (!xfs_scrub_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
451 		goto out;
452 	error = xfs_scrub_btree_get_block(&bs, level, &ptr, &block, &bp);
453 	if (error || !block)
454 		goto out;
455 
456 	cur->bc_ptrs[level] = 1;
457 
458 	while (level < cur->bc_nlevels) {
459 		block = xfs_btree_get_block(cur, level, &bp);
460 
461 		if (level == 0) {
462 			/* End of leaf, pop back towards the root. */
463 			if (cur->bc_ptrs[level] >
464 			    be16_to_cpu(block->bb_numrecs)) {
465 				xfs_scrub_btree_block_keys(&bs, level, block);
466 				if (level < cur->bc_nlevels - 1)
467 					cur->bc_ptrs[level + 1]++;
468 				level++;
469 				continue;
470 			}
471 
472 			/* Records in order for scrub? */
473 			xfs_scrub_btree_rec(&bs);
474 
475 			/* Call out to the record checker. */
476 			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
477 			error = bs.scrub_rec(&bs, recp);
478 			if (error)
479 				break;
480 			if (xfs_scrub_should_terminate(sc, &error) ||
481 			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
482 				break;
483 
484 			cur->bc_ptrs[level]++;
485 			continue;
486 		}
487 
488 		/* End of node, pop back towards the root. */
489 		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
490 			xfs_scrub_btree_block_keys(&bs, level, block);
491 			if (level < cur->bc_nlevels - 1)
492 				cur->bc_ptrs[level + 1]++;
493 			level++;
494 			continue;
495 		}
496 
497 		/* Keys in order for scrub? */
498 		xfs_scrub_btree_key(&bs, level);
499 
500 		/* Drill another level deeper. */
501 		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
502 		if (!xfs_scrub_btree_ptr_ok(&bs, level, pp)) {
503 			cur->bc_ptrs[level]++;
504 			continue;
505 		}
506 		level--;
507 		error = xfs_scrub_btree_get_block(&bs, level, pp, &block, &bp);
508 		if (error || !block)
509 			goto out;
510 
511 		cur->bc_ptrs[level] = 1;
512 	}
513 
514 out:
515 	return error;
516 }
517