xref: /linux/fs/btrfs/tree-checker.c (revision 954ea91fb68b771dba6d87cfa61b68e09cc2497f)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * Copyright (C) Qu Wenruo 2017.  All rights reserved.
4   */
5  
/*
 * This module is used to catch unexpected/corrupted tree block data.
 * Such corruption can be caused either by a fuzzed image or by bugs.
 *
 * The objective is to do leaf/node validation checks when a tree block is
 * read from disk, and to check *every* possible member, so other code won't
 * need to check them again.
 *
 * Due to the potential for unwanted damage, every checker needs to be
 * carefully reviewed so that it does not prevent mounting of valid images.
 */
17  
18  #include <linux/types.h>
19  #include <linux/stddef.h>
20  #include <linux/error-injection.h>
21  #include "messages.h"
22  #include "ctree.h"
23  #include "tree-checker.h"
24  #include "disk-io.h"
25  #include "compression.h"
26  #include "volumes.h"
27  #include "misc.h"
28  #include "btrfs_inode.h"
29  #include "fs.h"
30  #include "accessors.h"
31  #include "file-item.h"
32  
/*
 * Error messages should follow this format:
 * corrupt <type>: <identifier>, <reason>[, <bad_value>]
 *
 * @type:	leaf or node
 * @identifier:	the necessary info to locate the leaf/node.
 * 		It's recommended to decode key.objectid/offset if it's
 * 		meaningful.
 * @reason:	describe the error
 * @bad_value:	optional, it's recommended to output the bad value and its
 *		expected value (range).
 *
 * Since a comma is used to separate the components, only spaces are allowed
 * inside each component.
 */
48  
49  /*
50   * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
51   * Allows callers to customize the output.
52   */
53  __printf(3, 4)
54  __cold
55  static void generic_err(const struct extent_buffer *eb, int slot,
56  			const char *fmt, ...)
57  {
58  	const struct btrfs_fs_info *fs_info = eb->fs_info;
59  	struct va_format vaf;
60  	va_list args;
61  
62  	va_start(args, fmt);
63  
64  	vaf.fmt = fmt;
65  	vaf.va = &args;
66  
67  	btrfs_crit(fs_info,
68  		"corrupt %s: root=%llu block=%llu slot=%d, %pV",
69  		btrfs_header_level(eb) == 0 ? "leaf" : "node",
70  		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
71  	va_end(args);
72  }
73  
74  /*
75   * Customized reporter for extent data item, since its key objectid and
76   * offset has its own meaning.
77   */
78  __printf(3, 4)
79  __cold
80  static void file_extent_err(const struct extent_buffer *eb, int slot,
81  			    const char *fmt, ...)
82  {
83  	const struct btrfs_fs_info *fs_info = eb->fs_info;
84  	struct btrfs_key key;
85  	struct va_format vaf;
86  	va_list args;
87  
88  	btrfs_item_key_to_cpu(eb, &key, slot);
89  	va_start(args, fmt);
90  
91  	vaf.fmt = fmt;
92  	vaf.va = &args;
93  
94  	btrfs_crit(fs_info,
95  	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
96  		btrfs_header_level(eb) == 0 ? "leaf" : "node",
97  		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
98  		key.objectid, key.offset, &vaf);
99  	va_end(args);
100  }
101  
/*
 * Alignment check for one btrfs_file_extent_##name member of @fi.
 *
 * Evaluates to 0 if the member is aligned to @alignment.
 * Otherwise the offending value is reported through file_extent_err() and the
 * expression evaluates to 1.
 */
#define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment)		      \
({									      \
	const int __fe_misaligned =					      \
		!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)),	      \
			    (alignment));				      \
									      \
	if (unlikely(__fe_misaligned))					      \
		file_extent_err((leaf), (slot),				      \
	"invalid %s for file extent, have %llu, should be aligned to %u",     \
			(#name), btrfs_file_extent_##name((leaf), (fi)),      \
			(alignment));					      \
	__fe_misaligned;						      \
})
116  
117  static u64 file_extent_end(struct extent_buffer *leaf,
118  			   struct btrfs_key *key,
119  			   struct btrfs_file_extent_item *extent)
120  {
121  	u64 end;
122  	u64 len;
123  
124  	if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) {
125  		len = btrfs_file_extent_ram_bytes(leaf, extent);
126  		end = ALIGN(key->offset + len, leaf->fs_info->sectorsize);
127  	} else {
128  		len = btrfs_file_extent_num_bytes(leaf, extent);
129  		end = key->offset + len;
130  	}
131  	return end;
132  }
133  
134  /*
135   * Customized report for dir_item, the only new important information is
136   * key->objectid, which represents inode number
137   */
138  __printf(3, 4)
139  __cold
140  static void dir_item_err(const struct extent_buffer *eb, int slot,
141  			 const char *fmt, ...)
142  {
143  	const struct btrfs_fs_info *fs_info = eb->fs_info;
144  	struct btrfs_key key;
145  	struct va_format vaf;
146  	va_list args;
147  
148  	btrfs_item_key_to_cpu(eb, &key, slot);
149  	va_start(args, fmt);
150  
151  	vaf.fmt = fmt;
152  	vaf.va = &args;
153  
154  	btrfs_crit(fs_info,
155  		"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
156  		btrfs_header_level(eb) == 0 ? "leaf" : "node",
157  		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
158  		key.objectid, &vaf);
159  	va_end(args);
160  }
161  
162  /*
163   * This functions checks prev_key->objectid, to ensure current key and prev_key
164   * share the same objectid as inode number.
165   *
166   * This is to detect missing INODE_ITEM in subvolume trees.
167   *
168   * Return true if everything is OK or we don't need to check.
169   * Return false if anything is wrong.
170   */
171  static bool check_prev_ino(struct extent_buffer *leaf,
172  			   struct btrfs_key *key, int slot,
173  			   struct btrfs_key *prev_key)
174  {
175  	/* No prev key, skip check */
176  	if (slot == 0)
177  		return true;
178  
179  	/* Only these key->types needs to be checked */
180  	ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
181  	       key->type == BTRFS_INODE_REF_KEY ||
182  	       key->type == BTRFS_DIR_INDEX_KEY ||
183  	       key->type == BTRFS_DIR_ITEM_KEY ||
184  	       key->type == BTRFS_EXTENT_DATA_KEY);
185  
186  	/*
187  	 * Only subvolume trees along with their reloc trees need this check.
188  	 * Things like log tree doesn't follow this ino requirement.
189  	 */
190  	if (!is_fstree(btrfs_header_owner(leaf)))
191  		return true;
192  
193  	if (key->objectid == prev_key->objectid)
194  		return true;
195  
196  	/* Error found */
197  	dir_item_err(leaf, slot,
198  		"invalid previous key objectid, have %llu expect %llu",
199  		prev_key->objectid, key->objectid);
200  	return false;
201  }
202  static int check_extent_data_item(struct extent_buffer *leaf,
203  				  struct btrfs_key *key, int slot,
204  				  struct btrfs_key *prev_key)
205  {
206  	struct btrfs_fs_info *fs_info = leaf->fs_info;
207  	struct btrfs_file_extent_item *fi;
208  	u32 sectorsize = fs_info->sectorsize;
209  	u32 item_size = btrfs_item_size(leaf, slot);
210  	u64 extent_end;
211  
212  	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
213  		file_extent_err(leaf, slot,
214  "unaligned file_offset for file extent, have %llu should be aligned to %u",
215  			key->offset, sectorsize);
216  		return -EUCLEAN;
217  	}
218  
219  	/*
220  	 * Previous key must have the same key->objectid (ino).
221  	 * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA.
222  	 * But if objectids mismatch, it means we have a missing
223  	 * INODE_ITEM.
224  	 */
225  	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
226  		return -EUCLEAN;
227  
228  	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
229  
230  	/*
231  	 * Make sure the item contains at least inline header, so the file
232  	 * extent type is not some garbage.
233  	 */
234  	if (unlikely(item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START)) {
235  		file_extent_err(leaf, slot,
236  				"invalid item size, have %u expect [%zu, %u)",
237  				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
238  				SZ_4K);
239  		return -EUCLEAN;
240  	}
241  	if (unlikely(btrfs_file_extent_type(leaf, fi) >=
242  		     BTRFS_NR_FILE_EXTENT_TYPES)) {
243  		file_extent_err(leaf, slot,
244  		"invalid type for file extent, have %u expect range [0, %u]",
245  			btrfs_file_extent_type(leaf, fi),
246  			BTRFS_NR_FILE_EXTENT_TYPES - 1);
247  		return -EUCLEAN;
248  	}
249  
250  	/*
251  	 * Support for new compression/encryption must introduce incompat flag,
252  	 * and must be caught in open_ctree().
253  	 */
254  	if (unlikely(btrfs_file_extent_compression(leaf, fi) >=
255  		     BTRFS_NR_COMPRESS_TYPES)) {
256  		file_extent_err(leaf, slot,
257  	"invalid compression for file extent, have %u expect range [0, %u]",
258  			btrfs_file_extent_compression(leaf, fi),
259  			BTRFS_NR_COMPRESS_TYPES - 1);
260  		return -EUCLEAN;
261  	}
262  	if (unlikely(btrfs_file_extent_encryption(leaf, fi))) {
263  		file_extent_err(leaf, slot,
264  			"invalid encryption for file extent, have %u expect 0",
265  			btrfs_file_extent_encryption(leaf, fi));
266  		return -EUCLEAN;
267  	}
268  	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
269  		/* Inline extent must have 0 as key offset */
270  		if (unlikely(key->offset)) {
271  			file_extent_err(leaf, slot,
272  		"invalid file_offset for inline file extent, have %llu expect 0",
273  				key->offset);
274  			return -EUCLEAN;
275  		}
276  
277  		/* Compressed inline extent has no on-disk size, skip it */
278  		if (btrfs_file_extent_compression(leaf, fi) !=
279  		    BTRFS_COMPRESS_NONE)
280  			return 0;
281  
282  		/* Uncompressed inline extent size must match item size */
283  		if (unlikely(item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
284  					  btrfs_file_extent_ram_bytes(leaf, fi))) {
285  			file_extent_err(leaf, slot,
286  	"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
287  				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
288  				btrfs_file_extent_ram_bytes(leaf, fi));
289  			return -EUCLEAN;
290  		}
291  		return 0;
292  	}
293  
294  	/* Regular or preallocated extent has fixed item size */
295  	if (unlikely(item_size != sizeof(*fi))) {
296  		file_extent_err(leaf, slot,
297  	"invalid item size for reg/prealloc file extent, have %u expect %zu",
298  			item_size, sizeof(*fi));
299  		return -EUCLEAN;
300  	}
301  	if (unlikely(CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
302  		     CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
303  		     CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
304  		     CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
305  		     CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize)))
306  		return -EUCLEAN;
307  
308  	/* Catch extent end overflow */
309  	if (unlikely(check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi),
310  					key->offset, &extent_end))) {
311  		file_extent_err(leaf, slot,
312  	"extent end overflow, have file offset %llu extent num bytes %llu",
313  				key->offset,
314  				btrfs_file_extent_num_bytes(leaf, fi));
315  		return -EUCLEAN;
316  	}
317  
318  	/*
319  	 * Check that no two consecutive file extent items, in the same leaf,
320  	 * present ranges that overlap each other.
321  	 */
322  	if (slot > 0 &&
323  	    prev_key->objectid == key->objectid &&
324  	    prev_key->type == BTRFS_EXTENT_DATA_KEY) {
325  		struct btrfs_file_extent_item *prev_fi;
326  		u64 prev_end;
327  
328  		prev_fi = btrfs_item_ptr(leaf, slot - 1,
329  					 struct btrfs_file_extent_item);
330  		prev_end = file_extent_end(leaf, prev_key, prev_fi);
331  		if (unlikely(prev_end > key->offset)) {
332  			file_extent_err(leaf, slot - 1,
333  "file extent end range (%llu) goes beyond start offset (%llu) of the next file extent",
334  					prev_end, key->offset);
335  			return -EUCLEAN;
336  		}
337  	}
338  
339  	return 0;
340  }
341  
342  static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
343  			   int slot, struct btrfs_key *prev_key)
344  {
345  	struct btrfs_fs_info *fs_info = leaf->fs_info;
346  	u32 sectorsize = fs_info->sectorsize;
347  	const u32 csumsize = fs_info->csum_size;
348  
349  	if (unlikely(key->objectid != BTRFS_EXTENT_CSUM_OBJECTID)) {
350  		generic_err(leaf, slot,
351  		"invalid key objectid for csum item, have %llu expect %llu",
352  			key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
353  		return -EUCLEAN;
354  	}
355  	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
356  		generic_err(leaf, slot,
357  	"unaligned key offset for csum item, have %llu should be aligned to %u",
358  			key->offset, sectorsize);
359  		return -EUCLEAN;
360  	}
361  	if (unlikely(!IS_ALIGNED(btrfs_item_size(leaf, slot), csumsize))) {
362  		generic_err(leaf, slot,
363  	"unaligned item size for csum item, have %u should be aligned to %u",
364  			btrfs_item_size(leaf, slot), csumsize);
365  		return -EUCLEAN;
366  	}
367  	if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) {
368  		u64 prev_csum_end;
369  		u32 prev_item_size;
370  
371  		prev_item_size = btrfs_item_size(leaf, slot - 1);
372  		prev_csum_end = (prev_item_size / csumsize) * sectorsize;
373  		prev_csum_end += prev_key->offset;
374  		if (unlikely(prev_csum_end > key->offset)) {
375  			generic_err(leaf, slot - 1,
376  "csum end range (%llu) goes beyond the start range (%llu) of the next csum item",
377  				    prev_csum_end, key->offset);
378  			return -EUCLEAN;
379  		}
380  	}
381  	return 0;
382  }
383  
/*
 * Inode item error output has the same format as dir_item_err().
 *
 * ##__VA_ARGS__ drops the trailing comma when no variadic argument is given,
 * so a message without format arguments can be passed as well.
 */
#define inode_item_err(eb, slot, fmt, ...)			\
	dir_item_err(eb, slot, fmt, ##__VA_ARGS__)
387  
388  static int check_inode_key(struct extent_buffer *leaf, struct btrfs_key *key,
389  			   int slot)
390  {
391  	struct btrfs_key item_key;
392  	bool is_inode_item;
393  
394  	btrfs_item_key_to_cpu(leaf, &item_key, slot);
395  	is_inode_item = (item_key.type == BTRFS_INODE_ITEM_KEY);
396  
397  	/* For XATTR_ITEM, location key should be all 0 */
398  	if (item_key.type == BTRFS_XATTR_ITEM_KEY) {
399  		if (unlikely(key->objectid != 0 || key->type != 0 ||
400  			     key->offset != 0))
401  			return -EUCLEAN;
402  		return 0;
403  	}
404  
405  	if (unlikely((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
406  		      key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
407  		     key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
408  		     key->objectid != BTRFS_FREE_INO_OBJECTID)) {
409  		if (is_inode_item) {
410  			generic_err(leaf, slot,
411  	"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
412  				key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
413  				BTRFS_FIRST_FREE_OBJECTID,
414  				BTRFS_LAST_FREE_OBJECTID,
415  				BTRFS_FREE_INO_OBJECTID);
416  		} else {
417  			dir_item_err(leaf, slot,
418  "invalid location key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
419  				key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
420  				BTRFS_FIRST_FREE_OBJECTID,
421  				BTRFS_LAST_FREE_OBJECTID,
422  				BTRFS_FREE_INO_OBJECTID);
423  		}
424  		return -EUCLEAN;
425  	}
426  	if (unlikely(key->offset != 0)) {
427  		if (is_inode_item)
428  			inode_item_err(leaf, slot,
429  				       "invalid key offset: has %llu expect 0",
430  				       key->offset);
431  		else
432  			dir_item_err(leaf, slot,
433  				"invalid location key offset:has %llu expect 0",
434  				key->offset);
435  		return -EUCLEAN;
436  	}
437  	return 0;
438  }
439  
440  static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
441  			  int slot)
442  {
443  	struct btrfs_key item_key;
444  	bool is_root_item;
445  
446  	btrfs_item_key_to_cpu(leaf, &item_key, slot);
447  	is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
448  
449  	/* No such tree id */
450  	if (unlikely(key->objectid == 0)) {
451  		if (is_root_item)
452  			generic_err(leaf, slot, "invalid root id 0");
453  		else
454  			dir_item_err(leaf, slot,
455  				     "invalid location key root id 0");
456  		return -EUCLEAN;
457  	}
458  
459  	/* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */
460  	if (unlikely(!is_fstree(key->objectid) && !is_root_item)) {
461  		dir_item_err(leaf, slot,
462  		"invalid location key objectid, have %llu expect [%llu, %llu]",
463  				key->objectid, BTRFS_FIRST_FREE_OBJECTID,
464  				BTRFS_LAST_FREE_OBJECTID);
465  		return -EUCLEAN;
466  	}
467  
468  	/*
469  	 * ROOT_ITEM with non-zero offset means this is a snapshot, created at
470  	 * @offset transid.
471  	 * Furthermore, for location key in DIR_ITEM, its offset is always -1.
472  	 *
473  	 * So here we only check offset for reloc tree whose key->offset must
474  	 * be a valid tree.
475  	 */
476  	if (unlikely(key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
477  		     key->offset == 0)) {
478  		generic_err(leaf, slot, "invalid root id 0 for reloc tree");
479  		return -EUCLEAN;
480  	}
481  	return 0;
482  }
483  
484  static int check_dir_item(struct extent_buffer *leaf,
485  			  struct btrfs_key *key, struct btrfs_key *prev_key,
486  			  int slot)
487  {
488  	struct btrfs_fs_info *fs_info = leaf->fs_info;
489  	struct btrfs_dir_item *di;
490  	u32 item_size = btrfs_item_size(leaf, slot);
491  	u32 cur = 0;
492  
493  	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
494  		return -EUCLEAN;
495  
496  	di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
497  	while (cur < item_size) {
498  		struct btrfs_key location_key;
499  		u32 name_len;
500  		u32 data_len;
501  		u32 max_name_len;
502  		u32 total_size;
503  		u32 name_hash;
504  		u8 dir_type;
505  		int ret;
506  
507  		/* header itself should not cross item boundary */
508  		if (unlikely(cur + sizeof(*di) > item_size)) {
509  			dir_item_err(leaf, slot,
510  		"dir item header crosses item boundary, have %zu boundary %u",
511  				cur + sizeof(*di), item_size);
512  			return -EUCLEAN;
513  		}
514  
515  		/* Location key check */
516  		btrfs_dir_item_key_to_cpu(leaf, di, &location_key);
517  		if (location_key.type == BTRFS_ROOT_ITEM_KEY) {
518  			ret = check_root_key(leaf, &location_key, slot);
519  			if (unlikely(ret < 0))
520  				return ret;
521  		} else if (location_key.type == BTRFS_INODE_ITEM_KEY ||
522  			   location_key.type == 0) {
523  			ret = check_inode_key(leaf, &location_key, slot);
524  			if (unlikely(ret < 0))
525  				return ret;
526  		} else {
527  			dir_item_err(leaf, slot,
528  			"invalid location key type, have %u, expect %u or %u",
529  				     location_key.type, BTRFS_ROOT_ITEM_KEY,
530  				     BTRFS_INODE_ITEM_KEY);
531  			return -EUCLEAN;
532  		}
533  
534  		/* dir type check */
535  		dir_type = btrfs_dir_ftype(leaf, di);
536  		if (unlikely(dir_type >= BTRFS_FT_MAX)) {
537  			dir_item_err(leaf, slot,
538  			"invalid dir item type, have %u expect [0, %u)",
539  				dir_type, BTRFS_FT_MAX);
540  			return -EUCLEAN;
541  		}
542  
543  		if (unlikely(key->type == BTRFS_XATTR_ITEM_KEY &&
544  			     dir_type != BTRFS_FT_XATTR)) {
545  			dir_item_err(leaf, slot,
546  		"invalid dir item type for XATTR key, have %u expect %u",
547  				dir_type, BTRFS_FT_XATTR);
548  			return -EUCLEAN;
549  		}
550  		if (unlikely(dir_type == BTRFS_FT_XATTR &&
551  			     key->type != BTRFS_XATTR_ITEM_KEY)) {
552  			dir_item_err(leaf, slot,
553  			"xattr dir type found for non-XATTR key");
554  			return -EUCLEAN;
555  		}
556  		if (dir_type == BTRFS_FT_XATTR)
557  			max_name_len = XATTR_NAME_MAX;
558  		else
559  			max_name_len = BTRFS_NAME_LEN;
560  
561  		/* Name/data length check */
562  		name_len = btrfs_dir_name_len(leaf, di);
563  		data_len = btrfs_dir_data_len(leaf, di);
564  		if (unlikely(name_len > max_name_len)) {
565  			dir_item_err(leaf, slot,
566  			"dir item name len too long, have %u max %u",
567  				name_len, max_name_len);
568  			return -EUCLEAN;
569  		}
570  		if (unlikely(name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info))) {
571  			dir_item_err(leaf, slot,
572  			"dir item name and data len too long, have %u max %u",
573  				name_len + data_len,
574  				BTRFS_MAX_XATTR_SIZE(fs_info));
575  			return -EUCLEAN;
576  		}
577  
578  		if (unlikely(data_len && dir_type != BTRFS_FT_XATTR)) {
579  			dir_item_err(leaf, slot,
580  			"dir item with invalid data len, have %u expect 0",
581  				data_len);
582  			return -EUCLEAN;
583  		}
584  
585  		total_size = sizeof(*di) + name_len + data_len;
586  
587  		/* header and name/data should not cross item boundary */
588  		if (unlikely(cur + total_size > item_size)) {
589  			dir_item_err(leaf, slot,
590  		"dir item data crosses item boundary, have %u boundary %u",
591  				cur + total_size, item_size);
592  			return -EUCLEAN;
593  		}
594  
595  		/*
596  		 * Special check for XATTR/DIR_ITEM, as key->offset is name
597  		 * hash, should match its name
598  		 */
599  		if (key->type == BTRFS_DIR_ITEM_KEY ||
600  		    key->type == BTRFS_XATTR_ITEM_KEY) {
601  			char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
602  
603  			read_extent_buffer(leaf, namebuf,
604  					(unsigned long)(di + 1), name_len);
605  			name_hash = btrfs_name_hash(namebuf, name_len);
606  			if (unlikely(key->offset != name_hash)) {
607  				dir_item_err(leaf, slot,
608  		"name hash mismatch with key, have 0x%016x expect 0x%016llx",
609  					name_hash, key->offset);
610  				return -EUCLEAN;
611  			}
612  		}
613  		cur += total_size;
614  		di = (struct btrfs_dir_item *)((void *)di + total_size);
615  	}
616  	return 0;
617  }
618  
619  __printf(3, 4)
620  __cold
621  static void block_group_err(const struct extent_buffer *eb, int slot,
622  			    const char *fmt, ...)
623  {
624  	const struct btrfs_fs_info *fs_info = eb->fs_info;
625  	struct btrfs_key key;
626  	struct va_format vaf;
627  	va_list args;
628  
629  	btrfs_item_key_to_cpu(eb, &key, slot);
630  	va_start(args, fmt);
631  
632  	vaf.fmt = fmt;
633  	vaf.va = &args;
634  
635  	btrfs_crit(fs_info,
636  	"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
637  		btrfs_header_level(eb) == 0 ? "leaf" : "node",
638  		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
639  		key.objectid, key.offset, &vaf);
640  	va_end(args);
641  }
642  
643  static int check_block_group_item(struct extent_buffer *leaf,
644  				  struct btrfs_key *key, int slot)
645  {
646  	struct btrfs_fs_info *fs_info = leaf->fs_info;
647  	struct btrfs_block_group_item bgi;
648  	u32 item_size = btrfs_item_size(leaf, slot);
649  	u64 chunk_objectid;
650  	u64 flags;
651  	u64 type;
652  
653  	/*
654  	 * Here we don't really care about alignment since extent allocator can
655  	 * handle it.  We care more about the size.
656  	 */
657  	if (unlikely(key->offset == 0)) {
658  		block_group_err(leaf, slot,
659  				"invalid block group size 0");
660  		return -EUCLEAN;
661  	}
662  
663  	if (unlikely(item_size != sizeof(bgi))) {
664  		block_group_err(leaf, slot,
665  			"invalid item size, have %u expect %zu",
666  				item_size, sizeof(bgi));
667  		return -EUCLEAN;
668  	}
669  
670  	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
671  			   sizeof(bgi));
672  	chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
673  	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
674  		/*
675  		 * We don't init the nr_global_roots until we load the global
676  		 * roots, so this could be 0 at mount time.  If it's 0 we'll
677  		 * just assume we're fine, and later we'll check against our
678  		 * actual value.
679  		 */
680  		if (unlikely(fs_info->nr_global_roots &&
681  			     chunk_objectid >= fs_info->nr_global_roots)) {
682  			block_group_err(leaf, slot,
683  	"invalid block group global root id, have %llu, needs to be <= %llu",
684  					chunk_objectid,
685  					fs_info->nr_global_roots);
686  			return -EUCLEAN;
687  		}
688  	} else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
689  		block_group_err(leaf, slot,
690  		"invalid block group chunk objectid, have %llu expect %llu",
691  				btrfs_stack_block_group_chunk_objectid(&bgi),
692  				BTRFS_FIRST_CHUNK_TREE_OBJECTID);
693  		return -EUCLEAN;
694  	}
695  
696  	if (unlikely(btrfs_stack_block_group_used(&bgi) > key->offset)) {
697  		block_group_err(leaf, slot,
698  			"invalid block group used, have %llu expect [0, %llu)",
699  				btrfs_stack_block_group_used(&bgi), key->offset);
700  		return -EUCLEAN;
701  	}
702  
703  	flags = btrfs_stack_block_group_flags(&bgi);
704  	if (unlikely(hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1)) {
705  		block_group_err(leaf, slot,
706  "invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
707  			flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
708  			hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
709  		return -EUCLEAN;
710  	}
711  
712  	type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
713  	if (unlikely(type != BTRFS_BLOCK_GROUP_DATA &&
714  		     type != BTRFS_BLOCK_GROUP_METADATA &&
715  		     type != BTRFS_BLOCK_GROUP_SYSTEM &&
716  		     type != (BTRFS_BLOCK_GROUP_METADATA |
717  			      BTRFS_BLOCK_GROUP_DATA))) {
718  		block_group_err(leaf, slot,
719  "invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
720  			type, hweight64(type),
721  			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
722  			BTRFS_BLOCK_GROUP_SYSTEM,
723  			BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
724  		return -EUCLEAN;
725  	}
726  	return 0;
727  }
728  
729  __printf(4, 5)
730  __cold
731  static void chunk_err(const struct extent_buffer *leaf,
732  		      const struct btrfs_chunk *chunk, u64 logical,
733  		      const char *fmt, ...)
734  {
735  	const struct btrfs_fs_info *fs_info = leaf->fs_info;
736  	bool is_sb;
737  	struct va_format vaf;
738  	va_list args;
739  	int i;
740  	int slot = -1;
741  
742  	/* Only superblock eb is able to have such small offset */
743  	is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);
744  
745  	if (!is_sb) {
746  		/*
747  		 * Get the slot number by iterating through all slots, this
748  		 * would provide better readability.
749  		 */
750  		for (i = 0; i < btrfs_header_nritems(leaf); i++) {
751  			if (btrfs_item_ptr_offset(leaf, i) ==
752  					(unsigned long)chunk) {
753  				slot = i;
754  				break;
755  			}
756  		}
757  	}
758  	va_start(args, fmt);
759  	vaf.fmt = fmt;
760  	vaf.va = &args;
761  
762  	if (is_sb)
763  		btrfs_crit(fs_info,
764  		"corrupt superblock syschunk array: chunk_start=%llu, %pV",
765  			   logical, &vaf);
766  	else
767  		btrfs_crit(fs_info,
768  	"corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
769  			   BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
770  			   logical, &vaf);
771  	va_end(args);
772  }
773  
774  /*
775   * The common chunk check which could also work on super block sys chunk array.
776   *
777   * Return -EUCLEAN if anything is corrupted.
778   * Return 0 if everything is OK.
779   */
780  int btrfs_check_chunk_valid(struct extent_buffer *leaf,
781  			    struct btrfs_chunk *chunk, u64 logical)
782  {
783  	struct btrfs_fs_info *fs_info = leaf->fs_info;
784  	u64 length;
785  	u64 chunk_end;
786  	u64 stripe_len;
787  	u16 num_stripes;
788  	u16 sub_stripes;
789  	u64 type;
790  	u64 features;
791  	bool mixed = false;
792  	int raid_index;
793  	int nparity;
794  	int ncopies;
795  
796  	length = btrfs_chunk_length(leaf, chunk);
797  	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
798  	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
799  	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
800  	type = btrfs_chunk_type(leaf, chunk);
801  	raid_index = btrfs_bg_flags_to_raid_index(type);
802  	ncopies = btrfs_raid_array[raid_index].ncopies;
803  	nparity = btrfs_raid_array[raid_index].nparity;
804  
805  	if (unlikely(!num_stripes)) {
806  		chunk_err(leaf, chunk, logical,
807  			  "invalid chunk num_stripes, have %u", num_stripes);
808  		return -EUCLEAN;
809  	}
810  	if (unlikely(num_stripes < ncopies)) {
811  		chunk_err(leaf, chunk, logical,
812  			  "invalid chunk num_stripes < ncopies, have %u < %d",
813  			  num_stripes, ncopies);
814  		return -EUCLEAN;
815  	}
816  	if (unlikely(nparity && num_stripes == nparity)) {
817  		chunk_err(leaf, chunk, logical,
818  			  "invalid chunk num_stripes == nparity, have %u == %d",
819  			  num_stripes, nparity);
820  		return -EUCLEAN;
821  	}
822  	if (unlikely(!IS_ALIGNED(logical, fs_info->sectorsize))) {
823  		chunk_err(leaf, chunk, logical,
824  		"invalid chunk logical, have %llu should aligned to %u",
825  			  logical, fs_info->sectorsize);
826  		return -EUCLEAN;
827  	}
828  	if (unlikely(btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize)) {
829  		chunk_err(leaf, chunk, logical,
830  			  "invalid chunk sectorsize, have %u expect %u",
831  			  btrfs_chunk_sector_size(leaf, chunk),
832  			  fs_info->sectorsize);
833  		return -EUCLEAN;
834  	}
835  	if (unlikely(!length || !IS_ALIGNED(length, fs_info->sectorsize))) {
836  		chunk_err(leaf, chunk, logical,
837  			  "invalid chunk length, have %llu", length);
838  		return -EUCLEAN;
839  	}
840  	if (unlikely(check_add_overflow(logical, length, &chunk_end))) {
841  		chunk_err(leaf, chunk, logical,
842  "invalid chunk logical start and length, have logical start %llu length %llu",
843  			  logical, length);
844  		return -EUCLEAN;
845  	}
846  	if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) {
847  		chunk_err(leaf, chunk, logical,
848  			  "invalid chunk stripe length: %llu",
849  			  stripe_len);
850  		return -EUCLEAN;
851  	}
852  	if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
853  			      BTRFS_BLOCK_GROUP_PROFILE_MASK))) {
854  		chunk_err(leaf, chunk, logical,
855  			  "unrecognized chunk type: 0x%llx",
856  			  ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
857  			    BTRFS_BLOCK_GROUP_PROFILE_MASK) &
858  			  btrfs_chunk_type(leaf, chunk));
859  		return -EUCLEAN;
860  	}
861  
862  	if (unlikely(!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
863  		     (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)) {
864  		chunk_err(leaf, chunk, logical,
865  		"invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
866  			  type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
867  		return -EUCLEAN;
868  	}
869  	if (unlikely((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)) {
870  		chunk_err(leaf, chunk, logical,
871  	"missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
872  			  type, BTRFS_BLOCK_GROUP_TYPE_MASK);
873  		return -EUCLEAN;
874  	}
875  
876  	if (unlikely((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
877  		     (type & (BTRFS_BLOCK_GROUP_METADATA |
878  			      BTRFS_BLOCK_GROUP_DATA)))) {
879  		chunk_err(leaf, chunk, logical,
880  			  "system chunk with data or metadata type: 0x%llx",
881  			  type);
882  		return -EUCLEAN;
883  	}
884  
885  	features = btrfs_super_incompat_flags(fs_info->super_copy);
886  	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
887  		mixed = true;
888  
889  	if (!mixed) {
890  		if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA) &&
891  			     (type & BTRFS_BLOCK_GROUP_DATA))) {
892  			chunk_err(leaf, chunk, logical,
893  			"mixed chunk type in non-mixed mode: 0x%llx", type);
894  			return -EUCLEAN;
895  		}
896  	}
897  
898  	if (unlikely((type & BTRFS_BLOCK_GROUP_RAID10 &&
899  		      sub_stripes != btrfs_raid_array[BTRFS_RAID_RAID10].sub_stripes) ||
900  		     (type & BTRFS_BLOCK_GROUP_RAID1 &&
901  		      num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1].devs_min) ||
902  		     (type & BTRFS_BLOCK_GROUP_RAID1C3 &&
903  		      num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C3].devs_min) ||
904  		     (type & BTRFS_BLOCK_GROUP_RAID1C4 &&
905  		      num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C4].devs_min) ||
906  		     (type & BTRFS_BLOCK_GROUP_RAID5 &&
907  		      num_stripes < btrfs_raid_array[BTRFS_RAID_RAID5].devs_min) ||
908  		     (type & BTRFS_BLOCK_GROUP_RAID6 &&
909  		      num_stripes < btrfs_raid_array[BTRFS_RAID_RAID6].devs_min) ||
910  		     (type & BTRFS_BLOCK_GROUP_DUP &&
911  		      num_stripes != btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes) ||
912  		     ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
913  		      num_stripes != btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes))) {
914  		chunk_err(leaf, chunk, logical,
915  			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
916  			num_stripes, sub_stripes,
917  			type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
918  		return -EUCLEAN;
919  	}
920  
921  	return 0;
922  }
923  
924  /*
925   * Enhanced version of chunk item checker.
926   *
927   * The common btrfs_check_chunk_valid() doesn't check item size since it needs
928   * to work on super block sys_chunk_array which doesn't have full item ptr.
929   */
930  static int check_leaf_chunk_item(struct extent_buffer *leaf,
931  				 struct btrfs_chunk *chunk,
932  				 struct btrfs_key *key, int slot)
933  {
934  	int num_stripes;
935  
936  	if (unlikely(btrfs_item_size(leaf, slot) < sizeof(struct btrfs_chunk))) {
937  		chunk_err(leaf, chunk, key->offset,
938  			"invalid chunk item size: have %u expect [%zu, %u)",
939  			btrfs_item_size(leaf, slot),
940  			sizeof(struct btrfs_chunk),
941  			BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
942  		return -EUCLEAN;
943  	}
944  
945  	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
946  	/* Let btrfs_check_chunk_valid() handle this error type */
947  	if (num_stripes == 0)
948  		goto out;
949  
950  	if (unlikely(btrfs_chunk_item_size(num_stripes) !=
951  		     btrfs_item_size(leaf, slot))) {
952  		chunk_err(leaf, chunk, key->offset,
953  			"invalid chunk item size: have %u expect %lu",
954  			btrfs_item_size(leaf, slot),
955  			btrfs_chunk_item_size(num_stripes));
956  		return -EUCLEAN;
957  	}
958  out:
959  	return btrfs_check_chunk_valid(leaf, chunk, key->offset);
960  }
961  
962  __printf(3, 4)
963  __cold
964  static void dev_item_err(const struct extent_buffer *eb, int slot,
965  			 const char *fmt, ...)
966  {
967  	struct btrfs_key key;
968  	struct va_format vaf;
969  	va_list args;
970  
971  	btrfs_item_key_to_cpu(eb, &key, slot);
972  	va_start(args, fmt);
973  
974  	vaf.fmt = fmt;
975  	vaf.va = &args;
976  
977  	btrfs_crit(eb->fs_info,
978  	"corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
979  		btrfs_header_level(eb) == 0 ? "leaf" : "node",
980  		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
981  		key.objectid, &vaf);
982  	va_end(args);
983  }
984  
985  static int check_dev_item(struct extent_buffer *leaf,
986  			  struct btrfs_key *key, int slot)
987  {
988  	struct btrfs_dev_item *ditem;
989  	const u32 item_size = btrfs_item_size(leaf, slot);
990  
991  	if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
992  		dev_item_err(leaf, slot,
993  			     "invalid objectid: has=%llu expect=%llu",
994  			     key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
995  		return -EUCLEAN;
996  	}
997  
998  	if (unlikely(item_size != sizeof(*ditem))) {
999  		dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
1000  			     item_size, sizeof(*ditem));
1001  		return -EUCLEAN;
1002  	}
1003  
1004  	ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
1005  	if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
1006  		dev_item_err(leaf, slot,
1007  			     "devid mismatch: key has=%llu item has=%llu",
1008  			     key->offset, btrfs_device_id(leaf, ditem));
1009  		return -EUCLEAN;
1010  	}
1011  
1012  	/*
1013  	 * For device total_bytes, we don't have reliable way to check it, as
1014  	 * it can be 0 for device removal. Device size check can only be done
1015  	 * by dev extents check.
1016  	 */
1017  	if (unlikely(btrfs_device_bytes_used(leaf, ditem) >
1018  		     btrfs_device_total_bytes(leaf, ditem))) {
1019  		dev_item_err(leaf, slot,
1020  			     "invalid bytes used: have %llu expect [0, %llu]",
1021  			     btrfs_device_bytes_used(leaf, ditem),
1022  			     btrfs_device_total_bytes(leaf, ditem));
1023  		return -EUCLEAN;
1024  	}
1025  	/*
1026  	 * Remaining members like io_align/type/gen/dev_group aren't really
1027  	 * utilized.  Skip them to make later usage of them easier.
1028  	 */
1029  	return 0;
1030  }
1031  
1032  static int check_inode_item(struct extent_buffer *leaf,
1033  			    struct btrfs_key *key, int slot)
1034  {
1035  	struct btrfs_fs_info *fs_info = leaf->fs_info;
1036  	struct btrfs_inode_item *iitem;
1037  	u64 super_gen = btrfs_super_generation(fs_info->super_copy);
1038  	u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
1039  	const u32 item_size = btrfs_item_size(leaf, slot);
1040  	u32 mode;
1041  	int ret;
1042  	u32 flags;
1043  	u32 ro_flags;
1044  
1045  	ret = check_inode_key(leaf, key, slot);
1046  	if (unlikely(ret < 0))
1047  		return ret;
1048  
1049  	if (unlikely(item_size != sizeof(*iitem))) {
1050  		generic_err(leaf, slot, "invalid item size: has %u expect %zu",
1051  			    item_size, sizeof(*iitem));
1052  		return -EUCLEAN;
1053  	}
1054  
1055  	iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
1056  
1057  	/* Here we use super block generation + 1 to handle log tree */
1058  	if (unlikely(btrfs_inode_generation(leaf, iitem) > super_gen + 1)) {
1059  		inode_item_err(leaf, slot,
1060  			"invalid inode generation: has %llu expect (0, %llu]",
1061  			       btrfs_inode_generation(leaf, iitem),
1062  			       super_gen + 1);
1063  		return -EUCLEAN;
1064  	}
1065  	/* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
1066  	if (unlikely(btrfs_inode_transid(leaf, iitem) > super_gen + 1)) {
1067  		inode_item_err(leaf, slot,
1068  			"invalid inode transid: has %llu expect [0, %llu]",
1069  			       btrfs_inode_transid(leaf, iitem), super_gen + 1);
1070  		return -EUCLEAN;
1071  	}
1072  
1073  	/*
1074  	 * For size and nbytes it's better not to be too strict, as for dir
1075  	 * item its size/nbytes can easily get wrong, but doesn't affect
1076  	 * anything in the fs. So here we skip the check.
1077  	 */
1078  	mode = btrfs_inode_mode(leaf, iitem);
1079  	if (unlikely(mode & ~valid_mask)) {
1080  		inode_item_err(leaf, slot,
1081  			       "unknown mode bit detected: 0x%x",
1082  			       mode & ~valid_mask);
1083  		return -EUCLEAN;
1084  	}
1085  
1086  	/*
1087  	 * S_IFMT is not bit mapped so we can't completely rely on
1088  	 * is_power_of_2/has_single_bit_set, but it can save us from checking
1089  	 * FIFO/CHR/DIR/REG.  Only needs to check BLK, LNK and SOCKS
1090  	 */
1091  	if (!has_single_bit_set(mode & S_IFMT)) {
1092  		if (unlikely(!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode))) {
1093  			inode_item_err(leaf, slot,
1094  			"invalid mode: has 0%o expect valid S_IF* bit(s)",
1095  				       mode & S_IFMT);
1096  			return -EUCLEAN;
1097  		}
1098  	}
1099  	if (unlikely(S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1)) {
1100  		inode_item_err(leaf, slot,
1101  		       "invalid nlink: has %u expect no more than 1 for dir",
1102  			btrfs_inode_nlink(leaf, iitem));
1103  		return -EUCLEAN;
1104  	}
1105  	btrfs_inode_split_flags(btrfs_inode_flags(leaf, iitem), &flags, &ro_flags);
1106  	if (unlikely(flags & ~BTRFS_INODE_FLAG_MASK)) {
1107  		inode_item_err(leaf, slot,
1108  			       "unknown incompat flags detected: 0x%x", flags);
1109  		return -EUCLEAN;
1110  	}
1111  	if (unlikely(!sb_rdonly(fs_info->sb) &&
1112  		     (ro_flags & ~BTRFS_INODE_RO_FLAG_MASK))) {
1113  		inode_item_err(leaf, slot,
1114  			"unknown ro-compat flags detected on writeable mount: 0x%x",
1115  			ro_flags);
1116  		return -EUCLEAN;
1117  	}
1118  	return 0;
1119  }
1120  
1121  static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
1122  			   int slot)
1123  {
1124  	struct btrfs_fs_info *fs_info = leaf->fs_info;
1125  	struct btrfs_root_item ri = { 0 };
1126  	const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY |
1127  				     BTRFS_ROOT_SUBVOL_DEAD;
1128  	int ret;
1129  
1130  	ret = check_root_key(leaf, key, slot);
1131  	if (unlikely(ret < 0))
1132  		return ret;
1133  
1134  	if (unlikely(btrfs_item_size(leaf, slot) != sizeof(ri) &&
1135  		     btrfs_item_size(leaf, slot) !=
1136  		     btrfs_legacy_root_item_size())) {
1137  		generic_err(leaf, slot,
1138  			    "invalid root item size, have %u expect %zu or %u",
1139  			    btrfs_item_size(leaf, slot), sizeof(ri),
1140  			    btrfs_legacy_root_item_size());
1141  		return -EUCLEAN;
1142  	}
1143  
1144  	/*
1145  	 * For legacy root item, the members starting at generation_v2 will be
1146  	 * all filled with 0.
1147  	 * And since we allow geneartion_v2 as 0, it will still pass the check.
1148  	 */
1149  	read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot),
1150  			   btrfs_item_size(leaf, slot));
1151  
1152  	/* Generation related */
1153  	if (unlikely(btrfs_root_generation(&ri) >
1154  		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1155  		generic_err(leaf, slot,
1156  			"invalid root generation, have %llu expect (0, %llu]",
1157  			    btrfs_root_generation(&ri),
1158  			    btrfs_super_generation(fs_info->super_copy) + 1);
1159  		return -EUCLEAN;
1160  	}
1161  	if (unlikely(btrfs_root_generation_v2(&ri) >
1162  		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1163  		generic_err(leaf, slot,
1164  		"invalid root v2 generation, have %llu expect (0, %llu]",
1165  			    btrfs_root_generation_v2(&ri),
1166  			    btrfs_super_generation(fs_info->super_copy) + 1);
1167  		return -EUCLEAN;
1168  	}
1169  	if (unlikely(btrfs_root_last_snapshot(&ri) >
1170  		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1171  		generic_err(leaf, slot,
1172  		"invalid root last_snapshot, have %llu expect (0, %llu]",
1173  			    btrfs_root_last_snapshot(&ri),
1174  			    btrfs_super_generation(fs_info->super_copy) + 1);
1175  		return -EUCLEAN;
1176  	}
1177  
1178  	/* Alignment and level check */
1179  	if (unlikely(!IS_ALIGNED(btrfs_root_bytenr(&ri), fs_info->sectorsize))) {
1180  		generic_err(leaf, slot,
1181  		"invalid root bytenr, have %llu expect to be aligned to %u",
1182  			    btrfs_root_bytenr(&ri), fs_info->sectorsize);
1183  		return -EUCLEAN;
1184  	}
1185  	if (unlikely(btrfs_root_level(&ri) >= BTRFS_MAX_LEVEL)) {
1186  		generic_err(leaf, slot,
1187  			    "invalid root level, have %u expect [0, %u]",
1188  			    btrfs_root_level(&ri), BTRFS_MAX_LEVEL - 1);
1189  		return -EUCLEAN;
1190  	}
1191  	if (unlikely(btrfs_root_drop_level(&ri) >= BTRFS_MAX_LEVEL)) {
1192  		generic_err(leaf, slot,
1193  			    "invalid root level, have %u expect [0, %u]",
1194  			    btrfs_root_drop_level(&ri), BTRFS_MAX_LEVEL - 1);
1195  		return -EUCLEAN;
1196  	}
1197  
1198  	/* Flags check */
1199  	if (unlikely(btrfs_root_flags(&ri) & ~valid_root_flags)) {
1200  		generic_err(leaf, slot,
1201  			    "invalid root flags, have 0x%llx expect mask 0x%llx",
1202  			    btrfs_root_flags(&ri), valid_root_flags);
1203  		return -EUCLEAN;
1204  	}
1205  	return 0;
1206  }
1207  
1208  __printf(3,4)
1209  __cold
1210  static void extent_err(const struct extent_buffer *eb, int slot,
1211  		       const char *fmt, ...)
1212  {
1213  	struct btrfs_key key;
1214  	struct va_format vaf;
1215  	va_list args;
1216  	u64 bytenr;
1217  	u64 len;
1218  
1219  	btrfs_item_key_to_cpu(eb, &key, slot);
1220  	bytenr = key.objectid;
1221  	if (key.type == BTRFS_METADATA_ITEM_KEY ||
1222  	    key.type == BTRFS_TREE_BLOCK_REF_KEY ||
1223  	    key.type == BTRFS_SHARED_BLOCK_REF_KEY)
1224  		len = eb->fs_info->nodesize;
1225  	else
1226  		len = key.offset;
1227  	va_start(args, fmt);
1228  
1229  	vaf.fmt = fmt;
1230  	vaf.va = &args;
1231  
1232  	btrfs_crit(eb->fs_info,
1233  	"corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV",
1234  		btrfs_header_level(eb) == 0 ? "leaf" : "node",
1235  		eb->start, slot, bytenr, len, &vaf);
1236  	va_end(args);
1237  }
1238  
1239  static int check_extent_item(struct extent_buffer *leaf,
1240  			     struct btrfs_key *key, int slot,
1241  			     struct btrfs_key *prev_key)
1242  {
1243  	struct btrfs_fs_info *fs_info = leaf->fs_info;
1244  	struct btrfs_extent_item *ei;
1245  	bool is_tree_block = false;
1246  	unsigned long ptr;	/* Current pointer inside inline refs */
1247  	unsigned long end;	/* Extent item end */
1248  	const u32 item_size = btrfs_item_size(leaf, slot);
1249  	u64 flags;
1250  	u64 generation;
1251  	u64 total_refs;		/* Total refs in btrfs_extent_item */
1252  	u64 inline_refs = 0;	/* found total inline refs */
1253  
1254  	if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY &&
1255  		     !btrfs_fs_incompat(fs_info, SKINNY_METADATA))) {
1256  		generic_err(leaf, slot,
1257  "invalid key type, METADATA_ITEM type invalid when SKINNY_METADATA feature disabled");
1258  		return -EUCLEAN;
1259  	}
1260  	/* key->objectid is the bytenr for both key types */
1261  	if (unlikely(!IS_ALIGNED(key->objectid, fs_info->sectorsize))) {
1262  		generic_err(leaf, slot,
1263  		"invalid key objectid, have %llu expect to be aligned to %u",
1264  			   key->objectid, fs_info->sectorsize);
1265  		return -EUCLEAN;
1266  	}
1267  
1268  	/* key->offset is tree level for METADATA_ITEM_KEY */
1269  	if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY &&
1270  		     key->offset >= BTRFS_MAX_LEVEL)) {
1271  		extent_err(leaf, slot,
1272  			   "invalid tree level, have %llu expect [0, %u]",
1273  			   key->offset, BTRFS_MAX_LEVEL - 1);
1274  		return -EUCLEAN;
1275  	}
1276  
1277  	/*
1278  	 * EXTENT/METADATA_ITEM consists of:
1279  	 * 1) One btrfs_extent_item
1280  	 *    Records the total refs, type and generation of the extent.
1281  	 *
1282  	 * 2) One btrfs_tree_block_info (for EXTENT_ITEM and tree backref only)
1283  	 *    Records the first key and level of the tree block.
1284  	 *
1285  	 * 2) Zero or more btrfs_extent_inline_ref(s)
1286  	 *    Each inline ref has one btrfs_extent_inline_ref shows:
1287  	 *    2.1) The ref type, one of the 4
1288  	 *         TREE_BLOCK_REF	Tree block only
1289  	 *         SHARED_BLOCK_REF	Tree block only
1290  	 *         EXTENT_DATA_REF	Data only
1291  	 *         SHARED_DATA_REF	Data only
1292  	 *    2.2) Ref type specific data
1293  	 *         Either using btrfs_extent_inline_ref::offset, or specific
1294  	 *         data structure.
1295  	 */
1296  	if (unlikely(item_size < sizeof(*ei))) {
1297  		extent_err(leaf, slot,
1298  			   "invalid item size, have %u expect [%zu, %u)",
1299  			   item_size, sizeof(*ei),
1300  			   BTRFS_LEAF_DATA_SIZE(fs_info));
1301  		return -EUCLEAN;
1302  	}
1303  	end = item_size + btrfs_item_ptr_offset(leaf, slot);
1304  
1305  	/* Checks against extent_item */
1306  	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
1307  	flags = btrfs_extent_flags(leaf, ei);
1308  	total_refs = btrfs_extent_refs(leaf, ei);
1309  	generation = btrfs_extent_generation(leaf, ei);
1310  	if (unlikely(generation >
1311  		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1312  		extent_err(leaf, slot,
1313  			   "invalid generation, have %llu expect (0, %llu]",
1314  			   generation,
1315  			   btrfs_super_generation(fs_info->super_copy) + 1);
1316  		return -EUCLEAN;
1317  	}
1318  	if (unlikely(!has_single_bit_set(flags & (BTRFS_EXTENT_FLAG_DATA |
1319  						  BTRFS_EXTENT_FLAG_TREE_BLOCK)))) {
1320  		extent_err(leaf, slot,
1321  		"invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx",
1322  			flags, BTRFS_EXTENT_FLAG_DATA |
1323  			BTRFS_EXTENT_FLAG_TREE_BLOCK);
1324  		return -EUCLEAN;
1325  	}
1326  	is_tree_block = !!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK);
1327  	if (is_tree_block) {
1328  		if (unlikely(key->type == BTRFS_EXTENT_ITEM_KEY &&
1329  			     key->offset != fs_info->nodesize)) {
1330  			extent_err(leaf, slot,
1331  				   "invalid extent length, have %llu expect %u",
1332  				   key->offset, fs_info->nodesize);
1333  			return -EUCLEAN;
1334  		}
1335  	} else {
1336  		if (unlikely(key->type != BTRFS_EXTENT_ITEM_KEY)) {
1337  			extent_err(leaf, slot,
1338  			"invalid key type, have %u expect %u for data backref",
1339  				   key->type, BTRFS_EXTENT_ITEM_KEY);
1340  			return -EUCLEAN;
1341  		}
1342  		if (unlikely(!IS_ALIGNED(key->offset, fs_info->sectorsize))) {
1343  			extent_err(leaf, slot,
1344  			"invalid extent length, have %llu expect aligned to %u",
1345  				   key->offset, fs_info->sectorsize);
1346  			return -EUCLEAN;
1347  		}
1348  		if (unlikely(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
1349  			extent_err(leaf, slot,
1350  			"invalid extent flag, data has full backref set");
1351  			return -EUCLEAN;
1352  		}
1353  	}
1354  	ptr = (unsigned long)(struct btrfs_extent_item *)(ei + 1);
1355  
1356  	/* Check the special case of btrfs_tree_block_info */
1357  	if (is_tree_block && key->type != BTRFS_METADATA_ITEM_KEY) {
1358  		struct btrfs_tree_block_info *info;
1359  
1360  		info = (struct btrfs_tree_block_info *)ptr;
1361  		if (unlikely(btrfs_tree_block_level(leaf, info) >= BTRFS_MAX_LEVEL)) {
1362  			extent_err(leaf, slot,
1363  			"invalid tree block info level, have %u expect [0, %u]",
1364  				   btrfs_tree_block_level(leaf, info),
1365  				   BTRFS_MAX_LEVEL - 1);
1366  			return -EUCLEAN;
1367  		}
1368  		ptr = (unsigned long)(struct btrfs_tree_block_info *)(info + 1);
1369  	}
1370  
1371  	/* Check inline refs */
1372  	while (ptr < end) {
1373  		struct btrfs_extent_inline_ref *iref;
1374  		struct btrfs_extent_data_ref *dref;
1375  		struct btrfs_shared_data_ref *sref;
1376  		u64 dref_offset;
1377  		u64 inline_offset;
1378  		u8 inline_type;
1379  
1380  		if (unlikely(ptr + sizeof(*iref) > end)) {
1381  			extent_err(leaf, slot,
1382  "inline ref item overflows extent item, ptr %lu iref size %zu end %lu",
1383  				   ptr, sizeof(*iref), end);
1384  			return -EUCLEAN;
1385  		}
1386  		iref = (struct btrfs_extent_inline_ref *)ptr;
1387  		inline_type = btrfs_extent_inline_ref_type(leaf, iref);
1388  		inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1389  		if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
1390  			extent_err(leaf, slot,
1391  "inline ref item overflows extent item, ptr %lu iref size %u end %lu",
1392  				   ptr, inline_type, end);
1393  			return -EUCLEAN;
1394  		}
1395  
1396  		switch (inline_type) {
1397  		/* inline_offset is subvolid of the owner, no need to check */
1398  		case BTRFS_TREE_BLOCK_REF_KEY:
1399  			inline_refs++;
1400  			break;
1401  		/* Contains parent bytenr */
1402  		case BTRFS_SHARED_BLOCK_REF_KEY:
1403  			if (unlikely(!IS_ALIGNED(inline_offset,
1404  						 fs_info->sectorsize))) {
1405  				extent_err(leaf, slot,
1406  		"invalid tree parent bytenr, have %llu expect aligned to %u",
1407  					   inline_offset, fs_info->sectorsize);
1408  				return -EUCLEAN;
1409  			}
1410  			inline_refs++;
1411  			break;
1412  		/*
1413  		 * Contains owner subvolid, owner key objectid, adjusted offset.
1414  		 * The only obvious corruption can happen in that offset.
1415  		 */
1416  		case BTRFS_EXTENT_DATA_REF_KEY:
1417  			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1418  			dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
1419  			if (unlikely(!IS_ALIGNED(dref_offset,
1420  						 fs_info->sectorsize))) {
1421  				extent_err(leaf, slot,
1422  		"invalid data ref offset, have %llu expect aligned to %u",
1423  					   dref_offset, fs_info->sectorsize);
1424  				return -EUCLEAN;
1425  			}
1426  			inline_refs += btrfs_extent_data_ref_count(leaf, dref);
1427  			break;
1428  		/* Contains parent bytenr and ref count */
1429  		case BTRFS_SHARED_DATA_REF_KEY:
1430  			sref = (struct btrfs_shared_data_ref *)(iref + 1);
1431  			if (unlikely(!IS_ALIGNED(inline_offset,
1432  						 fs_info->sectorsize))) {
1433  				extent_err(leaf, slot,
1434  		"invalid data parent bytenr, have %llu expect aligned to %u",
1435  					   inline_offset, fs_info->sectorsize);
1436  				return -EUCLEAN;
1437  			}
1438  			inline_refs += btrfs_shared_data_ref_count(leaf, sref);
1439  			break;
1440  		default:
1441  			extent_err(leaf, slot, "unknown inline ref type: %u",
1442  				   inline_type);
1443  			return -EUCLEAN;
1444  		}
1445  		ptr += btrfs_extent_inline_ref_size(inline_type);
1446  	}
1447  	/* No padding is allowed */
1448  	if (unlikely(ptr != end)) {
1449  		extent_err(leaf, slot,
1450  			   "invalid extent item size, padding bytes found");
1451  		return -EUCLEAN;
1452  	}
1453  
1454  	/* Finally, check the inline refs against total refs */
1455  	if (unlikely(inline_refs > total_refs)) {
1456  		extent_err(leaf, slot,
1457  			"invalid extent refs, have %llu expect >= inline %llu",
1458  			   total_refs, inline_refs);
1459  		return -EUCLEAN;
1460  	}
1461  
1462  	if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) ||
1463  	    (prev_key->type == BTRFS_METADATA_ITEM_KEY)) {
1464  		u64 prev_end = prev_key->objectid;
1465  
1466  		if (prev_key->type == BTRFS_METADATA_ITEM_KEY)
1467  			prev_end += fs_info->nodesize;
1468  		else
1469  			prev_end += prev_key->offset;
1470  
1471  		if (unlikely(prev_end > key->objectid)) {
1472  			extent_err(leaf, slot,
1473  	"previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]",
1474  				   prev_key->objectid, prev_key->type,
1475  				   prev_key->offset, key->objectid, key->type,
1476  				   key->offset);
1477  			return -EUCLEAN;
1478  		}
1479  	}
1480  
1481  	return 0;
1482  }
1483  
1484  static int check_simple_keyed_refs(struct extent_buffer *leaf,
1485  				   struct btrfs_key *key, int slot)
1486  {
1487  	u32 expect_item_size = 0;
1488  
1489  	if (key->type == BTRFS_SHARED_DATA_REF_KEY)
1490  		expect_item_size = sizeof(struct btrfs_shared_data_ref);
1491  
1492  	if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) {
1493  		generic_err(leaf, slot,
1494  		"invalid item size, have %u expect %u for key type %u",
1495  			    btrfs_item_size(leaf, slot),
1496  			    expect_item_size, key->type);
1497  		return -EUCLEAN;
1498  	}
1499  	if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
1500  		generic_err(leaf, slot,
1501  "invalid key objectid for shared block ref, have %llu expect aligned to %u",
1502  			    key->objectid, leaf->fs_info->sectorsize);
1503  		return -EUCLEAN;
1504  	}
1505  	if (unlikely(key->type != BTRFS_TREE_BLOCK_REF_KEY &&
1506  		     !IS_ALIGNED(key->offset, leaf->fs_info->sectorsize))) {
1507  		extent_err(leaf, slot,
1508  		"invalid tree parent bytenr, have %llu expect aligned to %u",
1509  			   key->offset, leaf->fs_info->sectorsize);
1510  		return -EUCLEAN;
1511  	}
1512  	return 0;
1513  }
1514  
1515  static int check_extent_data_ref(struct extent_buffer *leaf,
1516  				 struct btrfs_key *key, int slot)
1517  {
1518  	struct btrfs_extent_data_ref *dref;
1519  	unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
1520  	const unsigned long end = ptr + btrfs_item_size(leaf, slot);
1521  
1522  	if (unlikely(btrfs_item_size(leaf, slot) % sizeof(*dref) != 0)) {
1523  		generic_err(leaf, slot,
1524  	"invalid item size, have %u expect aligned to %zu for key type %u",
1525  			    btrfs_item_size(leaf, slot),
1526  			    sizeof(*dref), key->type);
1527  		return -EUCLEAN;
1528  	}
1529  	if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
1530  		generic_err(leaf, slot,
1531  "invalid key objectid for shared block ref, have %llu expect aligned to %u",
1532  			    key->objectid, leaf->fs_info->sectorsize);
1533  		return -EUCLEAN;
1534  	}
1535  	for (; ptr < end; ptr += sizeof(*dref)) {
1536  		u64 offset;
1537  
1538  		/*
1539  		 * We cannot check the extent_data_ref hash due to possible
1540  		 * overflow from the leaf due to hash collisions.
1541  		 */
1542  		dref = (struct btrfs_extent_data_ref *)ptr;
1543  		offset = btrfs_extent_data_ref_offset(leaf, dref);
1544  		if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
1545  			extent_err(leaf, slot,
1546  	"invalid extent data backref offset, have %llu expect aligned to %u",
1547  				   offset, leaf->fs_info->sectorsize);
1548  			return -EUCLEAN;
1549  		}
1550  	}
1551  	return 0;
1552  }
1553  
1554  #define inode_ref_err(eb, slot, fmt, args...)			\
1555  	inode_item_err(eb, slot, fmt, ##args)
1556  static int check_inode_ref(struct extent_buffer *leaf,
1557  			   struct btrfs_key *key, struct btrfs_key *prev_key,
1558  			   int slot)
1559  {
1560  	struct btrfs_inode_ref *iref;
1561  	unsigned long ptr;
1562  	unsigned long end;
1563  
1564  	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
1565  		return -EUCLEAN;
1566  	/* namelen can't be 0, so item_size == sizeof() is also invalid */
1567  	if (unlikely(btrfs_item_size(leaf, slot) <= sizeof(*iref))) {
1568  		inode_ref_err(leaf, slot,
1569  			"invalid item size, have %u expect (%zu, %u)",
1570  			btrfs_item_size(leaf, slot),
1571  			sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
1572  		return -EUCLEAN;
1573  	}
1574  
1575  	ptr = btrfs_item_ptr_offset(leaf, slot);
1576  	end = ptr + btrfs_item_size(leaf, slot);
1577  	while (ptr < end) {
1578  		u16 namelen;
1579  
1580  		if (unlikely(ptr + sizeof(iref) > end)) {
1581  			inode_ref_err(leaf, slot,
1582  			"inode ref overflow, ptr %lu end %lu inode_ref_size %zu",
1583  				ptr, end, sizeof(iref));
1584  			return -EUCLEAN;
1585  		}
1586  
1587  		iref = (struct btrfs_inode_ref *)ptr;
1588  		namelen = btrfs_inode_ref_name_len(leaf, iref);
1589  		if (unlikely(ptr + sizeof(*iref) + namelen > end)) {
1590  			inode_ref_err(leaf, slot,
1591  				"inode ref overflow, ptr %lu end %lu namelen %u",
1592  				ptr, end, namelen);
1593  			return -EUCLEAN;
1594  		}
1595  
1596  		/*
1597  		 * NOTE: In theory we should record all found index numbers
1598  		 * to find any duplicated indexes, but that will be too time
1599  		 * consuming for inodes with too many hard links.
1600  		 */
1601  		ptr += sizeof(*iref) + namelen;
1602  	}
1603  	return 0;
1604  }
1605  
1606  /*
1607   * Common point to switch the item-specific validation.
1608   */
1609  static int check_leaf_item(struct extent_buffer *leaf,
1610  			   struct btrfs_key *key, int slot,
1611  			   struct btrfs_key *prev_key)
1612  {
1613  	int ret = 0;
1614  	struct btrfs_chunk *chunk;
1615  
1616  	switch (key->type) {
1617  	case BTRFS_EXTENT_DATA_KEY:
1618  		ret = check_extent_data_item(leaf, key, slot, prev_key);
1619  		break;
1620  	case BTRFS_EXTENT_CSUM_KEY:
1621  		ret = check_csum_item(leaf, key, slot, prev_key);
1622  		break;
1623  	case BTRFS_DIR_ITEM_KEY:
1624  	case BTRFS_DIR_INDEX_KEY:
1625  	case BTRFS_XATTR_ITEM_KEY:
1626  		ret = check_dir_item(leaf, key, prev_key, slot);
1627  		break;
1628  	case BTRFS_INODE_REF_KEY:
1629  		ret = check_inode_ref(leaf, key, prev_key, slot);
1630  		break;
1631  	case BTRFS_BLOCK_GROUP_ITEM_KEY:
1632  		ret = check_block_group_item(leaf, key, slot);
1633  		break;
1634  	case BTRFS_CHUNK_ITEM_KEY:
1635  		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
1636  		ret = check_leaf_chunk_item(leaf, chunk, key, slot);
1637  		break;
1638  	case BTRFS_DEV_ITEM_KEY:
1639  		ret = check_dev_item(leaf, key, slot);
1640  		break;
1641  	case BTRFS_INODE_ITEM_KEY:
1642  		ret = check_inode_item(leaf, key, slot);
1643  		break;
1644  	case BTRFS_ROOT_ITEM_KEY:
1645  		ret = check_root_item(leaf, key, slot);
1646  		break;
1647  	case BTRFS_EXTENT_ITEM_KEY:
1648  	case BTRFS_METADATA_ITEM_KEY:
1649  		ret = check_extent_item(leaf, key, slot, prev_key);
1650  		break;
1651  	case BTRFS_TREE_BLOCK_REF_KEY:
1652  	case BTRFS_SHARED_DATA_REF_KEY:
1653  	case BTRFS_SHARED_BLOCK_REF_KEY:
1654  		ret = check_simple_keyed_refs(leaf, key, slot);
1655  		break;
1656  	case BTRFS_EXTENT_DATA_REF_KEY:
1657  		ret = check_extent_data_ref(leaf, key, slot);
1658  		break;
1659  	}
1660  	return ret;
1661  }
1662  
1663  static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
1664  {
1665  	struct btrfs_fs_info *fs_info = leaf->fs_info;
1666  	/* No valid key type is 0, so all key should be larger than this key */
1667  	struct btrfs_key prev_key = {0, 0, 0};
1668  	struct btrfs_key key;
1669  	u32 nritems = btrfs_header_nritems(leaf);
1670  	int slot;
1671  
1672  	if (unlikely(btrfs_header_level(leaf) != 0)) {
1673  		generic_err(leaf, 0,
1674  			"invalid level for leaf, have %d expect 0",
1675  			btrfs_header_level(leaf));
1676  		return -EUCLEAN;
1677  	}
1678  
1679  	/*
1680  	 * Extent buffers from a relocation tree have a owner field that
1681  	 * corresponds to the subvolume tree they are based on. So just from an
1682  	 * extent buffer alone we can not find out what is the id of the
1683  	 * corresponding subvolume tree, so we can not figure out if the extent
1684  	 * buffer corresponds to the root of the relocation tree or not. So
1685  	 * skip this check for relocation trees.
1686  	 */
1687  	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
1688  		u64 owner = btrfs_header_owner(leaf);
1689  
1690  		/* These trees must never be empty */
1691  		if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID ||
1692  			     owner == BTRFS_CHUNK_TREE_OBJECTID ||
1693  			     owner == BTRFS_DEV_TREE_OBJECTID ||
1694  			     owner == BTRFS_FS_TREE_OBJECTID ||
1695  			     owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
1696  			generic_err(leaf, 0,
1697  			"invalid root, root %llu must never be empty",
1698  				    owner);
1699  			return -EUCLEAN;
1700  		}
1701  
1702  		/* Unknown tree */
1703  		if (unlikely(owner == 0)) {
1704  			generic_err(leaf, 0,
1705  				"invalid owner, root 0 is not defined");
1706  			return -EUCLEAN;
1707  		}
1708  
1709  		/* EXTENT_TREE_V2 can have empty extent trees. */
1710  		if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
1711  			return 0;
1712  
1713  		if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
1714  			generic_err(leaf, 0,
1715  			"invalid root, root %llu must never be empty",
1716  				    owner);
1717  			return -EUCLEAN;
1718  		}
1719  
1720  		return 0;
1721  	}
1722  
1723  	if (unlikely(nritems == 0))
1724  		return 0;
1725  
1726  	/*
1727  	 * Check the following things to make sure this is a good leaf, and
1728  	 * leaf users won't need to bother with similar sanity checks:
1729  	 *
1730  	 * 1) key ordering
1731  	 * 2) item offset and size
1732  	 *    No overlap, no hole, all inside the leaf.
1733  	 * 3) item content
1734  	 *    If possible, do comprehensive sanity check.
1735  	 *    NOTE: All checks must only rely on the item data itself.
1736  	 */
1737  	for (slot = 0; slot < nritems; slot++) {
1738  		u32 item_end_expected;
1739  		u64 item_data_end;
1740  		int ret;
1741  
1742  		btrfs_item_key_to_cpu(leaf, &key, slot);
1743  
1744  		/* Make sure the keys are in the right order */
1745  		if (unlikely(btrfs_comp_cpu_keys(&prev_key, &key) >= 0)) {
1746  			generic_err(leaf, slot,
1747  	"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
1748  				prev_key.objectid, prev_key.type,
1749  				prev_key.offset, key.objectid, key.type,
1750  				key.offset);
1751  			return -EUCLEAN;
1752  		}
1753  
1754  		item_data_end = (u64)btrfs_item_offset(leaf, slot) +
1755  				btrfs_item_size(leaf, slot);
1756  		/*
1757  		 * Make sure the offset and ends are right, remember that the
1758  		 * item data starts at the end of the leaf and grows towards the
1759  		 * front.
1760  		 */
1761  		if (slot == 0)
1762  			item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
1763  		else
1764  			item_end_expected = btrfs_item_offset(leaf,
1765  								 slot - 1);
1766  		if (unlikely(item_data_end != item_end_expected)) {
1767  			generic_err(leaf, slot,
1768  				"unexpected item end, have %llu expect %u",
1769  				item_data_end, item_end_expected);
1770  			return -EUCLEAN;
1771  		}
1772  
1773  		/*
1774  		 * Check to make sure that we don't point outside of the leaf,
1775  		 * just in case all the items are consistent to each other, but
1776  		 * all point outside of the leaf.
1777  		 */
1778  		if (unlikely(item_data_end > BTRFS_LEAF_DATA_SIZE(fs_info))) {
1779  			generic_err(leaf, slot,
1780  			"slot end outside of leaf, have %llu expect range [0, %u]",
1781  				item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info));
1782  			return -EUCLEAN;
1783  		}
1784  
1785  		/* Also check if the item pointer overlaps with btrfs item. */
1786  		if (unlikely(btrfs_item_ptr_offset(leaf, slot) <
1787  			     btrfs_item_nr_offset(leaf, slot) + sizeof(struct btrfs_item))) {
1788  			generic_err(leaf, slot,
1789  		"slot overlaps with its data, item end %lu data start %lu",
1790  				btrfs_item_nr_offset(leaf, slot) +
1791  				sizeof(struct btrfs_item),
1792  				btrfs_item_ptr_offset(leaf, slot));
1793  			return -EUCLEAN;
1794  		}
1795  
1796  		if (check_item_data) {
1797  			/*
1798  			 * Check if the item size and content meet other
1799  			 * criteria
1800  			 */
1801  			ret = check_leaf_item(leaf, &key, slot, &prev_key);
1802  			if (unlikely(ret < 0))
1803  				return ret;
1804  		}
1805  
1806  		prev_key.objectid = key.objectid;
1807  		prev_key.type = key.type;
1808  		prev_key.offset = key.offset;
1809  	}
1810  
1811  	return 0;
1812  }
1813  
/*
 * Run the complete set of leaf checks on @leaf.
 *
 * Thin wrapper around check_leaf() with the item-data flag set, so that in
 * addition to the structural checks the content of each item is verified
 * as well.
 *
 * Return whatever check_leaf() returns.  The function is registered for
 * error injection with an errno-style return.
 */
int btrfs_check_leaf_full(struct extent_buffer *leaf)
{
	const bool check_item_data = true;

	return check_leaf(leaf, check_item_data);
}
ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
1819  
/*
 * Run the relaxed set of leaf checks on @leaf.
 *
 * Thin wrapper around check_leaf() with the item-data flag cleared, so the
 * per-item content checks are skipped.
 *
 * Return whatever check_leaf() returns.
 */
int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
{
	const bool check_item_data = false;

	return check_leaf(leaf, check_item_data);
}
1824  
1825  int btrfs_check_node(struct extent_buffer *node)
1826  {
1827  	struct btrfs_fs_info *fs_info = node->fs_info;
1828  	unsigned long nr = btrfs_header_nritems(node);
1829  	struct btrfs_key key, next_key;
1830  	int slot;
1831  	int level = btrfs_header_level(node);
1832  	u64 bytenr;
1833  	int ret = 0;
1834  
1835  	if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
1836  		generic_err(node, 0,
1837  			"invalid level for node, have %d expect [1, %d]",
1838  			level, BTRFS_MAX_LEVEL - 1);
1839  		return -EUCLEAN;
1840  	}
1841  	if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) {
1842  		btrfs_crit(fs_info,
1843  "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
1844  			   btrfs_header_owner(node), node->start,
1845  			   nr == 0 ? "small" : "large", nr,
1846  			   BTRFS_NODEPTRS_PER_BLOCK(fs_info));
1847  		return -EUCLEAN;
1848  	}
1849  
1850  	for (slot = 0; slot < nr - 1; slot++) {
1851  		bytenr = btrfs_node_blockptr(node, slot);
1852  		btrfs_node_key_to_cpu(node, &key, slot);
1853  		btrfs_node_key_to_cpu(node, &next_key, slot + 1);
1854  
1855  		if (unlikely(!bytenr)) {
1856  			generic_err(node, slot,
1857  				"invalid NULL node pointer");
1858  			ret = -EUCLEAN;
1859  			goto out;
1860  		}
1861  		if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) {
1862  			generic_err(node, slot,
1863  			"unaligned pointer, have %llu should be aligned to %u",
1864  				bytenr, fs_info->sectorsize);
1865  			ret = -EUCLEAN;
1866  			goto out;
1867  		}
1868  
1869  		if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) {
1870  			generic_err(node, slot,
1871  	"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
1872  				key.objectid, key.type, key.offset,
1873  				next_key.objectid, next_key.type,
1874  				next_key.offset);
1875  			ret = -EUCLEAN;
1876  			goto out;
1877  		}
1878  	}
1879  out:
1880  	return ret;
1881  }
1882  ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
1883  
1884  int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
1885  {
1886  	const bool is_subvol = is_fstree(root_owner);
1887  	const u64 eb_owner = btrfs_header_owner(eb);
1888  
1889  	/*
1890  	 * Skip dummy fs, as selftests don't create unique ebs for each dummy
1891  	 * root.
1892  	 */
1893  	if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &eb->fs_info->fs_state))
1894  		return 0;
1895  	/*
1896  	 * There are several call sites (backref walking, qgroup, and data
1897  	 * reloc) passing 0 as @root_owner, as they are not holding the
1898  	 * tree root.  In that case, we can not do a reliable ownership check,
1899  	 * so just exit.
1900  	 */
1901  	if (root_owner == 0)
1902  		return 0;
1903  	/*
1904  	 * These trees use key.offset as their owner, our callers don't have
1905  	 * the extra capacity to pass key.offset here.  So we just skip them.
1906  	 */
1907  	if (root_owner == BTRFS_TREE_LOG_OBJECTID ||
1908  	    root_owner == BTRFS_TREE_RELOC_OBJECTID)
1909  		return 0;
1910  
1911  	if (!is_subvol) {
1912  		/* For non-subvolume trees, the eb owner should match root owner */
1913  		if (unlikely(root_owner != eb_owner)) {
1914  			btrfs_crit(eb->fs_info,
1915  "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect %llu",
1916  				btrfs_header_level(eb) == 0 ? "leaf" : "node",
1917  				root_owner, btrfs_header_bytenr(eb), eb_owner,
1918  				root_owner);
1919  			return -EUCLEAN;
1920  		}
1921  		return 0;
1922  	}
1923  
1924  	/*
1925  	 * For subvolume trees, owners can mismatch, but they should all belong
1926  	 * to subvolume trees.
1927  	 */
1928  	if (unlikely(is_subvol != is_fstree(eb_owner))) {
1929  		btrfs_crit(eb->fs_info,
1930  "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect [%llu, %llu]",
1931  			btrfs_header_level(eb) == 0 ? "leaf" : "node",
1932  			root_owner, btrfs_header_bytenr(eb), eb_owner,
1933  			BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID);
1934  		return -EUCLEAN;
1935  	}
1936  	return 0;
1937  }
1938