1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) Qu Wenruo 2017. All rights reserved. 4 */ 5 6 /* 7 * The module is used to catch unexpected/corrupted tree block data. 8 * Such behavior can be caused either by a fuzzed image or bugs. 9 * 10 * The objective is to do leaf/node validation checks when tree block is read 11 * from disk, and check *every* possible member, so other code won't 12 * need to checking them again. 13 * 14 * Due to the potential and unwanted damage, every checker needs to be 15 * carefully reviewed otherwise so it does not prevent mount of valid images. 16 */ 17 18 #include <linux/types.h> 19 #include <linux/stddef.h> 20 #include <linux/error-injection.h> 21 #include "messages.h" 22 #include "ctree.h" 23 #include "tree-checker.h" 24 #include "compression.h" 25 #include "volumes.h" 26 #include "misc.h" 27 #include "fs.h" 28 #include "accessors.h" 29 #include "file-item.h" 30 #include "inode-item.h" 31 #include "dir-item.h" 32 #include "extent-tree.h" 33 34 /* 35 * Error message should follow the following format: 36 * corrupt <type>: <identifier>, <reason>[, <bad_value>] 37 * 38 * @type: leaf or node 39 * @identifier: the necessary info to locate the leaf/node. 40 * It's recommended to decode key.objecitd/offset if it's 41 * meaningful. 42 * @reason: describe the error 43 * @bad_value: optional, it's recommended to output bad value and its 44 * expected value (range). 45 * 46 * Since comma is used to separate the components, only space is allowed 47 * inside each component. 48 */ 49 50 /* 51 * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt. 52 * Allows callers to customize the output. 53 */ 54 __printf(3, 4) 55 __cold 56 static void generic_err(const struct extent_buffer *eb, int slot, 57 const char *fmt, ...) 58 { 59 const struct btrfs_fs_info *fs_info = eb->fs_info; 60 struct va_format vaf; 61 va_list args; 62 63 va_start(args, fmt); 64 65 vaf.fmt = fmt; 66 vaf.va = &args; 67 68 dump_page(folio_page(eb->folios[0], 0), "eb page dump"); 69 btrfs_crit(fs_info, 70 "corrupt %s: root=%llu block=%llu slot=%d, %pV", 71 btrfs_header_level(eb) == 0 ? "leaf" : "node", 72 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf); 73 va_end(args); 74 } 75 76 /* 77 * Customized reporter for extent data item, since its key objectid and 78 * offset has its own meaning. 79 */ 80 __printf(3, 4) 81 __cold 82 static void file_extent_err(const struct extent_buffer *eb, int slot, 83 const char *fmt, ...) 84 { 85 const struct btrfs_fs_info *fs_info = eb->fs_info; 86 struct btrfs_key key; 87 struct va_format vaf; 88 va_list args; 89 90 btrfs_item_key_to_cpu(eb, &key, slot); 91 va_start(args, fmt); 92 93 vaf.fmt = fmt; 94 vaf.va = &args; 95 96 dump_page(folio_page(eb->folios[0], 0), "eb page dump"); 97 btrfs_crit(fs_info, 98 "corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV", 99 btrfs_header_level(eb) == 0 ? "leaf" : "node", 100 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, 101 key.objectid, key.offset, &vaf); 102 va_end(args); 103 } 104 105 /* 106 * Return 0 if the btrfs_file_extent_##name is aligned to @alignment 107 * Else return 1 108 */ 109 #define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment) \ 110 ({ \ 111 if (unlikely(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), \ 112 (alignment)))) \ 113 file_extent_err((leaf), (slot), \ 114 "invalid %s for file extent, have %llu, should be aligned to %u", \ 115 (#name), btrfs_file_extent_##name((leaf), (fi)), \ 116 (alignment)); \ 117 (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \ 118 }) 119 120 static u64 file_extent_end(struct extent_buffer *leaf, 121 struct btrfs_key *key, 122 struct btrfs_file_extent_item *extent) 123 { 124 u64 end; 125 u64 len; 126 127 if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) { 128 len = btrfs_file_extent_ram_bytes(leaf, extent); 129 end = ALIGN(key->offset + len, leaf->fs_info->sectorsize); 130 } else { 131 len = btrfs_file_extent_num_bytes(leaf, extent); 132 end = key->offset + len; 133 } 134 return end; 135 } 136 137 /* 138 * Customized report for dir_item, the only new important information is 139 * key->objectid, which represents inode number 140 */ 141 __printf(3, 4) 142 __cold 143 static void dir_item_err(const struct extent_buffer *eb, int slot, 144 const char *fmt, ...) 145 { 146 const struct btrfs_fs_info *fs_info = eb->fs_info; 147 struct btrfs_key key; 148 struct va_format vaf; 149 va_list args; 150 151 btrfs_item_key_to_cpu(eb, &key, slot); 152 va_start(args, fmt); 153 154 vaf.fmt = fmt; 155 vaf.va = &args; 156 157 dump_page(folio_page(eb->folios[0], 0), "eb page dump"); 158 btrfs_crit(fs_info, 159 "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV", 160 btrfs_header_level(eb) == 0 ? "leaf" : "node", 161 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, 162 key.objectid, &vaf); 163 va_end(args); 164 } 165 166 /* 167 * This functions checks prev_key->objectid, to ensure current key and prev_key 168 * share the same objectid as inode number. 169 * 170 * This is to detect missing INODE_ITEM in subvolume trees. 171 * 172 * Return true if everything is OK or we don't need to check. 173 * Return false if anything is wrong. 174 */ 175 static bool check_prev_ino(struct extent_buffer *leaf, 176 struct btrfs_key *key, int slot, 177 struct btrfs_key *prev_key) 178 { 179 /* No prev key, skip check */ 180 if (slot == 0) 181 return true; 182 183 /* Only these key->types needs to be checked */ 184 ASSERT(key->type == BTRFS_XATTR_ITEM_KEY || 185 key->type == BTRFS_INODE_REF_KEY || 186 key->type == BTRFS_INODE_EXTREF_KEY || 187 key->type == BTRFS_DIR_INDEX_KEY || 188 key->type == BTRFS_DIR_ITEM_KEY || 189 key->type == BTRFS_EXTENT_DATA_KEY, "key->type=%u", key->type); 190 191 /* 192 * Only subvolume trees along with their reloc trees need this check. 193 * Things like log tree doesn't follow this ino requirement. 194 */ 195 if (!btrfs_is_fstree(btrfs_header_owner(leaf))) 196 return true; 197 198 if (key->objectid == prev_key->objectid) 199 return true; 200 201 /* Error found */ 202 dir_item_err(leaf, slot, 203 "invalid previous key objectid, have %llu expect %llu", 204 prev_key->objectid, key->objectid); 205 return false; 206 } 207 static int check_extent_data_item(struct extent_buffer *leaf, 208 struct btrfs_key *key, int slot, 209 struct btrfs_key *prev_key) 210 { 211 struct btrfs_fs_info *fs_info = leaf->fs_info; 212 struct btrfs_file_extent_item *fi; 213 u32 sectorsize = fs_info->sectorsize; 214 u32 item_size = btrfs_item_size(leaf, slot); 215 u64 extent_end; 216 217 if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { 218 file_extent_err(leaf, slot, 219 "unaligned file_offset for file extent, have %llu should be aligned to %u", 220 key->offset, sectorsize); 221 return -EUCLEAN; 222 } 223 224 /* 225 * Previous key must have the same key->objectid (ino). 226 * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA. 227 * But if objectids mismatch, it means we have a missing 228 * INODE_ITEM. 229 */ 230 if (unlikely(!check_prev_ino(leaf, key, slot, prev_key))) 231 return -EUCLEAN; 232 233 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 234 235 /* 236 * Make sure the item contains at least inline header, so the file 237 * extent type is not some garbage. 238 */ 239 if (unlikely(item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START)) { 240 file_extent_err(leaf, slot, 241 "invalid item size, have %u expect [%zu, %u)", 242 item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START, 243 SZ_4K); 244 return -EUCLEAN; 245 } 246 if (unlikely(btrfs_file_extent_type(leaf, fi) >= 247 BTRFS_NR_FILE_EXTENT_TYPES)) { 248 file_extent_err(leaf, slot, 249 "invalid type for file extent, have %u expect range [0, %u]", 250 btrfs_file_extent_type(leaf, fi), 251 BTRFS_NR_FILE_EXTENT_TYPES - 1); 252 return -EUCLEAN; 253 } 254 255 /* 256 * Support for new compression/encryption must introduce incompat flag, 257 * and must be caught in open_ctree(). 258 */ 259 if (unlikely(btrfs_file_extent_compression(leaf, fi) >= 260 BTRFS_NR_COMPRESS_TYPES)) { 261 file_extent_err(leaf, slot, 262 "invalid compression for file extent, have %u expect range [0, %u]", 263 btrfs_file_extent_compression(leaf, fi), 264 BTRFS_NR_COMPRESS_TYPES - 1); 265 return -EUCLEAN; 266 } 267 if (unlikely(btrfs_file_extent_encryption(leaf, fi))) { 268 file_extent_err(leaf, slot, 269 "invalid encryption for file extent, have %u expect 0", 270 btrfs_file_extent_encryption(leaf, fi)); 271 return -EUCLEAN; 272 } 273 if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { 274 /* Inline extent must have 0 as key offset */ 275 if (unlikely(key->offset)) { 276 file_extent_err(leaf, slot, 277 "invalid file_offset for inline file extent, have %llu expect 0", 278 key->offset); 279 return -EUCLEAN; 280 } 281 282 /* Compressed inline extent has no on-disk size, skip it */ 283 if (btrfs_file_extent_compression(leaf, fi) != 284 BTRFS_COMPRESS_NONE) 285 return 0; 286 287 /* Uncompressed inline extent size must match item size */ 288 if (unlikely(item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + 289 btrfs_file_extent_ram_bytes(leaf, fi))) { 290 file_extent_err(leaf, slot, 291 "invalid ram_bytes for uncompressed inline extent, have %u expect %llu", 292 item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START + 293 btrfs_file_extent_ram_bytes(leaf, fi)); 294 return -EUCLEAN; 295 } 296 return 0; 297 } 298 299 /* 300 * For the data reloc tree, file extent items are written by 301 * relocation's own paths. The data reloc inode is created with 302 * BTRFS_INODE_NOCOMPRESS, so insert_ordered_extent_file_extent() 303 * always leaves the compression field at 0. Encryption and 304 * other_encoding are reserved-and-zero in btrfs. A non-zero value 305 * for any of these means the leaf decoded from disk does not match 306 * what the kernel wrote, i.e. on-disk corruption. 307 * 308 * The file_extent_item's offset field is NOT a universal invariant 309 * here: partial-PREALLOC writebacks legitimately produce REG items 310 * with non-zero offset at non-boundary keys. The offset check is 311 * performed at the call site in get_new_location(), which only 312 * inspects cluster-boundary keys where offset is always 0. 313 */ 314 if (unlikely(btrfs_header_owner(leaf) == BTRFS_DATA_RELOC_TREE_OBJECTID && 315 (btrfs_file_extent_compression(leaf, fi) || 316 btrfs_file_extent_encryption(leaf, fi) || 317 btrfs_file_extent_other_encoding(leaf, fi)))) { 318 file_extent_err(leaf, slot, 319 "invalid encoding fields for data reloc tree, compression=%u encryption=%u other_encoding=%u", 320 btrfs_file_extent_compression(leaf, fi), 321 btrfs_file_extent_encryption(leaf, fi), 322 btrfs_file_extent_other_encoding(leaf, fi)); 323 return -EUCLEAN; 324 } 325 326 /* Regular or preallocated extent has fixed item size */ 327 if (unlikely(item_size != sizeof(*fi))) { 328 file_extent_err(leaf, slot, 329 "invalid item size for reg/prealloc file extent, have %u expect %zu", 330 item_size, sizeof(*fi)); 331 return -EUCLEAN; 332 } 333 if (unlikely(CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) || 334 CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) || 335 CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) || 336 CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) || 337 CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))) 338 return -EUCLEAN; 339 340 /* Catch extent end overflow */ 341 if (unlikely(check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi), 342 key->offset, &extent_end))) { 343 file_extent_err(leaf, slot, 344 "extent end overflow, have file offset %llu extent num bytes %llu", 345 key->offset, 346 btrfs_file_extent_num_bytes(leaf, fi)); 347 return -EUCLEAN; 348 } 349 350 /* 351 * Check that no two consecutive file extent items, in the same leaf, 352 * present ranges that overlap each other. 353 */ 354 if (slot > 0 && 355 prev_key->objectid == key->objectid && 356 prev_key->type == BTRFS_EXTENT_DATA_KEY) { 357 struct btrfs_file_extent_item *prev_fi; 358 u64 prev_end; 359 360 prev_fi = btrfs_item_ptr(leaf, slot - 1, 361 struct btrfs_file_extent_item); 362 prev_end = file_extent_end(leaf, prev_key, prev_fi); 363 if (unlikely(prev_end > key->offset)) { 364 file_extent_err(leaf, slot - 1, 365 "file extent end range (%llu) goes beyond start offset (%llu) of the next file extent", 366 prev_end, key->offset); 367 return -EUCLEAN; 368 } 369 } 370 371 /* 372 * For non-compressed data extents, ram_bytes should match its 373 * disk_num_bytes. 374 * However we do not really utilize ram_bytes in this case, so this check 375 * is only optional for DEBUG builds for developers to catch the 376 * unexpected behaviors. 377 */ 378 if (IS_ENABLED(CONFIG_BTRFS_DEBUG) && 379 btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE && 380 btrfs_file_extent_disk_bytenr(leaf, fi)) { 381 if (WARN_ON(btrfs_file_extent_ram_bytes(leaf, fi) != 382 btrfs_file_extent_disk_num_bytes(leaf, fi))) 383 file_extent_err(leaf, slot, 384 "mismatch ram_bytes (%llu) and disk_num_bytes (%llu) for non-compressed extent", 385 btrfs_file_extent_ram_bytes(leaf, fi), 386 btrfs_file_extent_disk_num_bytes(leaf, fi)); 387 } 388 389 return 0; 390 } 391 392 static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key, 393 int slot, struct btrfs_key *prev_key) 394 { 395 struct btrfs_fs_info *fs_info = leaf->fs_info; 396 u32 sectorsize = fs_info->sectorsize; 397 const u32 csumsize = fs_info->csum_size; 398 399 if (unlikely(key->objectid != BTRFS_EXTENT_CSUM_OBJECTID)) { 400 generic_err(leaf, slot, 401 "invalid key objectid for csum item, have %llu expect %llu", 402 key->objectid, BTRFS_EXTENT_CSUM_OBJECTID); 403 return -EUCLEAN; 404 } 405 if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { 406 generic_err(leaf, slot, 407 "unaligned key offset for csum item, have %llu should be aligned to %u", 408 key->offset, sectorsize); 409 return -EUCLEAN; 410 } 411 if (unlikely(!IS_ALIGNED(btrfs_item_size(leaf, slot), csumsize))) { 412 generic_err(leaf, slot, 413 "unaligned item size for csum item, have %u should be aligned to %u", 414 btrfs_item_size(leaf, slot), csumsize); 415 return -EUCLEAN; 416 } 417 if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) { 418 u64 prev_csum_end; 419 u32 prev_item_size; 420 421 prev_item_size = btrfs_item_size(leaf, slot - 1); 422 prev_csum_end = (prev_item_size / csumsize) * sectorsize; 423 prev_csum_end += prev_key->offset; 424 if (unlikely(prev_csum_end > key->offset)) { 425 generic_err(leaf, slot - 1, 426 "csum end range (%llu) goes beyond the start range (%llu) of the next csum item", 427 prev_csum_end, key->offset); 428 return -EUCLEAN; 429 } 430 } 431 return 0; 432 } 433 434 /* Inode item error output has the same format as dir_item_err() */ 435 #define inode_item_err(eb, slot, fmt, ...) \ 436 dir_item_err(eb, slot, fmt, __VA_ARGS__) 437 438 static int check_inode_key(struct extent_buffer *leaf, struct btrfs_key *key, 439 int slot) 440 { 441 struct btrfs_key item_key; 442 bool is_inode_item; 443 444 btrfs_item_key_to_cpu(leaf, &item_key, slot); 445 is_inode_item = (item_key.type == BTRFS_INODE_ITEM_KEY); 446 447 /* For XATTR_ITEM, location key should be all 0 */ 448 if (item_key.type == BTRFS_XATTR_ITEM_KEY) { 449 if (unlikely(key->objectid != 0 || key->type != 0 || 450 key->offset != 0)) 451 return -EUCLEAN; 452 return 0; 453 } 454 455 if (unlikely((key->objectid < BTRFS_FIRST_FREE_OBJECTID || 456 key->objectid > BTRFS_LAST_FREE_OBJECTID) && 457 key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID && 458 key->objectid != BTRFS_FREE_INO_OBJECTID)) { 459 if (is_inode_item) { 460 generic_err(leaf, slot, 461 "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu", 462 key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, 463 BTRFS_FIRST_FREE_OBJECTID, 464 BTRFS_LAST_FREE_OBJECTID, 465 BTRFS_FREE_INO_OBJECTID); 466 } else { 467 dir_item_err(leaf, slot, 468 "invalid location key objectid: has %llu expect %llu or [%llu, %llu] or %llu", 469 key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, 470 BTRFS_FIRST_FREE_OBJECTID, 471 BTRFS_LAST_FREE_OBJECTID, 472 BTRFS_FREE_INO_OBJECTID); 473 } 474 return -EUCLEAN; 475 } 476 if (unlikely(key->offset != 0)) { 477 if (is_inode_item) 478 inode_item_err(leaf, slot, 479 "invalid key offset: has %llu expect 0", 480 key->offset); 481 else 482 dir_item_err(leaf, slot, 483 "invalid location key offset:has %llu expect 0", 484 key->offset); 485 return -EUCLEAN; 486 } 487 return 0; 488 } 489 490 static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key, 491 int slot) 492 { 493 struct btrfs_key item_key; 494 bool is_root_item; 495 496 btrfs_item_key_to_cpu(leaf, &item_key, slot); 497 is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY); 498 499 /* 500 * Bad rootid for reloc trees. 501 * 502 * Reloc trees are only for subvolume trees, other trees only need 503 * to be COWed to be relocated. 504 */ 505 if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID && 506 !btrfs_is_fstree(key->offset))) { 507 generic_err(leaf, slot, 508 "invalid reloc tree for root %lld, root id is not a subvolume tree", 509 key->offset); 510 return -EUCLEAN; 511 } 512 513 /* No such tree id */ 514 if (unlikely(key->objectid == 0)) { 515 if (is_root_item) 516 generic_err(leaf, slot, "invalid root id 0"); 517 else 518 dir_item_err(leaf, slot, 519 "invalid location key root id 0"); 520 return -EUCLEAN; 521 } 522 523 /* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */ 524 if (unlikely(!btrfs_is_fstree(key->objectid) && !is_root_item)) { 525 dir_item_err(leaf, slot, 526 "invalid location key objectid, have %llu expect [%llu, %llu]", 527 key->objectid, BTRFS_FIRST_FREE_OBJECTID, 528 BTRFS_LAST_FREE_OBJECTID); 529 return -EUCLEAN; 530 } 531 532 /* 533 * ROOT_ITEM with non-zero offset means this is a snapshot, created at 534 * @offset transid. 535 * Furthermore, for location key in DIR_ITEM, its offset is always -1. 536 * 537 * So here we only check offset for reloc tree whose key->offset must 538 * be a valid tree. 539 */ 540 if (unlikely(key->objectid == BTRFS_TREE_RELOC_OBJECTID && 541 key->offset == 0)) { 542 generic_err(leaf, slot, "invalid root id 0 for reloc tree"); 543 return -EUCLEAN; 544 } 545 return 0; 546 } 547 548 static int check_dir_item(struct extent_buffer *leaf, 549 struct btrfs_key *key, struct btrfs_key *prev_key, 550 int slot) 551 { 552 struct btrfs_fs_info *fs_info = leaf->fs_info; 553 struct btrfs_dir_item *di; 554 u32 item_size = btrfs_item_size(leaf, slot); 555 u32 cur = 0; 556 557 if (unlikely(!check_prev_ino(leaf, key, slot, prev_key))) 558 return -EUCLEAN; 559 560 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); 561 while (cur < item_size) { 562 struct btrfs_key location_key; 563 u32 name_len; 564 u32 data_len; 565 u32 max_name_len; 566 u32 total_size; 567 u32 name_hash; 568 u8 dir_type; 569 int ret; 570 571 /* header itself should not cross item boundary */ 572 if (unlikely(cur + sizeof(*di) > item_size)) { 573 dir_item_err(leaf, slot, 574 "dir item header crosses item boundary, have %zu boundary %u", 575 cur + sizeof(*di), item_size); 576 return -EUCLEAN; 577 } 578 579 /* Location key check */ 580 btrfs_dir_item_key_to_cpu(leaf, di, &location_key); 581 if (location_key.type == BTRFS_ROOT_ITEM_KEY) { 582 ret = check_root_key(leaf, &location_key, slot); 583 if (unlikely(ret < 0)) 584 return ret; 585 } else if (location_key.type == BTRFS_INODE_ITEM_KEY || 586 location_key.type == 0) { 587 ret = check_inode_key(leaf, &location_key, slot); 588 if (unlikely(ret < 0)) 589 return ret; 590 } else { 591 dir_item_err(leaf, slot, 592 "invalid location key type, have %u, expect %u or %u", 593 location_key.type, BTRFS_ROOT_ITEM_KEY, 594 BTRFS_INODE_ITEM_KEY); 595 return -EUCLEAN; 596 } 597 598 /* dir type check */ 599 dir_type = btrfs_dir_ftype(leaf, di); 600 if (unlikely(dir_type <= BTRFS_FT_UNKNOWN || 601 dir_type >= BTRFS_FT_MAX)) { 602 dir_item_err(leaf, slot, 603 "invalid dir item type, have %u expect (0, %u)", 604 dir_type, BTRFS_FT_MAX); 605 return -EUCLEAN; 606 } 607 608 if (unlikely(key->type == BTRFS_XATTR_ITEM_KEY && 609 dir_type != BTRFS_FT_XATTR)) { 610 dir_item_err(leaf, slot, 611 "invalid dir item type for XATTR key, have %u expect %u", 612 dir_type, BTRFS_FT_XATTR); 613 return -EUCLEAN; 614 } 615 if (unlikely(dir_type == BTRFS_FT_XATTR && 616 key->type != BTRFS_XATTR_ITEM_KEY)) { 617 dir_item_err(leaf, slot, 618 "xattr dir type found for non-XATTR key"); 619 return -EUCLEAN; 620 } 621 if (dir_type == BTRFS_FT_XATTR) 622 max_name_len = XATTR_NAME_MAX; 623 else 624 max_name_len = BTRFS_NAME_LEN; 625 626 /* Name/data length check */ 627 name_len = btrfs_dir_name_len(leaf, di); 628 data_len = btrfs_dir_data_len(leaf, di); 629 if (unlikely(name_len > max_name_len)) { 630 dir_item_err(leaf, slot, 631 "dir item name len too long, have %u max %u", 632 name_len, max_name_len); 633 return -EUCLEAN; 634 } 635 if (unlikely(name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info))) { 636 dir_item_err(leaf, slot, 637 "dir item name and data len too long, have %u max %u", 638 name_len + data_len, 639 BTRFS_MAX_XATTR_SIZE(fs_info)); 640 return -EUCLEAN; 641 } 642 643 if (unlikely(data_len && dir_type != BTRFS_FT_XATTR)) { 644 dir_item_err(leaf, slot, 645 "dir item with invalid data len, have %u expect 0", 646 data_len); 647 return -EUCLEAN; 648 } 649 650 total_size = sizeof(*di) + name_len + data_len; 651 652 /* header and name/data should not cross item boundary */ 653 if (unlikely(cur + total_size > item_size)) { 654 dir_item_err(leaf, slot, 655 "dir item data crosses item boundary, have %u boundary %u", 656 cur + total_size, item_size); 657 return -EUCLEAN; 658 } 659 660 /* 661 * Special check for XATTR/DIR_ITEM, as key->offset is name 662 * hash, should match its name 663 */ 664 if (key->type == BTRFS_DIR_ITEM_KEY || 665 key->type == BTRFS_XATTR_ITEM_KEY) { 666 char namebuf[MAX(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; 667 668 read_extent_buffer(leaf, namebuf, 669 (unsigned long)(di + 1), name_len); 670 name_hash = btrfs_name_hash(namebuf, name_len); 671 if (unlikely(key->offset != name_hash)) { 672 dir_item_err(leaf, slot, 673 "name hash mismatch with key, have 0x%016x expect 0x%016llx", 674 name_hash, key->offset); 675 return -EUCLEAN; 676 } 677 } 678 cur += total_size; 679 di = (struct btrfs_dir_item *)((void *)di + total_size); 680 } 681 return 0; 682 } 683 684 __printf(3, 4) 685 __cold 686 static void block_group_err(const struct extent_buffer *eb, int slot, 687 const char *fmt, ...) 688 { 689 const struct btrfs_fs_info *fs_info = eb->fs_info; 690 struct btrfs_key key; 691 struct va_format vaf; 692 va_list args; 693 694 btrfs_item_key_to_cpu(eb, &key, slot); 695 va_start(args, fmt); 696 697 vaf.fmt = fmt; 698 vaf.va = &args; 699 700 dump_page(folio_page(eb->folios[0], 0), "eb page dump"); 701 btrfs_crit(fs_info, 702 "corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV", 703 btrfs_header_level(eb) == 0 ? "leaf" : "node", 704 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, 705 key.objectid, key.offset, &vaf); 706 va_end(args); 707 } 708 709 static int check_block_group_item(struct extent_buffer *leaf, 710 struct btrfs_key *key, int slot) 711 { 712 struct btrfs_fs_info *fs_info = leaf->fs_info; 713 struct btrfs_block_group_item bgi; 714 u32 item_size = btrfs_item_size(leaf, slot); 715 u64 chunk_objectid; 716 u64 flags; 717 u64 type; 718 size_t exp_size; 719 720 /* 721 * Here we don't really care about alignment since extent allocator can 722 * handle it. We care more about the size. 723 */ 724 if (unlikely(key->offset == 0)) { 725 block_group_err(leaf, slot, 726 "invalid block group size 0"); 727 return -EUCLEAN; 728 } 729 730 if (btrfs_fs_incompat(fs_info, REMAP_TREE)) 731 exp_size = sizeof(struct btrfs_block_group_item_v2); 732 else 733 exp_size = sizeof(struct btrfs_block_group_item); 734 735 if (unlikely(item_size != exp_size)) { 736 block_group_err(leaf, slot, 737 "invalid item size, have %u expect %zu", 738 item_size, exp_size); 739 return -EUCLEAN; 740 } 741 742 read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot), 743 sizeof(bgi)); 744 chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi); 745 if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { 746 /* 747 * We don't init the nr_global_roots until we load the global 748 * roots, so this could be 0 at mount time. If it's 0 we'll 749 * just assume we're fine, and later we'll check against our 750 * actual value. 751 */ 752 if (unlikely(fs_info->nr_global_roots && 753 chunk_objectid >= fs_info->nr_global_roots)) { 754 block_group_err(leaf, slot, 755 "invalid block group global root id, have %llu, needs to be <= %llu", 756 chunk_objectid, 757 fs_info->nr_global_roots); 758 return -EUCLEAN; 759 } 760 } else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { 761 block_group_err(leaf, slot, 762 "invalid block group chunk objectid, have %llu expect %llu", 763 btrfs_stack_block_group_chunk_objectid(&bgi), 764 BTRFS_FIRST_CHUNK_TREE_OBJECTID); 765 return -EUCLEAN; 766 } 767 768 if (unlikely(btrfs_stack_block_group_used(&bgi) > key->offset)) { 769 block_group_err(leaf, slot, 770 "invalid block group used, have %llu expect [0, %llu)", 771 btrfs_stack_block_group_used(&bgi), key->offset); 772 return -EUCLEAN; 773 } 774 775 flags = btrfs_stack_block_group_flags(&bgi); 776 if (unlikely(hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1)) { 777 block_group_err(leaf, slot, 778 "invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set", 779 flags & BTRFS_BLOCK_GROUP_PROFILE_MASK, 780 hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)); 781 return -EUCLEAN; 782 } 783 784 if (unlikely(flags & BTRFS_BLOCK_GROUP_METADATA_REMAP && 785 !btrfs_fs_incompat(fs_info, REMAP_TREE))) { 786 block_group_err(leaf, slot, 787 "invalid flags, have 0x%llx (METADATA_REMAP flag set) but no remap-tree incompat flag", 788 flags); 789 return -EUCLEAN; 790 } 791 792 type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; 793 if (unlikely(type != BTRFS_BLOCK_GROUP_DATA && 794 type != BTRFS_BLOCK_GROUP_METADATA && 795 type != BTRFS_BLOCK_GROUP_SYSTEM && 796 type != BTRFS_BLOCK_GROUP_METADATA_REMAP && 797 type != (BTRFS_BLOCK_GROUP_METADATA | 798 BTRFS_BLOCK_GROUP_DATA))) { 799 block_group_err(leaf, slot, 800 "invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx, 0x%llx or 0x%llx", 801 type, hweight64(type), 802 BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, 803 BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_METADATA_REMAP, 804 BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA); 805 return -EUCLEAN; 806 } 807 808 if (unlikely(!btrfs_fs_incompat(fs_info, REMAP_TREE) && 809 type == BTRFS_BLOCK_GROUP_METADATA_REMAP)) { 810 block_group_err(leaf, slot, 811 "invalid type, METADATA_REMAP set but REMAP_TREE incompat flag not set"); 812 return -EUCLEAN; 813 } 814 815 if (unlikely(!btrfs_fs_incompat(fs_info, REMAP_TREE) && 816 flags & BTRFS_BLOCK_GROUP_REMAPPED)) { 817 block_group_err(leaf, slot, 818 "invalid flags, REMAPPED set but REMAP_TREE incompat flag not set"); 819 return -EUCLEAN; 820 } 821 822 if (item_size == sizeof(struct btrfs_block_group_item_v2)) { 823 struct btrfs_block_group_item_v2 *bgi2; 824 u64 remap_bytes; 825 u32 identity_remap_count; 826 827 bgi2 = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item_v2); 828 remap_bytes = btrfs_block_group_v2_remap_bytes(leaf, bgi2); 829 830 if (unlikely(remap_bytes > key->offset)) { 831 block_group_err(leaf, slot, 832 "invalid remap_bytes, have %llu expect [0, %llu]", 833 remap_bytes, key->offset); 834 return -EUCLEAN; 835 } 836 837 identity_remap_count = btrfs_block_group_v2_identity_remap_count(leaf, bgi2); 838 if (unlikely((u64)identity_remap_count > 839 key->offset >> fs_info->sectorsize_bits)) { 840 block_group_err(leaf, slot, 841 "invalid identity_remap_count, have %u expect [0, %llu]", 842 identity_remap_count, 843 key->offset >> fs_info->sectorsize_bits); 844 return -EUCLEAN; 845 } 846 } 847 848 return 0; 849 } 850 851 __printf(5, 6) 852 __cold 853 static void chunk_err(const struct btrfs_fs_info *fs_info, 854 const struct extent_buffer *leaf, 855 const struct btrfs_chunk *chunk, u64 logical, 856 const char *fmt, ...) 857 { 858 bool is_sb = !leaf; 859 struct va_format vaf; 860 va_list args; 861 int i; 862 int slot = -1; 863 864 if (!is_sb) { 865 /* 866 * Get the slot number by iterating through all slots, this 867 * would provide better readability. 868 */ 869 for (i = 0; i < btrfs_header_nritems(leaf); i++) { 870 if (btrfs_item_ptr_offset(leaf, i) == 871 (unsigned long)chunk) { 872 slot = i; 873 break; 874 } 875 } 876 } 877 va_start(args, fmt); 878 vaf.fmt = fmt; 879 vaf.va = &args; 880 881 if (is_sb) 882 btrfs_crit(fs_info, 883 "corrupt superblock syschunk array: chunk_start=%llu, %pV", 884 logical, &vaf); 885 else 886 btrfs_crit(fs_info, 887 "corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV", 888 BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot, 889 logical, &vaf); 890 va_end(args); 891 } 892 893 static bool valid_stripe_count(u64 profile, u16 num_stripes, u16 sub_stripes) 894 { 895 switch (profile) { 896 case BTRFS_BLOCK_GROUP_RAID0: 897 return true; 898 case BTRFS_BLOCK_GROUP_RAID10: 899 return sub_stripes == btrfs_raid_array[BTRFS_RAID_RAID10].sub_stripes; 900 case BTRFS_BLOCK_GROUP_RAID1: 901 return num_stripes == btrfs_raid_array[BTRFS_RAID_RAID1].devs_min; 902 case BTRFS_BLOCK_GROUP_RAID1C3: 903 return num_stripes == btrfs_raid_array[BTRFS_RAID_RAID1C3].devs_min; 904 case BTRFS_BLOCK_GROUP_RAID1C4: 905 return num_stripes == btrfs_raid_array[BTRFS_RAID_RAID1C4].devs_min; 906 case BTRFS_BLOCK_GROUP_RAID5: 907 return num_stripes >= btrfs_raid_array[BTRFS_RAID_RAID5].devs_min; 908 case BTRFS_BLOCK_GROUP_RAID6: 909 return num_stripes >= btrfs_raid_array[BTRFS_RAID_RAID6].devs_min; 910 case BTRFS_BLOCK_GROUP_DUP: 911 return num_stripes == btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes; 912 case 0: /* SINGLE */ 913 return num_stripes == btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes; 914 default: 915 BUG(); 916 } 917 } 918 919 /* 920 * The common chunk check which could also work on super block sys chunk array. 921 * 922 * If @leaf is NULL, then @chunk must be an on-stack chunk item. 923 * (For superblock sys_chunk array, and fs_info->sectorsize is unreliable) 924 * 925 * Return -EUCLEAN if anything is corrupted. 926 * Return 0 if everything is OK. 927 */ 928 int btrfs_check_chunk_valid(const struct btrfs_fs_info *fs_info, 929 const struct extent_buffer *leaf, 930 const struct btrfs_chunk *chunk, u64 logical, 931 u32 sectorsize) 932 { 933 u64 length; 934 u64 chunk_end; 935 u64 stripe_len; 936 u16 num_stripes; 937 u16 sub_stripes; 938 u64 type; 939 u64 features; 940 u32 chunk_sector_size; 941 bool mixed = false; 942 bool remapped; 943 int raid_index; 944 int nparity; 945 int ncopies; 946 947 if (leaf) { 948 length = btrfs_chunk_length(leaf, chunk); 949 stripe_len = btrfs_chunk_stripe_len(leaf, chunk); 950 num_stripes = btrfs_chunk_num_stripes(leaf, chunk); 951 sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); 952 type = btrfs_chunk_type(leaf, chunk); 953 chunk_sector_size = btrfs_chunk_sector_size(leaf, chunk); 954 } else { 955 length = btrfs_stack_chunk_length(chunk); 956 stripe_len = btrfs_stack_chunk_stripe_len(chunk); 957 num_stripes = btrfs_stack_chunk_num_stripes(chunk); 958 sub_stripes = btrfs_stack_chunk_sub_stripes(chunk); 959 type = btrfs_stack_chunk_type(chunk); 960 chunk_sector_size = btrfs_stack_chunk_sector_size(chunk); 961 } 962 raid_index = btrfs_bg_flags_to_raid_index(type); 963 ncopies = btrfs_raid_array[raid_index].ncopies; 964 nparity = btrfs_raid_array[raid_index].nparity; 965 remapped = (type & BTRFS_BLOCK_GROUP_REMAPPED); 966 967 if (unlikely(!remapped && !num_stripes)) { 968 chunk_err(fs_info, leaf, chunk, logical, 969 "invalid chunk num_stripes, have %u", num_stripes); 970 return -EUCLEAN; 971 } 972 if (unlikely(num_stripes != 0 && num_stripes < ncopies)) { 973 chunk_err(fs_info, leaf, chunk, logical, 974 "invalid chunk num_stripes < ncopies, have %u < %d", 975 num_stripes, ncopies); 976 return -EUCLEAN; 977 } 978 if (unlikely(nparity && num_stripes == nparity)) { 979 chunk_err(fs_info, leaf, chunk, logical, 980 "invalid chunk num_stripes == nparity, have %u == %d", 981 num_stripes, nparity); 982 return -EUCLEAN; 983 } 984 if (unlikely(!IS_ALIGNED(logical, sectorsize))) { 985 chunk_err(fs_info, leaf, chunk, logical, 986 "invalid chunk logical, have %llu should aligned to %u", 987 logical, sectorsize); 988 return -EUCLEAN; 989 } 990 if (unlikely(chunk_sector_size != sectorsize)) { 991 chunk_err(fs_info, leaf, chunk, logical, 992 "invalid chunk sectorsize, have %u expect %u", 993 chunk_sector_size, sectorsize); 994 return -EUCLEAN; 995 } 996 if (unlikely(!length || !IS_ALIGNED(length, sectorsize))) { 997 chunk_err(fs_info, leaf, chunk, logical, 998 "invalid chunk length, have %llu", length); 999 return -EUCLEAN; 1000 } 1001 if (unlikely(check_add_overflow(logical, length, &chunk_end))) { 1002 chunk_err(fs_info, leaf, chunk, logical, 1003 "invalid chunk logical start and length, have logical start %llu length %llu", 1004 logical, length); 1005 return -EUCLEAN; 1006 } 1007 if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) { 1008 chunk_err(fs_info, leaf, chunk, logical, 1009 "invalid chunk stripe length: %llu", 1010 stripe_len); 1011 return -EUCLEAN; 1012 } 1013 /* 1014 * We artificially limit the chunk size, so that the number of stripes 1015 * inside a chunk can be fit into a U32. The current limit (256G) is 1016 * way too large for real world usage anyway, and it's also much larger 1017 * than our existing limit (10G). 1018 * 1019 * Thus it should be a good way to catch obvious bitflips. 1020 */ 1021 if (unlikely(length >= btrfs_stripe_nr_to_offset(U32_MAX))) { 1022 chunk_err(fs_info, leaf, chunk, logical, 1023 "chunk length too large: have %llu limit %llu", 1024 length, btrfs_stripe_nr_to_offset(U32_MAX)); 1025 return -EUCLEAN; 1026 } 1027 if (unlikely(type & ~BTRFS_BLOCK_GROUP_VALID)) { 1028 chunk_err(fs_info, leaf, chunk, logical, 1029 "unrecognized chunk type: 0x%llx", 1030 type & ~BTRFS_BLOCK_GROUP_VALID); 1031 return -EUCLEAN; 1032 } 1033 1034 if (unlikely(!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && 1035 (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)) { 1036 chunk_err(fs_info, leaf, chunk, logical, 1037 "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set", 1038 type & BTRFS_BLOCK_GROUP_PROFILE_MASK); 1039 return -EUCLEAN; 1040 } 1041 if (unlikely((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)) { 1042 chunk_err(fs_info, leaf, chunk, logical, 1043 "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", 1044 type, BTRFS_BLOCK_GROUP_TYPE_MASK); 1045 return -EUCLEAN; 1046 } 1047 1048 if (unlikely((type & BTRFS_BLOCK_GROUP_SYSTEM) && 1049 (type & (BTRFS_BLOCK_GROUP_METADATA | 1050 BTRFS_BLOCK_GROUP_DATA)))) { 1051 chunk_err(fs_info, leaf, chunk, logical, 1052 "system chunk with data or metadata type: 0x%llx", 1053 type); 1054 return -EUCLEAN; 1055 } 1056 1057 features = btrfs_super_incompat_flags(fs_info->super_copy); 1058 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) 1059 mixed = true; 1060 1061 if (!mixed) { 1062 if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA) && 1063 (type & BTRFS_BLOCK_GROUP_DATA))) { 1064 chunk_err(fs_info, leaf, chunk, logical, 1065 "mixed chunk type in non-mixed mode: 0x%llx", type); 1066 return -EUCLEAN; 1067 } 1068 } 1069 1070 if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA_REMAP) && 1071 !(features & BTRFS_FEATURE_INCOMPAT_REMAP_TREE))) { 1072 chunk_err(fs_info, leaf, chunk, logical, 1073 "METADATA_REMAP chunk type without REMAP_TREE incompat bit"); 1074 return -EUCLEAN; 1075 } 1076 1077 if (unlikely(remapped && 1078 !(features & BTRFS_FEATURE_INCOMPAT_REMAP_TREE))) { 1079 chunk_err(fs_info, leaf, chunk, logical, 1080 "REMAPPED chunk flag without REMAP_TREE incompat bit"); 1081 return -EUCLEAN; 1082 } 1083 1084 if (!remapped && 1085 !valid_stripe_count(type & BTRFS_BLOCK_GROUP_PROFILE_MASK, 1086 num_stripes, sub_stripes)) { 1087 chunk_err(fs_info, leaf, chunk, logical, 1088 "invalid num_stripes:sub_stripes %u:%u for profile %llu", 1089 num_stripes, sub_stripes, 1090 type & BTRFS_BLOCK_GROUP_PROFILE_MASK); 1091 return -EUCLEAN; 1092 } 1093 1094 return 0; 1095 } 1096 1097 /* 1098 * Enhanced version of chunk item checker. 1099 * 1100 * The common btrfs_check_chunk_valid() doesn't check item size since it needs 1101 * to work on super block sys_chunk_array which doesn't have full item ptr. 1102 */ 1103 static int check_leaf_chunk_item(struct extent_buffer *leaf, 1104 struct btrfs_chunk *chunk, 1105 struct btrfs_key *key, int slot) 1106 { 1107 struct btrfs_fs_info *fs_info = leaf->fs_info; 1108 int num_stripes; 1109 1110 if (unlikely(btrfs_item_size(leaf, slot) < offsetof(struct btrfs_chunk, stripe))) { 1111 chunk_err(fs_info, leaf, chunk, key->offset, 1112 "invalid chunk item size: have %u expect [%zu, %u)", 1113 btrfs_item_size(leaf, slot), 1114 offsetof(struct btrfs_chunk, stripe), 1115 BTRFS_LEAF_DATA_SIZE(fs_info)); 1116 return -EUCLEAN; 1117 } 1118 1119 num_stripes = btrfs_chunk_num_stripes(leaf, chunk); 1120 /* Let btrfs_check_chunk_valid() handle this error type */ 1121 if (num_stripes == 0) 1122 goto out; 1123 1124 if (unlikely(btrfs_chunk_item_size(num_stripes) != 1125 btrfs_item_size(leaf, slot))) { 1126 chunk_err(fs_info, leaf, chunk, key->offset, 1127 "invalid chunk item size: have %u expect %lu", 1128 btrfs_item_size(leaf, slot), 1129 btrfs_chunk_item_size(num_stripes)); 1130 return -EUCLEAN; 1131 } 1132 out: 1133 return btrfs_check_chunk_valid(fs_info, leaf, chunk, key->offset, 1134 fs_info->sectorsize); 1135 } 1136 1137 __printf(3, 4) 1138 __cold 1139 static void dev_item_err(const struct extent_buffer *eb, int slot, 1140 const char *fmt, ...) 1141 { 1142 struct btrfs_key key; 1143 struct va_format vaf; 1144 va_list args; 1145 1146 btrfs_item_key_to_cpu(eb, &key, slot); 1147 va_start(args, fmt); 1148 1149 vaf.fmt = fmt; 1150 vaf.va = &args; 1151 1152 dump_page(folio_page(eb->folios[0], 0), "eb page dump"); 1153 btrfs_crit(eb->fs_info, 1154 "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV", 1155 btrfs_header_level(eb) == 0 ? "leaf" : "node", 1156 btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, 1157 key.objectid, &vaf); 1158 va_end(args); 1159 } 1160 1161 static int check_dev_item(struct extent_buffer *leaf, 1162 struct btrfs_key *key, int slot) 1163 { 1164 struct btrfs_dev_item *ditem; 1165 const u32 item_size = btrfs_item_size(leaf, slot); 1166 1167 if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) { 1168 dev_item_err(leaf, slot, 1169 "invalid objectid: has=%llu expect=%llu", 1170 key->objectid, BTRFS_DEV_ITEMS_OBJECTID); 1171 return -EUCLEAN; 1172 } 1173 1174 if (unlikely(item_size != sizeof(*ditem))) { 1175 dev_item_err(leaf, slot, "invalid item size: has %u expect %zu", 1176 item_size, sizeof(*ditem)); 1177 return -EUCLEAN; 1178 } 1179 1180 ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); 1181 if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) { 1182 dev_item_err(leaf, slot, 1183 "devid mismatch: key has=%llu item has=%llu", 1184 key->offset, btrfs_device_id(leaf, ditem)); 1185 return -EUCLEAN; 1186 } 1187 1188 /* 1189 * For device total_bytes, we don't have reliable way to check it, as 1190 * it can be 0 for device removal. Device size check can only be done 1191 * by dev extents check. 1192 */ 1193 if (unlikely(btrfs_device_bytes_used(leaf, ditem) > 1194 btrfs_device_total_bytes(leaf, ditem))) { 1195 dev_item_err(leaf, slot, 1196 "invalid bytes used: have %llu expect [0, %llu]", 1197 btrfs_device_bytes_used(leaf, ditem), 1198 btrfs_device_total_bytes(leaf, ditem)); 1199 return -EUCLEAN; 1200 } 1201 /* 1202 * Remaining members like io_align/type/gen/dev_group aren't really 1203 * utilized. Skip them to make later usage of them easier. 1204 */ 1205 return 0; 1206 } 1207 1208 static int check_inode_item(struct extent_buffer *leaf, 1209 struct btrfs_key *key, int slot) 1210 { 1211 struct btrfs_fs_info *fs_info = leaf->fs_info; 1212 struct btrfs_inode_item *iitem; 1213 u64 super_gen = btrfs_super_generation(fs_info->super_copy); 1214 u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); 1215 const u32 item_size = btrfs_item_size(leaf, slot); 1216 u32 mode; 1217 int ret; 1218 u32 flags; 1219 u32 ro_flags; 1220 1221 ret = check_inode_key(leaf, key, slot); 1222 if (unlikely(ret < 0)) 1223 return ret; 1224 1225 if (unlikely(item_size != sizeof(*iitem))) { 1226 generic_err(leaf, slot, "invalid item size: has %u expect %zu", 1227 item_size, sizeof(*iitem)); 1228 return -EUCLEAN; 1229 } 1230 1231 iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item); 1232 1233 /* Here we use super block generation + 1 to handle log tree */ 1234 if (unlikely(btrfs_inode_generation(leaf, iitem) > super_gen + 1)) { 1235 inode_item_err(leaf, slot, 1236 "invalid inode generation: has %llu expect (0, %llu]", 1237 btrfs_inode_generation(leaf, iitem), 1238 super_gen + 1); 1239 return -EUCLEAN; 1240 } 1241 /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */ 1242 if (unlikely(btrfs_inode_transid(leaf, iitem) > super_gen + 1)) { 1243 inode_item_err(leaf, slot, 1244 "invalid inode transid: has %llu expect [0, %llu]", 1245 btrfs_inode_transid(leaf, iitem), super_gen + 1); 1246 return -EUCLEAN; 1247 } 1248 1249 /* 1250 * For size and nbytes it's better not to be too strict, as for dir 1251 * item its size/nbytes can easily get wrong, but doesn't affect 1252 * anything in the fs. So here we skip the check. 1253 */ 1254 mode = btrfs_inode_mode(leaf, iitem); 1255 if (unlikely(mode & ~valid_mask)) { 1256 inode_item_err(leaf, slot, 1257 "unknown mode bit detected: 0x%x", 1258 mode & ~valid_mask); 1259 return -EUCLEAN; 1260 } 1261 1262 /* 1263 * S_IFMT is not bit mapped so we can't completely rely on 1264 * is_power_of_2/has_single_bit_set, but it can save us from checking 1265 * FIFO/CHR/DIR/REG. Only needs to check BLK, LNK and SOCKS 1266 */ 1267 if (!has_single_bit_set(mode & S_IFMT)) { 1268 if (unlikely(!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode))) { 1269 inode_item_err(leaf, slot, 1270 "invalid mode: has 0%o expect valid S_IF* bit(s)", 1271 mode & S_IFMT); 1272 return -EUCLEAN; 1273 } 1274 } 1275 if (unlikely(S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1)) { 1276 inode_item_err(leaf, slot, 1277 "invalid nlink: has %u expect no more than 1 for dir", 1278 btrfs_inode_nlink(leaf, iitem)); 1279 return -EUCLEAN; 1280 } 1281 btrfs_inode_split_flags(btrfs_inode_flags(leaf, iitem), &flags, &ro_flags); 1282 if (unlikely(flags & ~BTRFS_INODE_FLAG_MASK)) { 1283 inode_item_err(leaf, slot, 1284 "unknown incompat flags detected: 0x%x", flags); 1285 return -EUCLEAN; 1286 } 1287 if (unlikely(!sb_rdonly(fs_info->sb) && 1288 (ro_flags & ~BTRFS_INODE_RO_FLAG_MASK))) { 1289 inode_item_err(leaf, slot, 1290 "unknown ro-compat flags detected on writeable mount: 0x%x", 1291 ro_flags); 1292 return -EUCLEAN; 1293 } 1294 return 0; 1295 } 1296 1297 static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key, 1298 int slot) 1299 { 1300 struct btrfs_fs_info *fs_info = leaf->fs_info; 1301 struct btrfs_root_item ri = { 0 }; 1302 const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY | 1303 BTRFS_ROOT_SUBVOL_DEAD; 1304 int ret; 1305 1306 ret = check_root_key(leaf, key, slot); 1307 if (unlikely(ret < 0)) 1308 return ret; 1309 1310 if (unlikely(btrfs_item_size(leaf, slot) != sizeof(ri) && 1311 btrfs_item_size(leaf, slot) != 1312 btrfs_legacy_root_item_size())) { 1313 generic_err(leaf, slot, 1314 "invalid root item size, have %u expect %zu or %u", 1315 btrfs_item_size(leaf, slot), sizeof(ri), 1316 btrfs_legacy_root_item_size()); 1317 return -EUCLEAN; 1318 } 1319 1320 /* 1321 * For legacy root item, the members starting at generation_v2 will be 1322 * all filled with 0. 1323 * And since we allow generation_v2 as 0, it will still pass the check. 1324 */ 1325 read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot), 1326 btrfs_item_size(leaf, slot)); 1327 1328 /* Generation related */ 1329 if (unlikely(btrfs_root_generation(&ri) > 1330 btrfs_super_generation(fs_info->super_copy) + 1)) { 1331 generic_err(leaf, slot, 1332 "invalid root generation, have %llu expect (0, %llu]", 1333 btrfs_root_generation(&ri), 1334 btrfs_super_generation(fs_info->super_copy) + 1); 1335 return -EUCLEAN; 1336 } 1337 if (unlikely(btrfs_root_generation_v2(&ri) > 1338 btrfs_super_generation(fs_info->super_copy) + 1)) { 1339 generic_err(leaf, slot, 1340 "invalid root v2 generation, have %llu expect (0, %llu]", 1341 btrfs_root_generation_v2(&ri), 1342 btrfs_super_generation(fs_info->super_copy) + 1); 1343 return -EUCLEAN; 1344 } 1345 if (unlikely(btrfs_root_last_snapshot(&ri) > 1346 btrfs_super_generation(fs_info->super_copy) + 1)) { 1347 generic_err(leaf, slot, 1348 "invalid root last_snapshot, have %llu expect (0, %llu]", 1349 btrfs_root_last_snapshot(&ri), 1350 btrfs_super_generation(fs_info->super_copy) + 1); 1351 return -EUCLEAN; 1352 } 1353 1354 /* Alignment and level check */ 1355 if (unlikely(!IS_ALIGNED(btrfs_root_bytenr(&ri), fs_info->sectorsize))) { 1356 generic_err(leaf, slot, 1357 "invalid root bytenr, have %llu expect to be aligned to %u", 1358 btrfs_root_bytenr(&ri), fs_info->sectorsize); 1359 return -EUCLEAN; 1360 } 1361 if (unlikely(btrfs_root_level(&ri) >= BTRFS_MAX_LEVEL)) { 1362 generic_err(leaf, slot, 1363 "invalid root level, have %u expect [0, %u]", 1364 btrfs_root_level(&ri), BTRFS_MAX_LEVEL - 1); 1365 return -EUCLEAN; 1366 } 1367 if (unlikely(btrfs_root_drop_level(&ri) >= BTRFS_MAX_LEVEL)) { 1368 generic_err(leaf, slot, 1369 "invalid root drop_level, have %u expect [0, %u]", 1370 btrfs_root_drop_level(&ri), BTRFS_MAX_LEVEL - 1); 1371 return -EUCLEAN; 1372 } 1373 /* 1374 * If drop_progress.objectid is non-zero, a btrfs_drop_snapshot() was 1375 * interrupted and the resume point was recorded in drop_progress and 1376 * drop_level. In that case drop_level must be >= 1: level 0 is the 1377 * leaf level and drop_snapshot never saves a checkpoint there (it 1378 * only records checkpoints at internal node levels in DROP_REFERENCE 1379 * stage). A zero drop_level combined with a non-zero drop_progress 1380 * objectid indicates on-disk corruption and would cause a BUG_ON in 1381 * merge_reloc_root() and btrfs_drop_snapshot() at mount time. 1382 */ 1383 if (unlikely(btrfs_disk_key_objectid(&ri.drop_progress) != 0 && 1384 btrfs_root_drop_level(&ri) == 0)) { 1385 generic_err(leaf, slot, 1386 "invalid root drop_level 0 with non-zero drop_progress objectid %llu", 1387 btrfs_disk_key_objectid(&ri.drop_progress)); 1388 return -EUCLEAN; 1389 } 1390 1391 /* Flags check */ 1392 if (unlikely(btrfs_root_flags(&ri) & ~valid_root_flags)) { 1393 generic_err(leaf, slot, 1394 "invalid root flags, have 0x%llx expect mask 0x%llx", 1395 btrfs_root_flags(&ri), valid_root_flags); 1396 return -EUCLEAN; 1397 } 1398 return 0; 1399 } 1400 1401 static int check_root_ref(struct extent_buffer *leaf, struct btrfs_key *key, int slot) 1402 { 1403 struct btrfs_root_ref *rref; 1404 u32 item_size = btrfs_item_size(leaf, slot); 1405 u32 name_len; 1406 1407 if (unlikely(item_size <= sizeof(*rref))) { 1408 generic_err(leaf, slot, 1409 "invalid root ref item size for key type %u, have %u expect > %zu", 1410 key->type, item_size, sizeof(*rref)); 1411 return -EUCLEAN; 1412 } 1413 1414 rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref); 1415 name_len = btrfs_root_ref_name_len(leaf, rref); 1416 if (unlikely(name_len > BTRFS_NAME_LEN)) { 1417 generic_err(leaf, slot, 1418 "root ref name too long for key type %u, have %u max %u", 1419 key->type, name_len, BTRFS_NAME_LEN); 1420 return -EUCLEAN; 1421 } 1422 if (unlikely(item_size != sizeof(*rref) + name_len)) { 1423 generic_err(leaf, slot, 1424 "invalid root ref item size for key type %u, have %u expect %zu", 1425 key->type, item_size, sizeof(*rref) + name_len); 1426 return -EUCLEAN; 1427 } 1428 1429 return 0; 1430 } 1431 1432 __printf(3,4) 1433 __cold 1434 static void extent_err(const struct extent_buffer *eb, int slot, 1435 const char *fmt, ...) 1436 { 1437 struct btrfs_key key; 1438 struct va_format vaf; 1439 va_list args; 1440 u64 bytenr; 1441 u64 len; 1442 1443 btrfs_item_key_to_cpu(eb, &key, slot); 1444 bytenr = key.objectid; 1445 if (key.type == BTRFS_METADATA_ITEM_KEY || 1446 key.type == BTRFS_TREE_BLOCK_REF_KEY || 1447 key.type == BTRFS_SHARED_BLOCK_REF_KEY) 1448 len = eb->fs_info->nodesize; 1449 else 1450 len = key.offset; 1451 va_start(args, fmt); 1452 1453 vaf.fmt = fmt; 1454 vaf.va = &args; 1455 1456 dump_page(folio_page(eb->folios[0], 0), "eb page dump"); 1457 btrfs_crit(eb->fs_info, 1458 "corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV", 1459 btrfs_header_level(eb) == 0 ? "leaf" : "node", 1460 eb->start, slot, bytenr, len, &vaf); 1461 va_end(args); 1462 } 1463 1464 static bool is_valid_dref_root(u64 rootid) 1465 { 1466 /* 1467 * The following tree root objectids are allowed to have a data backref: 1468 * - subvolume trees 1469 * - data reloc tree 1470 * - tree root 1471 * For v1 space cache 1472 */ 1473 return btrfs_is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID || 1474 rootid == BTRFS_ROOT_TREE_OBJECTID; 1475 } 1476 1477 static int check_extent_item(struct extent_buffer *leaf, 1478 struct btrfs_key *key, int slot, 1479 struct btrfs_key *prev_key) 1480 { 1481 struct btrfs_fs_info *fs_info = leaf->fs_info; 1482 struct btrfs_extent_item *ei; 1483 bool is_tree_block = false; 1484 unsigned long ptr; /* Current pointer inside inline refs */ 1485 unsigned long end; /* Extent item end */ 1486 const u32 item_size = btrfs_item_size(leaf, slot); 1487 u8 last_type = 0; 1488 u64 last_seq = U64_MAX; 1489 u64 flags; 1490 u64 generation; 1491 u64 total_refs; /* Total refs in btrfs_extent_item */ 1492 u64 inline_refs = 0; /* found total inline refs */ 1493 1494 if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY && 1495 !btrfs_fs_incompat(fs_info, SKINNY_METADATA))) { 1496 generic_err(leaf, slot, 1497 "invalid key type, METADATA_ITEM type invalid when SKINNY_METADATA feature disabled"); 1498 return -EUCLEAN; 1499 } 1500 /* key->objectid is the bytenr for both key types */ 1501 if (unlikely(!IS_ALIGNED(key->objectid, fs_info->sectorsize))) { 1502 generic_err(leaf, slot, 1503 "invalid key objectid, have %llu expect to be aligned to %u", 1504 key->objectid, fs_info->sectorsize); 1505 return -EUCLEAN; 1506 } 1507 1508 /* key->offset is tree level for METADATA_ITEM_KEY */ 1509 if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY && 1510 key->offset >= BTRFS_MAX_LEVEL)) { 1511 extent_err(leaf, slot, 1512 "invalid tree level, have %llu expect [0, %u]", 1513 key->offset, BTRFS_MAX_LEVEL - 1); 1514 return -EUCLEAN; 1515 } 1516 1517 /* 1518 * EXTENT/METADATA_ITEM consists of: 1519 * 1) One btrfs_extent_item 1520 * Records the total refs, type and generation of the extent. 1521 * 1522 * 2) One btrfs_tree_block_info (for EXTENT_ITEM and tree backref only) 1523 * Records the first key and level of the tree block. 1524 * 1525 * 2) Zero or more btrfs_extent_inline_ref(s) 1526 * Each inline ref has one btrfs_extent_inline_ref shows: 1527 * 2.1) The ref type, one of the 4 1528 * TREE_BLOCK_REF Tree block only 1529 * SHARED_BLOCK_REF Tree block only 1530 * EXTENT_DATA_REF Data only 1531 * SHARED_DATA_REF Data only 1532 * 2.2) Ref type specific data 1533 * Either using btrfs_extent_inline_ref::offset, or specific 1534 * data structure. 1535 * 1536 * All above inline items should follow the order: 1537 * 1538 * - All btrfs_extent_inline_ref::type should be in an ascending 1539 * order 1540 * 1541 * - Within the same type, the items should follow a descending 1542 * order by their sequence number. The sequence number is 1543 * determined by: 1544 * * btrfs_extent_inline_ref::offset for all types other than 1545 * EXTENT_DATA_REF 1546 * * hash_extent_data_ref() for EXTENT_DATA_REF 1547 */ 1548 if (unlikely(item_size < sizeof(*ei))) { 1549 extent_err(leaf, slot, 1550 "invalid item size, have %u expect [%zu, %u)", 1551 item_size, sizeof(*ei), 1552 BTRFS_LEAF_DATA_SIZE(fs_info)); 1553 return -EUCLEAN; 1554 } 1555 end = item_size + btrfs_item_ptr_offset(leaf, slot); 1556 1557 /* Checks against extent_item */ 1558 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 1559 flags = btrfs_extent_flags(leaf, ei); 1560 total_refs = btrfs_extent_refs(leaf, ei); 1561 generation = btrfs_extent_generation(leaf, ei); 1562 if (unlikely(generation > 1563 btrfs_super_generation(fs_info->super_copy) + 1)) { 1564 extent_err(leaf, slot, 1565 "invalid generation, have %llu expect (0, %llu]", 1566 generation, 1567 btrfs_super_generation(fs_info->super_copy) + 1); 1568 return -EUCLEAN; 1569 } 1570 if (unlikely(!has_single_bit_set(flags & (BTRFS_EXTENT_FLAG_DATA | 1571 BTRFS_EXTENT_FLAG_TREE_BLOCK)))) { 1572 extent_err(leaf, slot, 1573 "invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx", 1574 flags, BTRFS_EXTENT_FLAG_DATA | 1575 BTRFS_EXTENT_FLAG_TREE_BLOCK); 1576 return -EUCLEAN; 1577 } 1578 is_tree_block = !!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK); 1579 if (is_tree_block) { 1580 if (unlikely(key->type == BTRFS_EXTENT_ITEM_KEY && 1581 key->offset != fs_info->nodesize)) { 1582 extent_err(leaf, slot, 1583 "invalid extent length, have %llu expect %u", 1584 key->offset, fs_info->nodesize); 1585 return -EUCLEAN; 1586 } 1587 } else { 1588 if (unlikely(key->type != BTRFS_EXTENT_ITEM_KEY)) { 1589 extent_err(leaf, slot, 1590 "invalid key type, have %u expect %u for data backref", 1591 key->type, BTRFS_EXTENT_ITEM_KEY); 1592 return -EUCLEAN; 1593 } 1594 if (unlikely(!IS_ALIGNED(key->offset, fs_info->sectorsize))) { 1595 extent_err(leaf, slot, 1596 "invalid extent length, have %llu expect aligned to %u", 1597 key->offset, fs_info->sectorsize); 1598 return -EUCLEAN; 1599 } 1600 if (unlikely(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 1601 extent_err(leaf, slot, 1602 "invalid extent flag, data has full backref set"); 1603 return -EUCLEAN; 1604 } 1605 } 1606 ptr = (unsigned long)(struct btrfs_extent_item *)(ei + 1); 1607 1608 /* Check the special case of btrfs_tree_block_info */ 1609 if (is_tree_block && key->type != BTRFS_METADATA_ITEM_KEY) { 1610 struct btrfs_tree_block_info *info; 1611 1612 info = (struct btrfs_tree_block_info *)ptr; 1613 if (unlikely(btrfs_tree_block_level(leaf, info) >= BTRFS_MAX_LEVEL)) { 1614 extent_err(leaf, slot, 1615 "invalid tree block info level, have %u expect [0, %u]", 1616 btrfs_tree_block_level(leaf, info), 1617 BTRFS_MAX_LEVEL - 1); 1618 return -EUCLEAN; 1619 } 1620 ptr = (unsigned long)(struct btrfs_tree_block_info *)(info + 1); 1621 } 1622 1623 /* Check inline refs */ 1624 while (ptr < end) { 1625 struct btrfs_extent_inline_ref *iref; 1626 struct btrfs_extent_data_ref *dref; 1627 struct btrfs_shared_data_ref *sref; 1628 u64 seq; 1629 u64 dref_root; 1630 u64 dref_objectid; 1631 u64 dref_offset; 1632 u64 inline_offset; 1633 u8 inline_type; 1634 1635 if (unlikely(ptr + sizeof(*iref) > end)) { 1636 extent_err(leaf, slot, 1637 "inline ref item overflows extent item, ptr %lu iref size %zu end %lu", 1638 ptr, sizeof(*iref), end); 1639 return -EUCLEAN; 1640 } 1641 iref = (struct btrfs_extent_inline_ref *)ptr; 1642 inline_type = btrfs_extent_inline_ref_type(leaf, iref); 1643 inline_offset = btrfs_extent_inline_ref_offset(leaf, iref); 1644 seq = inline_offset; 1645 if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) { 1646 extent_err(leaf, slot, 1647 "inline ref item overflows extent item, ptr %lu iref size %u end %lu", 1648 ptr, btrfs_extent_inline_ref_size(inline_type), end); 1649 return -EUCLEAN; 1650 } 1651 1652 switch (inline_type) { 1653 /* inline_offset is subvolid of the owner, no need to check */ 1654 case BTRFS_TREE_BLOCK_REF_KEY: 1655 inline_refs++; 1656 break; 1657 /* Contains parent bytenr */ 1658 case BTRFS_SHARED_BLOCK_REF_KEY: 1659 if (unlikely(!IS_ALIGNED(inline_offset, 1660 fs_info->sectorsize))) { 1661 extent_err(leaf, slot, 1662 "invalid tree parent bytenr, have %llu expect aligned to %u", 1663 inline_offset, fs_info->sectorsize); 1664 return -EUCLEAN; 1665 } 1666 inline_refs++; 1667 break; 1668 /* 1669 * Contains owner subvolid, owner key objectid, adjusted offset. 1670 * The only obvious corruption can happen in that offset. 1671 */ 1672 case BTRFS_EXTENT_DATA_REF_KEY: 1673 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1674 dref_root = btrfs_extent_data_ref_root(leaf, dref); 1675 dref_objectid = btrfs_extent_data_ref_objectid(leaf, dref); 1676 dref_offset = btrfs_extent_data_ref_offset(leaf, dref); 1677 seq = hash_extent_data_ref( 1678 btrfs_extent_data_ref_root(leaf, dref), 1679 btrfs_extent_data_ref_objectid(leaf, dref), 1680 btrfs_extent_data_ref_offset(leaf, dref)); 1681 if (unlikely(!is_valid_dref_root(dref_root))) { 1682 extent_err(leaf, slot, 1683 "invalid data ref root value %llu", 1684 dref_root); 1685 return -EUCLEAN; 1686 } 1687 if (unlikely(dref_objectid < BTRFS_FIRST_FREE_OBJECTID || 1688 dref_objectid > BTRFS_LAST_FREE_OBJECTID)) { 1689 extent_err(leaf, slot, 1690 "invalid data ref objectid value %llu", 1691 dref_objectid); 1692 return -EUCLEAN; 1693 } 1694 if (unlikely(!IS_ALIGNED(dref_offset, 1695 fs_info->sectorsize))) { 1696 extent_err(leaf, slot, 1697 "invalid data ref offset, have %llu expect aligned to %u", 1698 dref_offset, fs_info->sectorsize); 1699 return -EUCLEAN; 1700 } 1701 if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) { 1702 extent_err(leaf, slot, 1703 "invalid data ref count, should have non-zero value"); 1704 return -EUCLEAN; 1705 } 1706 inline_refs += btrfs_extent_data_ref_count(leaf, dref); 1707 break; 1708 /* Contains parent bytenr and ref count */ 1709 case BTRFS_SHARED_DATA_REF_KEY: 1710 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1711 if (unlikely(!IS_ALIGNED(inline_offset, 1712 fs_info->sectorsize))) { 1713 extent_err(leaf, slot, 1714 "invalid data parent bytenr, have %llu expect aligned to %u", 1715 inline_offset, fs_info->sectorsize); 1716 return -EUCLEAN; 1717 } 1718 if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) { 1719 extent_err(leaf, slot, 1720 "invalid shared data ref count, should have non-zero value"); 1721 return -EUCLEAN; 1722 } 1723 inline_refs += btrfs_shared_data_ref_count(leaf, sref); 1724 break; 1725 case BTRFS_EXTENT_OWNER_REF_KEY: 1726 WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)); 1727 break; 1728 default: 1729 extent_err(leaf, slot, "unknown inline ref type: %u", 1730 inline_type); 1731 return -EUCLEAN; 1732 } 1733 if (unlikely(inline_type < last_type)) { 1734 extent_err(leaf, slot, 1735 "inline ref out-of-order: has type %u, prev type %u", 1736 inline_type, last_type); 1737 return -EUCLEAN; 1738 } 1739 /* Type changed, allow the sequence starts from U64_MAX again. */ 1740 if (inline_type > last_type) 1741 last_seq = U64_MAX; 1742 if (unlikely(seq > last_seq)) { 1743 extent_err(leaf, slot, 1744 "inline ref out-of-order: has type %u offset %llu seq 0x%llx, prev type %u seq 0x%llx", 1745 inline_type, inline_offset, seq, 1746 last_type, last_seq); 1747 return -EUCLEAN; 1748 } 1749 last_type = inline_type; 1750 last_seq = seq; 1751 ptr += btrfs_extent_inline_ref_size(inline_type); 1752 } 1753 /* No padding is allowed */ 1754 if (unlikely(ptr != end)) { 1755 extent_err(leaf, slot, 1756 "invalid extent item size, padding bytes found"); 1757 return -EUCLEAN; 1758 } 1759 1760 /* Finally, check the inline refs against total refs */ 1761 if (unlikely(inline_refs > total_refs)) { 1762 extent_err(leaf, slot, 1763 "invalid extent refs, have %llu expect >= inline %llu", 1764 total_refs, inline_refs); 1765 return -EUCLEAN; 1766 } 1767 1768 if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) || 1769 (prev_key->type == BTRFS_METADATA_ITEM_KEY)) { 1770 u64 prev_end = prev_key->objectid; 1771 1772 if (prev_key->type == BTRFS_METADATA_ITEM_KEY) 1773 prev_end += fs_info->nodesize; 1774 else 1775 prev_end += prev_key->offset; 1776 1777 if (unlikely(prev_end > key->objectid)) { 1778 extent_err(leaf, slot, 1779 "previous extent " BTRFS_KEY_FMT " overlaps current extent " BTRFS_KEY_FMT, 1780 BTRFS_KEY_FMT_VALUE(prev_key), 1781 BTRFS_KEY_FMT_VALUE(key)); 1782 return -EUCLEAN; 1783 } 1784 } 1785 1786 return 0; 1787 } 1788 1789 static int check_simple_keyed_refs(struct extent_buffer *leaf, 1790 struct btrfs_key *key, int slot) 1791 { 1792 u32 expect_item_size = 0; 1793 1794 if (key->type == BTRFS_SHARED_DATA_REF_KEY) { 1795 struct btrfs_shared_data_ref *sref; 1796 1797 sref = btrfs_item_ptr(leaf, slot, struct btrfs_shared_data_ref); 1798 if (unlikely(btrfs_shared_data_ref_count(leaf, sref) == 0)) { 1799 extent_err(leaf, slot, 1800 "invalid shared data backref count, should have non-zero value"); 1801 return -EUCLEAN; 1802 } 1803 1804 expect_item_size = sizeof(struct btrfs_shared_data_ref); 1805 } 1806 1807 if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) { 1808 generic_err(leaf, slot, 1809 "invalid item size, have %u expect %u for key type %u", 1810 btrfs_item_size(leaf, slot), 1811 expect_item_size, key->type); 1812 return -EUCLEAN; 1813 } 1814 if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) { 1815 generic_err(leaf, slot, 1816 "invalid key objectid for shared block ref, have %llu expect aligned to %u", 1817 key->objectid, leaf->fs_info->sectorsize); 1818 return -EUCLEAN; 1819 } 1820 if (unlikely(key->type != BTRFS_TREE_BLOCK_REF_KEY && 1821 !IS_ALIGNED(key->offset, leaf->fs_info->sectorsize))) { 1822 extent_err(leaf, slot, 1823 "invalid tree parent bytenr, have %llu expect aligned to %u", 1824 key->offset, leaf->fs_info->sectorsize); 1825 return -EUCLEAN; 1826 } 1827 return 0; 1828 } 1829 1830 static int check_extent_data_ref(struct extent_buffer *leaf, 1831 struct btrfs_key *key, int slot) 1832 { 1833 struct btrfs_extent_data_ref *dref; 1834 unsigned long ptr = btrfs_item_ptr_offset(leaf, slot); 1835 const unsigned long end = ptr + btrfs_item_size(leaf, slot); 1836 1837 if (unlikely(btrfs_item_size(leaf, slot) % sizeof(*dref) != 0)) { 1838 generic_err(leaf, slot, 1839 "invalid item size, have %u expect aligned to %zu for key type %u", 1840 btrfs_item_size(leaf, slot), 1841 sizeof(*dref), key->type); 1842 return -EUCLEAN; 1843 } 1844 if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) { 1845 generic_err(leaf, slot, 1846 "invalid key objectid for shared block ref, have %llu expect aligned to %u", 1847 key->objectid, leaf->fs_info->sectorsize); 1848 return -EUCLEAN; 1849 } 1850 for (; ptr < end; ptr += sizeof(*dref)) { 1851 u64 root; 1852 u64 objectid; 1853 u64 offset; 1854 1855 /* 1856 * We cannot check the extent_data_ref hash due to possible 1857 * overflow from the leaf due to hash collisions. 1858 */ 1859 dref = (struct btrfs_extent_data_ref *)ptr; 1860 root = btrfs_extent_data_ref_root(leaf, dref); 1861 objectid = btrfs_extent_data_ref_objectid(leaf, dref); 1862 offset = btrfs_extent_data_ref_offset(leaf, dref); 1863 if (unlikely(!is_valid_dref_root(root))) { 1864 extent_err(leaf, slot, 1865 "invalid extent data backref root value %llu", 1866 root); 1867 return -EUCLEAN; 1868 } 1869 if (unlikely(objectid < BTRFS_FIRST_FREE_OBJECTID || 1870 objectid > BTRFS_LAST_FREE_OBJECTID)) { 1871 extent_err(leaf, slot, 1872 "invalid extent data backref objectid value %llu", 1873 objectid); 1874 return -EUCLEAN; 1875 } 1876 if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { 1877 extent_err(leaf, slot, 1878 "invalid extent data backref offset, have %llu expect aligned to %u", 1879 offset, leaf->fs_info->sectorsize); 1880 return -EUCLEAN; 1881 } 1882 if (unlikely(btrfs_extent_data_ref_count(leaf, dref) == 0)) { 1883 extent_err(leaf, slot, 1884 "invalid extent data backref count, should have non-zero value"); 1885 return -EUCLEAN; 1886 } 1887 } 1888 return 0; 1889 } 1890 1891 #define inode_ref_err(eb, slot, fmt, args...) \ 1892 inode_item_err(eb, slot, fmt, ##args) 1893 static int check_inode_ref(struct extent_buffer *leaf, 1894 struct btrfs_key *key, struct btrfs_key *prev_key, 1895 int slot) 1896 { 1897 struct btrfs_inode_ref *iref; 1898 unsigned long ptr; 1899 unsigned long end; 1900 1901 if (unlikely(!check_prev_ino(leaf, key, slot, prev_key))) 1902 return -EUCLEAN; 1903 /* namelen can't be 0, so item_size == sizeof() is also invalid */ 1904 if (unlikely(btrfs_item_size(leaf, slot) <= sizeof(*iref))) { 1905 inode_ref_err(leaf, slot, 1906 "invalid item size, have %u expect (%zu, %u)", 1907 btrfs_item_size(leaf, slot), 1908 sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info)); 1909 return -EUCLEAN; 1910 } 1911 1912 ptr = btrfs_item_ptr_offset(leaf, slot); 1913 end = ptr + btrfs_item_size(leaf, slot); 1914 while (ptr < end) { 1915 u16 namelen; 1916 1917 if (unlikely(ptr + sizeof(*iref) > end)) { 1918 inode_ref_err(leaf, slot, 1919 "inode ref overflow, ptr %lu end %lu inode_ref_size %zu", 1920 ptr, end, sizeof(*iref)); 1921 return -EUCLEAN; 1922 } 1923 1924 iref = (struct btrfs_inode_ref *)ptr; 1925 namelen = btrfs_inode_ref_name_len(leaf, iref); 1926 if (unlikely(ptr + sizeof(*iref) + namelen > end)) { 1927 inode_ref_err(leaf, slot, 1928 "inode ref overflow, ptr %lu end %lu namelen %u", 1929 ptr, end, namelen); 1930 return -EUCLEAN; 1931 } 1932 1933 /* 1934 * NOTE: In theory we should record all found index numbers 1935 * to find any duplicated indexes, but that will be too time 1936 * consuming for inodes with too many hard links. 1937 */ 1938 ptr += sizeof(*iref) + namelen; 1939 } 1940 return 0; 1941 } 1942 1943 static int check_inode_extref(struct extent_buffer *leaf, 1944 struct btrfs_key *key, struct btrfs_key *prev_key, 1945 int slot) 1946 { 1947 unsigned long ptr = btrfs_item_ptr_offset(leaf, slot); 1948 unsigned long end = ptr + btrfs_item_size(leaf, slot); 1949 1950 if (unlikely(!check_prev_ino(leaf, key, slot, prev_key))) 1951 return -EUCLEAN; 1952 1953 while (ptr < end) { 1954 struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr; 1955 u16 namelen; 1956 1957 if (unlikely(ptr + sizeof(*extref) > end)) { 1958 inode_ref_err(leaf, slot, 1959 "inode extref overflow, ptr %lu end %lu inode_extref size %zu", 1960 ptr, end, sizeof(*extref)); 1961 return -EUCLEAN; 1962 } 1963 1964 namelen = btrfs_inode_extref_name_len(leaf, extref); 1965 if (unlikely(ptr + sizeof(*extref) + namelen > end)) { 1966 inode_ref_err(leaf, slot, 1967 "inode extref overflow, ptr %lu end %lu namelen %u", 1968 ptr, end, namelen); 1969 return -EUCLEAN; 1970 } 1971 ptr += sizeof(*extref) + namelen; 1972 } 1973 return 0; 1974 } 1975 1976 static int check_raid_stripe_extent(const struct extent_buffer *leaf, 1977 const struct btrfs_key *key, int slot) 1978 { 1979 if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) { 1980 generic_err(leaf, slot, 1981 "invalid key objectid for raid stripe extent, have %llu expect aligned to %u", 1982 key->objectid, leaf->fs_info->sectorsize); 1983 return -EUCLEAN; 1984 } 1985 1986 if (unlikely(!btrfs_fs_incompat(leaf->fs_info, RAID_STRIPE_TREE))) { 1987 generic_err(leaf, slot, 1988 "RAID_STRIPE_EXTENT present but RAID_STRIPE_TREE incompat bit unset"); 1989 return -EUCLEAN; 1990 } 1991 1992 return 0; 1993 } 1994 1995 static int check_remap_key(const struct extent_buffer *leaf, 1996 const struct btrfs_key *key, int slot) 1997 { 1998 const u32 item_size = btrfs_item_size(leaf, slot); 1999 const u32 sectorsize = leaf->fs_info->sectorsize; 2000 u64 end; 2001 2002 if (unlikely(!btrfs_fs_incompat(leaf->fs_info, REMAP_TREE))) { 2003 generic_err(leaf, slot, 2004 "remap key type %u present but REMAP_TREE incompat bit unset", 2005 key->type); 2006 return -EUCLEAN; 2007 } 2008 2009 switch (key->type) { 2010 case BTRFS_IDENTITY_REMAP_KEY: 2011 if (unlikely(item_size != 0)) { 2012 generic_err(leaf, slot, 2013 "invalid item size for IDENTITY_REMAP, have %u expect 0", 2014 item_size); 2015 return -EUCLEAN; 2016 } 2017 break; 2018 case BTRFS_REMAP_KEY: 2019 case BTRFS_REMAP_BACKREF_KEY: 2020 if (unlikely(item_size != sizeof(struct btrfs_remap_item))) { 2021 generic_err(leaf, slot, 2022 "invalid item size for remap key type %u, have %u expect %zu", 2023 key->type, item_size, 2024 sizeof(struct btrfs_remap_item)); 2025 return -EUCLEAN; 2026 } 2027 break; 2028 } 2029 2030 if (unlikely(key->offset == 0)) { 2031 generic_err(leaf, slot, 2032 "invalid remap key length, have 0 expect nonzero"); 2033 return -EUCLEAN; 2034 } 2035 2036 if (unlikely(!IS_ALIGNED(key->objectid, sectorsize))) { 2037 generic_err(leaf, slot, 2038 "invalid remap key objectid, have %llu expect aligned to %u", 2039 key->objectid, sectorsize); 2040 return -EUCLEAN; 2041 } 2042 2043 if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { 2044 generic_err(leaf, slot, 2045 "invalid remap key offset (length), have %llu expect aligned to %u", 2046 key->offset, sectorsize); 2047 return -EUCLEAN; 2048 } 2049 2050 if (unlikely(check_add_overflow(key->objectid, key->offset, &end))) { 2051 generic_err(leaf, slot, 2052 "remap key overflow, objectid %llu + offset %llu wraps", 2053 key->objectid, key->offset); 2054 return -EUCLEAN; 2055 } 2056 2057 return 0; 2058 } 2059 2060 static int check_dev_extent_item(const struct extent_buffer *leaf, 2061 const struct btrfs_key *key, 2062 int slot, 2063 struct btrfs_key *prev_key) 2064 { 2065 struct btrfs_dev_extent *de; 2066 const u32 sectorsize = leaf->fs_info->sectorsize; 2067 2068 de = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); 2069 /* Basic fixed member checks. */ 2070 if (unlikely(btrfs_dev_extent_chunk_tree(leaf, de) != 2071 BTRFS_CHUNK_TREE_OBJECTID)) { 2072 generic_err(leaf, slot, 2073 "invalid dev extent chunk tree id, has %llu expect %llu", 2074 btrfs_dev_extent_chunk_tree(leaf, de), 2075 BTRFS_CHUNK_TREE_OBJECTID); 2076 return -EUCLEAN; 2077 } 2078 if (unlikely(btrfs_dev_extent_chunk_objectid(leaf, de) != 2079 BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { 2080 generic_err(leaf, slot, 2081 "invalid dev extent chunk objectid, has %llu expect %llu", 2082 btrfs_dev_extent_chunk_objectid(leaf, de), 2083 BTRFS_FIRST_CHUNK_TREE_OBJECTID); 2084 return -EUCLEAN; 2085 } 2086 /* Alignment check. */ 2087 if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { 2088 generic_err(leaf, slot, 2089 "invalid dev extent key.offset, has %llu not aligned to %u", 2090 key->offset, sectorsize); 2091 return -EUCLEAN; 2092 } 2093 if (unlikely(!IS_ALIGNED(btrfs_dev_extent_chunk_offset(leaf, de), 2094 sectorsize))) { 2095 generic_err(leaf, slot, 2096 "invalid dev extent chunk offset, has %llu not aligned to %u", 2097 btrfs_dev_extent_chunk_objectid(leaf, de), 2098 sectorsize); 2099 return -EUCLEAN; 2100 } 2101 if (unlikely(!IS_ALIGNED(btrfs_dev_extent_length(leaf, de), 2102 sectorsize))) { 2103 generic_err(leaf, slot, 2104 "invalid dev extent length, has %llu not aligned to %u", 2105 btrfs_dev_extent_length(leaf, de), sectorsize); 2106 return -EUCLEAN; 2107 } 2108 /* Overlap check with previous dev extent. */ 2109 if (slot && prev_key->objectid == key->objectid && 2110 prev_key->type == key->type) { 2111 struct btrfs_dev_extent *prev_de; 2112 u64 prev_len; 2113 2114 prev_de = btrfs_item_ptr(leaf, slot - 1, struct btrfs_dev_extent); 2115 prev_len = btrfs_dev_extent_length(leaf, prev_de); 2116 if (unlikely(prev_key->offset + prev_len > key->offset)) { 2117 generic_err(leaf, slot, 2118 "dev extent overlap, prev offset %llu len %llu current offset %llu", 2119 prev_key->offset, prev_len, key->offset); 2120 return -EUCLEAN; 2121 } 2122 } 2123 return 0; 2124 } 2125 2126 static int check_free_space_info(struct extent_buffer *leaf, struct btrfs_key *key, 2127 int slot) 2128 { 2129 struct btrfs_fs_info *fs_info = leaf->fs_info; 2130 struct btrfs_free_space_info *fsi; 2131 const u32 blocksize = fs_info->sectorsize; 2132 u64 end; 2133 u32 flags; 2134 2135 if (unlikely(!IS_ALIGNED(key->objectid, blocksize))) { 2136 generic_err(leaf, slot, 2137 "free space info key objectid is not aligned to %u, has " BTRFS_KEY_FMT, 2138 blocksize, BTRFS_KEY_FMT_VALUE(key)); 2139 return -EUCLEAN; 2140 } 2141 if (unlikely(!IS_ALIGNED(key->offset, blocksize))) { 2142 generic_err(leaf, slot, 2143 "free space info key offset is not aligned to %u, has " BTRFS_KEY_FMT, 2144 blocksize, BTRFS_KEY_FMT_VALUE(key)); 2145 return -EUCLEAN; 2146 } 2147 if (unlikely(check_add_overflow(key->objectid, key->offset, &end))) { 2148 generic_err(leaf, slot, 2149 "free space info key overflows, has " BTRFS_KEY_FMT, 2150 BTRFS_KEY_FMT_VALUE(key)); 2151 return -EUCLEAN; 2152 } 2153 if (unlikely(btrfs_item_size(leaf, slot) != 2154 sizeof(struct btrfs_free_space_info))) { 2155 generic_err(leaf, slot, 2156 "invalid item size for free space info, has %u expect %zu", 2157 btrfs_item_size(leaf, slot), 2158 sizeof(struct btrfs_free_space_info)); 2159 return -EUCLEAN; 2160 } 2161 fsi = btrfs_item_ptr(leaf, slot, struct btrfs_free_space_info); 2162 flags = btrfs_free_space_flags(leaf, fsi); 2163 if (unlikely(flags & ~BTRFS_FREE_SPACE_FLAGS_MASK)) { 2164 generic_err(leaf, slot, 2165 "unknown flags for free space info, has 0x%x valid mask 0x%lx", 2166 flags, BTRFS_FREE_SPACE_FLAGS_MASK); 2167 return -EUCLEAN; 2168 } 2169 if (unlikely(btrfs_free_space_extent_count(leaf, fsi) > 2170 key->offset >> fs_info->sectorsize_bits)) { 2171 generic_err(leaf, slot, 2172 "suspicious extent count, has %u max valid %llu", 2173 btrfs_free_space_extent_count(leaf, fsi), 2174 key->offset >> fs_info->sectorsize_bits); 2175 return -EUCLEAN; 2176 } 2177 return 0; 2178 } 2179 2180 static int check_free_space_common_key(struct extent_buffer *leaf, struct btrfs_key *key, int slot, 2181 struct btrfs_key *prev_key) 2182 { 2183 struct btrfs_fs_info *fs_info = leaf->fs_info; 2184 const u32 blocksize = fs_info->sectorsize; 2185 const char *type_str = (key->type == BTRFS_FREE_SPACE_EXTENT_KEY) ? "extent" : "bitmap"; 2186 u64 end; 2187 2188 if (unlikely(!IS_ALIGNED(key->objectid, blocksize))) { 2189 generic_err(leaf, slot, 2190 "free space %s key objectid is not aligned to %u, has " BTRFS_KEY_FMT, 2191 type_str, blocksize, BTRFS_KEY_FMT_VALUE(key)); 2192 return -EUCLEAN; 2193 } 2194 if (unlikely(!IS_ALIGNED(key->offset, blocksize))) { 2195 generic_err(leaf, slot, 2196 "free space %s key offset is not aligned to %u, has " BTRFS_KEY_FMT, 2197 type_str, blocksize, BTRFS_KEY_FMT_VALUE(key)); 2198 return -EUCLEAN; 2199 } 2200 if (unlikely(key->offset == 0)) { 2201 generic_err(leaf, slot, "free space %s length is 0", type_str); 2202 return -EUCLEAN; 2203 } 2204 if (unlikely(check_add_overflow(key->objectid, key->offset, &end))) { 2205 generic_err(leaf, slot, 2206 "free space %s end overflow, have objectid %llu offset %llu", 2207 type_str, key->objectid, key->offset); 2208 return -EUCLEAN; 2209 } 2210 if (slot == 0) 2211 return 0; 2212 2213 /* 2214 * Make sure the current key is inside the block group, and matching 2215 * the expected info type. 2216 */ 2217 if (prev_key->type == BTRFS_FREE_SPACE_INFO_KEY) { 2218 struct btrfs_free_space_info *fsi; 2219 u32 info_flags; 2220 2221 if (unlikely(key->objectid < prev_key->objectid || 2222 key->objectid + key->offset > prev_key->objectid + prev_key->offset)) { 2223 generic_err(leaf, slot, 2224 "free space %s is not inside the space info, prev key " BTRFS_KEY_FMT " current key " BTRFS_KEY_FMT, 2225 type_str, BTRFS_KEY_FMT_VALUE(prev_key), 2226 BTRFS_KEY_FMT_VALUE(key)); 2227 return -EUCLEAN; 2228 } 2229 fsi = btrfs_item_ptr(leaf, slot - 1, struct btrfs_free_space_info); 2230 info_flags = btrfs_free_space_flags(leaf, fsi); 2231 if (unlikely((info_flags == BTRFS_FREE_SPACE_USING_BITMAPS && 2232 key->type == BTRFS_FREE_SPACE_EXTENT_KEY) || 2233 (info_flags != BTRFS_FREE_SPACE_USING_BITMAPS && 2234 key->type == BTRFS_FREE_SPACE_BITMAP_KEY))) { 2235 generic_err(leaf, slot, 2236 "free space %s key type is not matching the type of space info, key type %u space info flags %u", 2237 type_str, key->type, info_flags); 2238 return -EUCLEAN; 2239 } 2240 return 0; 2241 } 2242 /* 2243 * Previous key should be either FREE_SPACE_EXTENT or FREE_SPACE_BITMAP. 2244 * Inside the same block group the key type should match each other, and 2245 * no overlaps. 2246 */ 2247 if (unlikely(key->type != prev_key->type)) { 2248 generic_err(leaf, slot, 2249 "free space %s key type is not matching the type of previous key, key type %u prev key type %u", 2250 type_str, key->type, prev_key->type); 2251 return -EUCLEAN; 2252 } 2253 if (unlikely(prev_key->objectid + prev_key->offset > key->objectid)) { 2254 generic_err(leaf, slot, 2255 "free space %s key overlaps previous key, prev key " BTRFS_KEY_FMT " current key " BTRFS_KEY_FMT, 2256 type_str, BTRFS_KEY_FMT_VALUE(prev_key), 2257 BTRFS_KEY_FMT_VALUE(key)); 2258 return -EUCLEAN; 2259 } 2260 return 0; 2261 } 2262 2263 static int check_free_space_extent(struct extent_buffer *leaf, struct btrfs_key *key, int slot, 2264 struct btrfs_key *prev_key) 2265 { 2266 int ret; 2267 2268 ret = check_free_space_common_key(leaf, key, slot, prev_key); 2269 if (unlikely(ret < 0)) 2270 return ret; 2271 2272 if (unlikely(btrfs_item_size(leaf, slot) != 0)) { 2273 generic_err(leaf, slot, 2274 "invalid item size for free space info, has %u expect 0", 2275 btrfs_item_size(leaf, slot)); 2276 return -EUCLEAN; 2277 } 2278 return 0; 2279 } 2280 2281 static int check_free_space_bitmap(struct extent_buffer *leaf, 2282 struct btrfs_key *key, int slot, 2283 struct btrfs_key *prev_key) 2284 { 2285 struct btrfs_fs_info *fs_info = leaf->fs_info; 2286 u32 expected_item_size; 2287 int ret; 2288 2289 ret = check_free_space_common_key(leaf, key, slot, prev_key); 2290 if (unlikely(ret < 0)) 2291 return ret; 2292 2293 /* 2294 * The item must hold exactly the right number of bitmap bytes for the 2295 * range described by key->offset. A mismatch means the item was 2296 * truncated or the key is corrupt; either way the bitmap data is not 2297 * safe to access. 2298 */ 2299 expected_item_size = DIV_ROUND_UP(key->offset >> fs_info->sectorsize_bits, 2300 BITS_PER_BYTE); 2301 if (unlikely(btrfs_item_size(leaf, slot) != expected_item_size)) { 2302 generic_err(leaf, slot, 2303 "invalid item size for free space bitmap, has %u expect %u", 2304 btrfs_item_size(leaf, slot), expected_item_size); 2305 return -EUCLEAN; 2306 } 2307 return 0; 2308 } 2309 2310 /* 2311 * Common point to switch the item-specific validation. 2312 */ 2313 static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf, 2314 struct btrfs_key *key, 2315 int slot, 2316 struct btrfs_key *prev_key) 2317 { 2318 int ret = 0; 2319 struct btrfs_chunk *chunk; 2320 2321 switch (key->type) { 2322 case BTRFS_EXTENT_DATA_KEY: 2323 ret = check_extent_data_item(leaf, key, slot, prev_key); 2324 break; 2325 case BTRFS_EXTENT_CSUM_KEY: 2326 ret = check_csum_item(leaf, key, slot, prev_key); 2327 break; 2328 case BTRFS_DIR_ITEM_KEY: 2329 case BTRFS_DIR_INDEX_KEY: 2330 case BTRFS_XATTR_ITEM_KEY: 2331 ret = check_dir_item(leaf, key, prev_key, slot); 2332 break; 2333 case BTRFS_INODE_REF_KEY: 2334 ret = check_inode_ref(leaf, key, prev_key, slot); 2335 break; 2336 case BTRFS_INODE_EXTREF_KEY: 2337 ret = check_inode_extref(leaf, key, prev_key, slot); 2338 break; 2339 case BTRFS_BLOCK_GROUP_ITEM_KEY: 2340 ret = check_block_group_item(leaf, key, slot); 2341 break; 2342 case BTRFS_CHUNK_ITEM_KEY: 2343 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 2344 ret = check_leaf_chunk_item(leaf, chunk, key, slot); 2345 break; 2346 case BTRFS_DEV_ITEM_KEY: 2347 ret = check_dev_item(leaf, key, slot); 2348 break; 2349 case BTRFS_DEV_EXTENT_KEY: 2350 ret = check_dev_extent_item(leaf, key, slot, prev_key); 2351 break; 2352 case BTRFS_INODE_ITEM_KEY: 2353 ret = check_inode_item(leaf, key, slot); 2354 break; 2355 case BTRFS_ROOT_ITEM_KEY: 2356 ret = check_root_item(leaf, key, slot); 2357 break; 2358 case BTRFS_ROOT_REF_KEY: 2359 case BTRFS_ROOT_BACKREF_KEY: 2360 ret = check_root_ref(leaf, key, slot); 2361 break; 2362 case BTRFS_EXTENT_ITEM_KEY: 2363 case BTRFS_METADATA_ITEM_KEY: 2364 ret = check_extent_item(leaf, key, slot, prev_key); 2365 break; 2366 case BTRFS_TREE_BLOCK_REF_KEY: 2367 case BTRFS_SHARED_DATA_REF_KEY: 2368 case BTRFS_SHARED_BLOCK_REF_KEY: 2369 ret = check_simple_keyed_refs(leaf, key, slot); 2370 break; 2371 case BTRFS_EXTENT_DATA_REF_KEY: 2372 ret = check_extent_data_ref(leaf, key, slot); 2373 break; 2374 case BTRFS_RAID_STRIPE_KEY: 2375 ret = check_raid_stripe_extent(leaf, key, slot); 2376 break; 2377 case BTRFS_FREE_SPACE_INFO_KEY: 2378 ret = check_free_space_info(leaf, key, slot); 2379 break; 2380 case BTRFS_FREE_SPACE_EXTENT_KEY: 2381 ret = check_free_space_extent(leaf, key, slot, prev_key); 2382 break; 2383 case BTRFS_FREE_SPACE_BITMAP_KEY: 2384 ret = check_free_space_bitmap(leaf, key, slot, prev_key); 2385 break; 2386 case BTRFS_IDENTITY_REMAP_KEY: 2387 case BTRFS_REMAP_KEY: 2388 case BTRFS_REMAP_BACKREF_KEY: 2389 ret = check_remap_key(leaf, key, slot); 2390 break; 2391 } 2392 2393 if (unlikely(ret)) 2394 return BTRFS_TREE_BLOCK_INVALID_ITEM; 2395 return BTRFS_TREE_BLOCK_CLEAN; 2396 } 2397 2398 enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf) 2399 { 2400 struct btrfs_fs_info *fs_info = leaf->fs_info; 2401 /* No valid key type is 0, so all key should be larger than this key */ 2402 struct btrfs_key prev_key = {0, 0, 0}; 2403 struct btrfs_key key; 2404 u32 nritems = btrfs_header_nritems(leaf); 2405 int slot; 2406 2407 if (unlikely(btrfs_header_level(leaf) != 0)) { 2408 generic_err(leaf, 0, 2409 "invalid level for leaf, have %d expect 0", 2410 btrfs_header_level(leaf)); 2411 return BTRFS_TREE_BLOCK_INVALID_LEVEL; 2412 } 2413 2414 if (unlikely(!btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN))) { 2415 generic_err(leaf, 0, "invalid flag for leaf, WRITTEN not set"); 2416 return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET; 2417 } 2418 2419 /* 2420 * Extent buffers from a relocation tree have a owner field that 2421 * corresponds to the subvolume tree they are based on. So just from an 2422 * extent buffer alone we can not find out what is the id of the 2423 * corresponding subvolume tree, so we can not figure out if the extent 2424 * buffer corresponds to the root of the relocation tree or not. So 2425 * skip this check for relocation trees. 2426 */ 2427 if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { 2428 u64 owner = btrfs_header_owner(leaf); 2429 2430 /* These trees must never be empty */ 2431 if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID || 2432 owner == BTRFS_CHUNK_TREE_OBJECTID || 2433 owner == BTRFS_DEV_TREE_OBJECTID || 2434 owner == BTRFS_FS_TREE_OBJECTID || 2435 owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) { 2436 generic_err(leaf, 0, 2437 "invalid root, root %llu must never be empty", 2438 owner); 2439 return BTRFS_TREE_BLOCK_INVALID_NRITEMS; 2440 } 2441 2442 /* Unknown tree */ 2443 if (unlikely(owner == 0)) { 2444 generic_err(leaf, 0, 2445 "invalid owner, root 0 is not defined"); 2446 return BTRFS_TREE_BLOCK_INVALID_OWNER; 2447 } 2448 2449 /* EXTENT_TREE_V2 can have empty extent trees. */ 2450 if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) 2451 return BTRFS_TREE_BLOCK_CLEAN; 2452 2453 if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) { 2454 generic_err(leaf, 0, 2455 "invalid root, root %llu must never be empty", 2456 owner); 2457 return BTRFS_TREE_BLOCK_INVALID_NRITEMS; 2458 } 2459 2460 return BTRFS_TREE_BLOCK_CLEAN; 2461 } 2462 2463 if (unlikely(nritems == 0)) 2464 return BTRFS_TREE_BLOCK_CLEAN; 2465 2466 /* 2467 * Check the following things to make sure this is a good leaf, and 2468 * leaf users won't need to bother with similar sanity checks: 2469 * 2470 * 1) key ordering 2471 * 2) item offset and size 2472 * No overlap, no hole, all inside the leaf. 2473 * 3) item content 2474 * If possible, do comprehensive sanity check. 2475 * NOTE: All checks must only rely on the item data itself. 2476 */ 2477 for (slot = 0; slot < nritems; slot++) { 2478 u32 item_end_expected; 2479 u64 item_data_end; 2480 enum btrfs_tree_block_status ret; 2481 2482 btrfs_item_key_to_cpu(leaf, &key, slot); 2483 2484 /* Make sure the keys are in the right order */ 2485 if (unlikely(btrfs_comp_cpu_keys(&prev_key, &key) >= 0)) { 2486 generic_err(leaf, slot, 2487 "bad key order, prev " BTRFS_KEY_FMT " current " BTRFS_KEY_FMT, 2488 BTRFS_KEY_FMT_VALUE(&prev_key), 2489 BTRFS_KEY_FMT_VALUE(&key)); 2490 return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; 2491 } 2492 2493 item_data_end = (u64)btrfs_item_offset(leaf, slot) + 2494 btrfs_item_size(leaf, slot); 2495 /* 2496 * Make sure the offset and ends are right, remember that the 2497 * item data starts at the end of the leaf and grows towards the 2498 * front. 2499 */ 2500 if (slot == 0) 2501 item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); 2502 else 2503 item_end_expected = btrfs_item_offset(leaf, 2504 slot - 1); 2505 if (unlikely(item_data_end != item_end_expected)) { 2506 generic_err(leaf, slot, 2507 "unexpected item end, have %llu expect %u", 2508 item_data_end, item_end_expected); 2509 return BTRFS_TREE_BLOCK_INVALID_OFFSETS; 2510 } 2511 2512 /* 2513 * Check to make sure that we don't point outside of the leaf, 2514 * just in case all the items are consistent to each other, but 2515 * all point outside of the leaf. 2516 */ 2517 if (unlikely(item_data_end > BTRFS_LEAF_DATA_SIZE(fs_info))) { 2518 generic_err(leaf, slot, 2519 "slot end outside of leaf, have %llu expect range [0, %u]", 2520 item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info)); 2521 return BTRFS_TREE_BLOCK_INVALID_OFFSETS; 2522 } 2523 2524 /* Also check if the item pointer overlaps with btrfs item. */ 2525 if (unlikely(btrfs_item_ptr_offset(leaf, slot) < 2526 btrfs_item_nr_offset(leaf, slot) + sizeof(struct btrfs_item))) { 2527 generic_err(leaf, slot, 2528 "slot overlaps with its data, item end %lu data start %lu", 2529 btrfs_item_nr_offset(leaf, slot) + 2530 sizeof(struct btrfs_item), 2531 btrfs_item_ptr_offset(leaf, slot)); 2532 return BTRFS_TREE_BLOCK_INVALID_OFFSETS; 2533 } 2534 2535 /* Check if the item size and content meet other criteria. */ 2536 ret = check_leaf_item(leaf, &key, slot, &prev_key); 2537 if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) 2538 return ret; 2539 2540 prev_key.objectid = key.objectid; 2541 prev_key.type = key.type; 2542 prev_key.offset = key.offset; 2543 } 2544 2545 return BTRFS_TREE_BLOCK_CLEAN; 2546 } 2547 2548 int btrfs_check_leaf(struct extent_buffer *leaf) 2549 { 2550 enum btrfs_tree_block_status ret; 2551 2552 ret = __btrfs_check_leaf(leaf); 2553 if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) 2554 return -EUCLEAN; 2555 return 0; 2556 } 2557 ALLOW_ERROR_INJECTION(btrfs_check_leaf, ERRNO); 2558 2559 enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node) 2560 { 2561 struct btrfs_fs_info *fs_info = node->fs_info; 2562 unsigned long nr = btrfs_header_nritems(node); 2563 struct btrfs_key key, next_key; 2564 int slot; 2565 int level = btrfs_header_level(node); 2566 u64 bytenr; 2567 2568 if (unlikely(!btrfs_header_flag(node, BTRFS_HEADER_FLAG_WRITTEN))) { 2569 generic_err(node, 0, "invalid flag for node, WRITTEN not set"); 2570 return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET; 2571 } 2572 2573 if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) { 2574 generic_err(node, 0, 2575 "invalid level for node, have %d expect [1, %d]", 2576 level, BTRFS_MAX_LEVEL - 1); 2577 return BTRFS_TREE_BLOCK_INVALID_LEVEL; 2578 } 2579 if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) { 2580 btrfs_crit(fs_info, 2581 "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", 2582 btrfs_header_owner(node), node->start, 2583 nr == 0 ? "small" : "large", nr, 2584 BTRFS_NODEPTRS_PER_BLOCK(fs_info)); 2585 return BTRFS_TREE_BLOCK_INVALID_NRITEMS; 2586 } 2587 2588 for (slot = 0; slot < nr - 1; slot++) { 2589 bytenr = btrfs_node_blockptr(node, slot); 2590 btrfs_node_key_to_cpu(node, &key, slot); 2591 btrfs_node_key_to_cpu(node, &next_key, slot + 1); 2592 2593 if (unlikely(!bytenr)) { 2594 generic_err(node, slot, 2595 "invalid NULL node pointer"); 2596 return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR; 2597 } 2598 if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) { 2599 generic_err(node, slot, 2600 "unaligned pointer, have %llu should be aligned to %u", 2601 bytenr, fs_info->sectorsize); 2602 return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR; 2603 } 2604 2605 if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) { 2606 generic_err(node, slot, 2607 "bad key order, current " BTRFS_KEY_FMT " next " BTRFS_KEY_FMT, 2608 BTRFS_KEY_FMT_VALUE(&key), 2609 BTRFS_KEY_FMT_VALUE(&next_key)); 2610 return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; 2611 } 2612 } 2613 return BTRFS_TREE_BLOCK_CLEAN; 2614 } 2615 2616 int btrfs_check_node(struct extent_buffer *node) 2617 { 2618 enum btrfs_tree_block_status ret; 2619 2620 ret = __btrfs_check_node(node); 2621 if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) 2622 return -EUCLEAN; 2623 return 0; 2624 } 2625 ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO); 2626 2627 int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner) 2628 { 2629 const bool is_subvol = btrfs_is_fstree(root_owner); 2630 const u64 eb_owner = btrfs_header_owner(eb); 2631 2632 /* 2633 * Skip dummy fs, as selftests don't create unique ebs for each dummy 2634 * root. 2635 */ 2636 if (btrfs_is_testing(eb->fs_info)) 2637 return 0; 2638 /* 2639 * There are several call sites (backref walking, qgroup, and data 2640 * reloc) passing 0 as @root_owner, as they are not holding the 2641 * tree root. In that case, we can not do a reliable ownership check, 2642 * so just exit. 2643 */ 2644 if (root_owner == 0) 2645 return 0; 2646 /* 2647 * These trees use key.offset as their owner, our callers don't have 2648 * the extra capacity to pass key.offset here. So we just skip them. 2649 */ 2650 if (root_owner == BTRFS_TREE_LOG_OBJECTID || 2651 root_owner == BTRFS_TREE_RELOC_OBJECTID) 2652 return 0; 2653 2654 if (!is_subvol) { 2655 /* For non-subvolume trees, the eb owner should match root owner */ 2656 if (unlikely(root_owner != eb_owner)) { 2657 btrfs_crit(eb->fs_info, 2658 "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect %llu", 2659 btrfs_header_level(eb) == 0 ? "leaf" : "node", 2660 root_owner, btrfs_header_bytenr(eb), eb_owner, 2661 root_owner); 2662 return -EUCLEAN; 2663 } 2664 return 0; 2665 } 2666 2667 /* 2668 * For subvolume trees, owners can mismatch, but they should all belong 2669 * to subvolume trees. 2670 */ 2671 if (unlikely(is_subvol != btrfs_is_fstree(eb_owner))) { 2672 btrfs_crit(eb->fs_info, 2673 "corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect [%llu, %llu]", 2674 btrfs_header_level(eb) == 0 ? "leaf" : "node", 2675 root_owner, btrfs_header_bytenr(eb), eb_owner, 2676 BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID); 2677 return -EUCLEAN; 2678 } 2679 return 0; 2680 } 2681 2682 int btrfs_verify_level_key(struct extent_buffer *eb, 2683 const struct btrfs_tree_parent_check *check) 2684 { 2685 struct btrfs_fs_info *fs_info = eb->fs_info; 2686 int found_level; 2687 struct btrfs_key found_key; 2688 int ret; 2689 2690 found_level = btrfs_header_level(eb); 2691 if (unlikely(found_level != check->level)) { 2692 DEBUG_WARN(); 2693 btrfs_err(fs_info, 2694 "tree level mismatch detected, bytenr=%llu level expected=%u has=%u", 2695 eb->start, check->level, found_level); 2696 return -EUCLEAN; 2697 } 2698 2699 if (!check->has_first_key) 2700 return 0; 2701 2702 /* 2703 * For live tree block (new tree blocks in current transaction), 2704 * we need proper lock context to avoid race, which is impossible here. 2705 * So we only checks tree blocks which is read from disk, whose 2706 * generation <= fs_info->last_trans_committed. 2707 */ 2708 if (btrfs_header_generation(eb) > btrfs_get_last_trans_committed(fs_info)) 2709 return 0; 2710 2711 /* We have @first_key, so this @eb must have at least one item */ 2712 if (unlikely(btrfs_header_nritems(eb) == 0)) { 2713 btrfs_err(fs_info, 2714 "invalid tree nritems, bytenr=%llu nritems=0 expect >0", 2715 eb->start); 2716 DEBUG_WARN(); 2717 return -EUCLEAN; 2718 } 2719 2720 if (found_level) 2721 btrfs_node_key_to_cpu(eb, &found_key, 0); 2722 else 2723 btrfs_item_key_to_cpu(eb, &found_key, 0); 2724 2725 ret = btrfs_comp_cpu_keys(&check->first_key, &found_key); 2726 if (unlikely(ret)) { 2727 DEBUG_WARN(); 2728 btrfs_err(fs_info, 2729 "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", 2730 eb->start, check->transid, check->first_key.objectid, 2731 check->first_key.type, check->first_key.offset, 2732 found_key.objectid, found_key.type, 2733 found_key.offset); 2734 } 2735 return ret; 2736 } 2737