1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2007 Oracle. All rights reserved. 4 */ 5 6 #include <linux/bio.h> 7 #include <linux/slab.h> 8 #include <linux/pagemap.h> 9 #include <linux/highmem.h> 10 #include <linux/sched/mm.h> 11 #include <crypto/hash.h> 12 #include "ctree.h" 13 #include "disk-io.h" 14 #include "transaction.h" 15 #include "volumes.h" 16 #include "print-tree.h" 17 #include "compression.h" 18 19 #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ 20 sizeof(struct btrfs_item) * 2) / \ 21 size) - 1)) 22 23 #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ 24 PAGE_SIZE)) 25 26 /** 27 * @inode - the inode we want to update the disk_i_size for 28 * @new_i_size - the i_size we want to set to, 0 if we use i_size 29 * 30 * With NO_HOLES set this simply sets the disk_is_size to whatever i_size_read() 31 * returns as it is perfectly fine with a file that has holes without hole file 32 * extent items. 33 * 34 * However without NO_HOLES we need to only return the area that is contiguous 35 * from the 0 offset of the file. Otherwise we could end up adjust i_size up 36 * to an extent that has a gap in between. 37 * 38 * Finally new_i_size should only be set in the case of truncate where we're not 39 * ready to use i_size_read() as the limiter yet. 40 */ 41 void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size) 42 { 43 struct btrfs_fs_info *fs_info = inode->root->fs_info; 44 u64 start, end, i_size; 45 int ret; 46 47 i_size = new_i_size ?: i_size_read(&inode->vfs_inode); 48 if (btrfs_fs_incompat(fs_info, NO_HOLES)) { 49 inode->disk_i_size = i_size; 50 return; 51 } 52 53 spin_lock(&inode->lock); 54 ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start, 55 &end, EXTENT_DIRTY); 56 if (!ret && start == 0) 57 i_size = min(i_size, end + 1); 58 else 59 i_size = 0; 60 inode->disk_i_size = i_size; 61 spin_unlock(&inode->lock); 62 } 63 64 /** 65 * @inode - the inode we're modifying 66 * @start - the start file offset of the file extent we've inserted 67 * @len - the logical length of the file extent item 68 * 69 * Call when we are inserting a new file extent where there was none before. 70 * Does not need to call this in the case where we're replacing an existing file 71 * extent, however if not sure it's fine to call this multiple times. 72 * 73 * The start and len must match the file extent item, so thus must be sectorsize 74 * aligned. 75 */ 76 int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, 77 u64 len) 78 { 79 if (len == 0) 80 return 0; 81 82 ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize)); 83 84 if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES)) 85 return 0; 86 return set_extent_bits(&inode->file_extent_tree, start, start + len - 1, 87 EXTENT_DIRTY); 88 } 89 90 /** 91 * @inode - the inode we're modifying 92 * @start - the start file offset of the file extent we've inserted 93 * @len - the logical length of the file extent item 94 * 95 * Called when we drop a file extent, for example when we truncate. Doesn't 96 * need to be called for cases where we're replacing a file extent, like when 97 * we've COWed a file extent. 98 * 99 * The start and len must match the file extent item, so thus must be sectorsize 100 * aligned. 101 */ 102 int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start, 103 u64 len) 104 { 105 if (len == 0) 106 return 0; 107 108 ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) || 109 len == (u64)-1); 110 111 if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES)) 112 return 0; 113 return clear_extent_bit(&inode->file_extent_tree, start, 114 start + len - 1, EXTENT_DIRTY, 0, 0, NULL); 115 } 116 117 static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info, 118 u16 csum_size) 119 { 120 u32 ncsums = (PAGE_SIZE - sizeof(struct btrfs_ordered_sum)) / csum_size; 121 122 return ncsums * fs_info->sectorsize; 123 } 124 125 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 126 struct btrfs_root *root, 127 u64 objectid, u64 pos, 128 u64 disk_offset, u64 disk_num_bytes, 129 u64 num_bytes, u64 offset, u64 ram_bytes, 130 u8 compression, u8 encryption, u16 other_encoding) 131 { 132 int ret = 0; 133 struct btrfs_file_extent_item *item; 134 struct btrfs_key file_key; 135 struct btrfs_path *path; 136 struct extent_buffer *leaf; 137 138 path = btrfs_alloc_path(); 139 if (!path) 140 return -ENOMEM; 141 file_key.objectid = objectid; 142 file_key.offset = pos; 143 file_key.type = BTRFS_EXTENT_DATA_KEY; 144 145 ret = btrfs_insert_empty_item(trans, root, path, &file_key, 146 sizeof(*item)); 147 if (ret < 0) 148 goto out; 149 BUG_ON(ret); /* Can't happen */ 150 leaf = path->nodes[0]; 151 item = btrfs_item_ptr(leaf, path->slots[0], 152 struct btrfs_file_extent_item); 153 btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset); 154 btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); 155 btrfs_set_file_extent_offset(leaf, item, offset); 156 btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); 157 btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes); 158 btrfs_set_file_extent_generation(leaf, item, trans->transid); 159 btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); 160 btrfs_set_file_extent_compression(leaf, item, compression); 161 btrfs_set_file_extent_encryption(leaf, item, encryption); 162 btrfs_set_file_extent_other_encoding(leaf, item, other_encoding); 163 164 btrfs_mark_buffer_dirty(leaf); 165 out: 166 btrfs_free_path(path); 167 return ret; 168 } 169 170 static struct btrfs_csum_item * 171 btrfs_lookup_csum(struct btrfs_trans_handle *trans, 172 struct btrfs_root *root, 173 struct btrfs_path *path, 174 u64 bytenr, int cow) 175 { 176 struct btrfs_fs_info *fs_info = root->fs_info; 177 int ret; 178 struct btrfs_key file_key; 179 struct btrfs_key found_key; 180 struct btrfs_csum_item *item; 181 struct extent_buffer *leaf; 182 u64 csum_offset = 0; 183 const u32 csum_size = fs_info->csum_size; 184 int csums_in_item; 185 186 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 187 file_key.offset = bytenr; 188 file_key.type = BTRFS_EXTENT_CSUM_KEY; 189 ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 190 if (ret < 0) 191 goto fail; 192 leaf = path->nodes[0]; 193 if (ret > 0) { 194 ret = 1; 195 if (path->slots[0] == 0) 196 goto fail; 197 path->slots[0]--; 198 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 199 if (found_key.type != BTRFS_EXTENT_CSUM_KEY) 200 goto fail; 201 202 csum_offset = (bytenr - found_key.offset) >> 203 fs_info->sectorsize_bits; 204 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 205 csums_in_item /= csum_size; 206 207 if (csum_offset == csums_in_item) { 208 ret = -EFBIG; 209 goto fail; 210 } else if (csum_offset > csums_in_item) { 211 goto fail; 212 } 213 } 214 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 215 item = (struct btrfs_csum_item *)((unsigned char *)item + 216 csum_offset * csum_size); 217 return item; 218 fail: 219 if (ret > 0) 220 ret = -ENOENT; 221 return ERR_PTR(ret); 222 } 223 224 int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, 225 struct btrfs_root *root, 226 struct btrfs_path *path, u64 objectid, 227 u64 offset, int mod) 228 { 229 int ret; 230 struct btrfs_key file_key; 231 int ins_len = mod < 0 ? -1 : 0; 232 int cow = mod != 0; 233 234 file_key.objectid = objectid; 235 file_key.offset = offset; 236 file_key.type = BTRFS_EXTENT_DATA_KEY; 237 ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); 238 return ret; 239 } 240 241 /* 242 * Find checksums for logical bytenr range [disk_bytenr, disk_bytenr + len) and 243 * estore the result to @dst. 244 * 245 * Return >0 for the number of sectors we found. 246 * Return 0 for the range [disk_bytenr, disk_bytenr + sectorsize) has no csum 247 * for it. Caller may want to try next sector until one range is hit. 248 * Return <0 for fatal error. 249 */ 250 static int search_csum_tree(struct btrfs_fs_info *fs_info, 251 struct btrfs_path *path, u64 disk_bytenr, 252 u64 len, u8 *dst) 253 { 254 struct btrfs_csum_item *item = NULL; 255 struct btrfs_key key; 256 const u32 sectorsize = fs_info->sectorsize; 257 const u32 csum_size = fs_info->csum_size; 258 u32 itemsize; 259 int ret; 260 u64 csum_start; 261 u64 csum_len; 262 263 ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) && 264 IS_ALIGNED(len, sectorsize)); 265 266 /* Check if the current csum item covers disk_bytenr */ 267 if (path->nodes[0]) { 268 item = btrfs_item_ptr(path->nodes[0], path->slots[0], 269 struct btrfs_csum_item); 270 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 271 itemsize = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 272 273 csum_start = key.offset; 274 csum_len = (itemsize / csum_size) * sectorsize; 275 276 if (in_range(disk_bytenr, csum_start, csum_len)) 277 goto found; 278 } 279 280 /* Current item doesn't contain the desired range, search again */ 281 btrfs_release_path(path); 282 item = btrfs_lookup_csum(NULL, fs_info->csum_root, path, disk_bytenr, 0); 283 if (IS_ERR(item)) { 284 ret = PTR_ERR(item); 285 goto out; 286 } 287 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 288 itemsize = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 289 290 csum_start = key.offset; 291 csum_len = (itemsize / csum_size) * sectorsize; 292 ASSERT(in_range(disk_bytenr, csum_start, csum_len)); 293 294 found: 295 ret = (min(csum_start + csum_len, disk_bytenr + len) - 296 disk_bytenr) >> fs_info->sectorsize_bits; 297 read_extent_buffer(path->nodes[0], dst, (unsigned long)item, 298 ret * csum_size); 299 out: 300 if (ret == -ENOENT) 301 ret = 0; 302 return ret; 303 } 304 305 /* 306 * Locate the file_offset of @cur_disk_bytenr of a @bio. 307 * 308 * Bio of btrfs represents read range of 309 * [bi_sector << 9, bi_sector << 9 + bi_size). 310 * Knowing this, we can iterate through each bvec to locate the page belong to 311 * @cur_disk_bytenr and get the file offset. 312 * 313 * @inode is used to determine if the bvec page really belongs to @inode. 314 * 315 * Return 0 if we can't find the file offset 316 * Return >0 if we find the file offset and restore it to @file_offset_ret 317 */ 318 static int search_file_offset_in_bio(struct bio *bio, struct inode *inode, 319 u64 disk_bytenr, u64 *file_offset_ret) 320 { 321 struct bvec_iter iter; 322 struct bio_vec bvec; 323 u64 cur = bio->bi_iter.bi_sector << SECTOR_SHIFT; 324 int ret = 0; 325 326 bio_for_each_segment(bvec, bio, iter) { 327 struct page *page = bvec.bv_page; 328 329 if (cur > disk_bytenr) 330 break; 331 if (cur + bvec.bv_len <= disk_bytenr) { 332 cur += bvec.bv_len; 333 continue; 334 } 335 ASSERT(in_range(disk_bytenr, cur, bvec.bv_len)); 336 if (page->mapping && page->mapping->host && 337 page->mapping->host == inode) { 338 ret = 1; 339 *file_offset_ret = page_offset(page) + bvec.bv_offset + 340 disk_bytenr - cur; 341 break; 342 } 343 } 344 return ret; 345 } 346 347 /** 348 * Lookup the checksum for the read bio in csum tree. 349 * 350 * @inode: inode that the bio is for. 351 * @bio: bio to look up. 352 * @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return 353 * checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If 354 * NULL, the checksum buffer is allocated and returned in 355 * btrfs_io_bio(bio)->csum instead. 356 * 357 * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise. 358 */ 359 blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst) 360 { 361 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 362 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 363 struct btrfs_path *path; 364 const u32 sectorsize = fs_info->sectorsize; 365 const u32 csum_size = fs_info->csum_size; 366 u32 orig_len = bio->bi_iter.bi_size; 367 u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT; 368 u64 cur_disk_bytenr; 369 u8 *csum; 370 const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits; 371 int count = 0; 372 373 if (!fs_info->csum_root || (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 374 return BLK_STS_OK; 375 376 /* 377 * This function is only called for read bio. 378 * 379 * This means two things: 380 * - All our csums should only be in csum tree 381 * No ordered extents csums, as ordered extents are only for write 382 * path. 383 * - No need to bother any other info from bvec 384 * Since we're looking up csums, the only important info is the 385 * disk_bytenr and the length, which can be extracted from bi_iter 386 * directly. 387 */ 388 ASSERT(bio_op(bio) == REQ_OP_READ); 389 path = btrfs_alloc_path(); 390 if (!path) 391 return BLK_STS_RESOURCE; 392 393 if (!dst) { 394 struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio); 395 396 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { 397 btrfs_bio->csum = kmalloc_array(nblocks, csum_size, 398 GFP_NOFS); 399 if (!btrfs_bio->csum) { 400 btrfs_free_path(path); 401 return BLK_STS_RESOURCE; 402 } 403 } else { 404 btrfs_bio->csum = btrfs_bio->csum_inline; 405 } 406 csum = btrfs_bio->csum; 407 } else { 408 csum = dst; 409 } 410 411 /* 412 * If requested number of sectors is larger than one leaf can contain, 413 * kick the readahead for csum tree. 414 */ 415 if (nblocks > fs_info->csums_per_leaf) 416 path->reada = READA_FORWARD; 417 418 /* 419 * the free space stuff is only read when it hasn't been 420 * updated in the current transaction. So, we can safely 421 * read from the commit root and sidestep a nasty deadlock 422 * between reading the free space cache and updating the csum tree. 423 */ 424 if (btrfs_is_free_space_inode(BTRFS_I(inode))) { 425 path->search_commit_root = 1; 426 path->skip_locking = 1; 427 } 428 429 for (cur_disk_bytenr = orig_disk_bytenr; 430 cur_disk_bytenr < orig_disk_bytenr + orig_len; 431 cur_disk_bytenr += (count * sectorsize)) { 432 u64 search_len = orig_disk_bytenr + orig_len - cur_disk_bytenr; 433 unsigned int sector_offset; 434 u8 *csum_dst; 435 436 /* 437 * Although both cur_disk_bytenr and orig_disk_bytenr is u64, 438 * we're calculating the offset to the bio start. 439 * 440 * Bio size is limited to UINT_MAX, thus unsigned int is large 441 * enough to contain the raw result, not to mention the right 442 * shifted result. 443 */ 444 ASSERT(cur_disk_bytenr - orig_disk_bytenr < UINT_MAX); 445 sector_offset = (cur_disk_bytenr - orig_disk_bytenr) >> 446 fs_info->sectorsize_bits; 447 csum_dst = csum + sector_offset * csum_size; 448 449 count = search_csum_tree(fs_info, path, cur_disk_bytenr, 450 search_len, csum_dst); 451 if (count <= 0) { 452 /* 453 * Either we hit a critical error or we didn't find 454 * the csum. 455 * Either way, we put zero into the csums dst, and skip 456 * to the next sector. 457 */ 458 memset(csum_dst, 0, csum_size); 459 count = 1; 460 461 /* 462 * For data reloc inode, we need to mark the range 463 * NODATASUM so that balance won't report false csum 464 * error. 465 */ 466 if (BTRFS_I(inode)->root->root_key.objectid == 467 BTRFS_DATA_RELOC_TREE_OBJECTID) { 468 u64 file_offset; 469 int ret; 470 471 ret = search_file_offset_in_bio(bio, inode, 472 cur_disk_bytenr, &file_offset); 473 if (ret) 474 set_extent_bits(io_tree, file_offset, 475 file_offset + sectorsize - 1, 476 EXTENT_NODATASUM); 477 } else { 478 btrfs_warn_rl(fs_info, 479 "csum hole found for disk bytenr range [%llu, %llu)", 480 cur_disk_bytenr, cur_disk_bytenr + sectorsize); 481 } 482 } 483 } 484 485 btrfs_free_path(path); 486 return BLK_STS_OK; 487 } 488 489 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 490 struct list_head *list, int search_commit) 491 { 492 struct btrfs_fs_info *fs_info = root->fs_info; 493 struct btrfs_key key; 494 struct btrfs_path *path; 495 struct extent_buffer *leaf; 496 struct btrfs_ordered_sum *sums; 497 struct btrfs_csum_item *item; 498 LIST_HEAD(tmplist); 499 unsigned long offset; 500 int ret; 501 size_t size; 502 u64 csum_end; 503 const u32 csum_size = fs_info->csum_size; 504 505 ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && 506 IS_ALIGNED(end + 1, fs_info->sectorsize)); 507 508 path = btrfs_alloc_path(); 509 if (!path) 510 return -ENOMEM; 511 512 if (search_commit) { 513 path->skip_locking = 1; 514 path->reada = READA_FORWARD; 515 path->search_commit_root = 1; 516 } 517 518 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 519 key.offset = start; 520 key.type = BTRFS_EXTENT_CSUM_KEY; 521 522 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 523 if (ret < 0) 524 goto fail; 525 if (ret > 0 && path->slots[0] > 0) { 526 leaf = path->nodes[0]; 527 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1); 528 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID && 529 key.type == BTRFS_EXTENT_CSUM_KEY) { 530 offset = (start - key.offset) >> fs_info->sectorsize_bits; 531 if (offset * csum_size < 532 btrfs_item_size_nr(leaf, path->slots[0] - 1)) 533 path->slots[0]--; 534 } 535 } 536 537 while (start <= end) { 538 leaf = path->nodes[0]; 539 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 540 ret = btrfs_next_leaf(root, path); 541 if (ret < 0) 542 goto fail; 543 if (ret > 0) 544 break; 545 leaf = path->nodes[0]; 546 } 547 548 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 549 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 550 key.type != BTRFS_EXTENT_CSUM_KEY || 551 key.offset > end) 552 break; 553 554 if (key.offset > start) 555 start = key.offset; 556 557 size = btrfs_item_size_nr(leaf, path->slots[0]); 558 csum_end = key.offset + (size / csum_size) * fs_info->sectorsize; 559 if (csum_end <= start) { 560 path->slots[0]++; 561 continue; 562 } 563 564 csum_end = min(csum_end, end + 1); 565 item = btrfs_item_ptr(path->nodes[0], path->slots[0], 566 struct btrfs_csum_item); 567 while (start < csum_end) { 568 size = min_t(size_t, csum_end - start, 569 max_ordered_sum_bytes(fs_info, csum_size)); 570 sums = kzalloc(btrfs_ordered_sum_size(fs_info, size), 571 GFP_NOFS); 572 if (!sums) { 573 ret = -ENOMEM; 574 goto fail; 575 } 576 577 sums->bytenr = start; 578 sums->len = (int)size; 579 580 offset = (start - key.offset) >> fs_info->sectorsize_bits; 581 offset *= csum_size; 582 size >>= fs_info->sectorsize_bits; 583 584 read_extent_buffer(path->nodes[0], 585 sums->sums, 586 ((unsigned long)item) + offset, 587 csum_size * size); 588 589 start += fs_info->sectorsize * size; 590 list_add_tail(&sums->list, &tmplist); 591 } 592 path->slots[0]++; 593 } 594 ret = 0; 595 fail: 596 while (ret < 0 && !list_empty(&tmplist)) { 597 sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list); 598 list_del(&sums->list); 599 kfree(sums); 600 } 601 list_splice_tail(&tmplist, list); 602 603 btrfs_free_path(path); 604 return ret; 605 } 606 607 /* 608 * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio 609 * @inode: Owner of the data inside the bio 610 * @bio: Contains the data to be checksummed 611 * @file_start: offset in file this bio begins to describe 612 * @contig: Boolean. If true/1 means all bio vecs in this bio are 613 * contiguous and they begin at @file_start in the file. False/0 614 * means this bio can contains potentially discontigous bio vecs 615 * so the logical offset of each should be calculated separately. 616 */ 617 blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio, 618 u64 file_start, int contig) 619 { 620 struct btrfs_fs_info *fs_info = inode->root->fs_info; 621 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 622 struct btrfs_ordered_sum *sums; 623 struct btrfs_ordered_extent *ordered = NULL; 624 char *data; 625 struct bvec_iter iter; 626 struct bio_vec bvec; 627 int index; 628 int nr_sectors; 629 unsigned long total_bytes = 0; 630 unsigned long this_sum_bytes = 0; 631 int i; 632 u64 offset; 633 unsigned nofs_flag; 634 635 nofs_flag = memalloc_nofs_save(); 636 sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), 637 GFP_KERNEL); 638 memalloc_nofs_restore(nofs_flag); 639 640 if (!sums) 641 return BLK_STS_RESOURCE; 642 643 sums->len = bio->bi_iter.bi_size; 644 INIT_LIST_HEAD(&sums->list); 645 646 if (contig) 647 offset = file_start; 648 else 649 offset = 0; /* shut up gcc */ 650 651 sums->bytenr = bio->bi_iter.bi_sector << 9; 652 index = 0; 653 654 shash->tfm = fs_info->csum_shash; 655 656 bio_for_each_segment(bvec, bio, iter) { 657 if (!contig) 658 offset = page_offset(bvec.bv_page) + bvec.bv_offset; 659 660 if (!ordered) { 661 ordered = btrfs_lookup_ordered_extent(inode, offset); 662 BUG_ON(!ordered); /* Logic error */ 663 } 664 665 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, 666 bvec.bv_len + fs_info->sectorsize 667 - 1); 668 669 for (i = 0; i < nr_sectors; i++) { 670 if (offset >= ordered->file_offset + ordered->num_bytes || 671 offset < ordered->file_offset) { 672 unsigned long bytes_left; 673 674 sums->len = this_sum_bytes; 675 this_sum_bytes = 0; 676 btrfs_add_ordered_sum(ordered, sums); 677 btrfs_put_ordered_extent(ordered); 678 679 bytes_left = bio->bi_iter.bi_size - total_bytes; 680 681 nofs_flag = memalloc_nofs_save(); 682 sums = kvzalloc(btrfs_ordered_sum_size(fs_info, 683 bytes_left), GFP_KERNEL); 684 memalloc_nofs_restore(nofs_flag); 685 BUG_ON(!sums); /* -ENOMEM */ 686 sums->len = bytes_left; 687 ordered = btrfs_lookup_ordered_extent(inode, 688 offset); 689 ASSERT(ordered); /* Logic error */ 690 sums->bytenr = (bio->bi_iter.bi_sector << 9) 691 + total_bytes; 692 index = 0; 693 } 694 695 data = kmap_atomic(bvec.bv_page); 696 crypto_shash_digest(shash, data + bvec.bv_offset 697 + (i * fs_info->sectorsize), 698 fs_info->sectorsize, 699 sums->sums + index); 700 kunmap_atomic(data); 701 index += fs_info->csum_size; 702 offset += fs_info->sectorsize; 703 this_sum_bytes += fs_info->sectorsize; 704 total_bytes += fs_info->sectorsize; 705 } 706 707 } 708 this_sum_bytes = 0; 709 btrfs_add_ordered_sum(ordered, sums); 710 btrfs_put_ordered_extent(ordered); 711 return 0; 712 } 713 714 /* 715 * helper function for csum removal, this expects the 716 * key to describe the csum pointed to by the path, and it expects 717 * the csum to overlap the range [bytenr, len] 718 * 719 * The csum should not be entirely contained in the range and the 720 * range should not be entirely contained in the csum. 721 * 722 * This calls btrfs_truncate_item with the correct args based on the 723 * overlap, and fixes up the key as required. 724 */ 725 static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, 726 struct btrfs_path *path, 727 struct btrfs_key *key, 728 u64 bytenr, u64 len) 729 { 730 struct extent_buffer *leaf; 731 const u32 csum_size = fs_info->csum_size; 732 u64 csum_end; 733 u64 end_byte = bytenr + len; 734 u32 blocksize_bits = fs_info->sectorsize_bits; 735 736 leaf = path->nodes[0]; 737 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; 738 csum_end <<= blocksize_bits; 739 csum_end += key->offset; 740 741 if (key->offset < bytenr && csum_end <= end_byte) { 742 /* 743 * [ bytenr - len ] 744 * [ ] 745 * [csum ] 746 * A simple truncate off the end of the item 747 */ 748 u32 new_size = (bytenr - key->offset) >> blocksize_bits; 749 new_size *= csum_size; 750 btrfs_truncate_item(path, new_size, 1); 751 } else if (key->offset >= bytenr && csum_end > end_byte && 752 end_byte > key->offset) { 753 /* 754 * [ bytenr - len ] 755 * [ ] 756 * [csum ] 757 * we need to truncate from the beginning of the csum 758 */ 759 u32 new_size = (csum_end - end_byte) >> blocksize_bits; 760 new_size *= csum_size; 761 762 btrfs_truncate_item(path, new_size, 0); 763 764 key->offset = end_byte; 765 btrfs_set_item_key_safe(fs_info, path, key); 766 } else { 767 BUG(); 768 } 769 } 770 771 /* 772 * deletes the csum items from the csum tree for a given 773 * range of bytes. 774 */ 775 int btrfs_del_csums(struct btrfs_trans_handle *trans, 776 struct btrfs_root *root, u64 bytenr, u64 len) 777 { 778 struct btrfs_fs_info *fs_info = trans->fs_info; 779 struct btrfs_path *path; 780 struct btrfs_key key; 781 u64 end_byte = bytenr + len; 782 u64 csum_end; 783 struct extent_buffer *leaf; 784 int ret; 785 const u32 csum_size = fs_info->csum_size; 786 u32 blocksize_bits = fs_info->sectorsize_bits; 787 788 ASSERT(root == fs_info->csum_root || 789 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); 790 791 path = btrfs_alloc_path(); 792 if (!path) 793 return -ENOMEM; 794 795 while (1) { 796 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 797 key.offset = end_byte - 1; 798 key.type = BTRFS_EXTENT_CSUM_KEY; 799 800 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 801 if (ret > 0) { 802 if (path->slots[0] == 0) 803 break; 804 path->slots[0]--; 805 } else if (ret < 0) { 806 break; 807 } 808 809 leaf = path->nodes[0]; 810 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 811 812 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 813 key.type != BTRFS_EXTENT_CSUM_KEY) { 814 break; 815 } 816 817 if (key.offset >= end_byte) 818 break; 819 820 csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size; 821 csum_end <<= blocksize_bits; 822 csum_end += key.offset; 823 824 /* this csum ends before we start, we're done */ 825 if (csum_end <= bytenr) 826 break; 827 828 /* delete the entire item, it is inside our range */ 829 if (key.offset >= bytenr && csum_end <= end_byte) { 830 int del_nr = 1; 831 832 /* 833 * Check how many csum items preceding this one in this 834 * leaf correspond to our range and then delete them all 835 * at once. 836 */ 837 if (key.offset > bytenr && path->slots[0] > 0) { 838 int slot = path->slots[0] - 1; 839 840 while (slot >= 0) { 841 struct btrfs_key pk; 842 843 btrfs_item_key_to_cpu(leaf, &pk, slot); 844 if (pk.offset < bytenr || 845 pk.type != BTRFS_EXTENT_CSUM_KEY || 846 pk.objectid != 847 BTRFS_EXTENT_CSUM_OBJECTID) 848 break; 849 path->slots[0] = slot; 850 del_nr++; 851 key.offset = pk.offset; 852 slot--; 853 } 854 } 855 ret = btrfs_del_items(trans, root, path, 856 path->slots[0], del_nr); 857 if (ret) 858 goto out; 859 if (key.offset == bytenr) 860 break; 861 } else if (key.offset < bytenr && csum_end > end_byte) { 862 unsigned long offset; 863 unsigned long shift_len; 864 unsigned long item_offset; 865 /* 866 * [ bytenr - len ] 867 * [csum ] 868 * 869 * Our bytes are in the middle of the csum, 870 * we need to split this item and insert a new one. 871 * 872 * But we can't drop the path because the 873 * csum could change, get removed, extended etc. 874 * 875 * The trick here is the max size of a csum item leaves 876 * enough room in the tree block for a single 877 * item header. So, we split the item in place, 878 * adding a new header pointing to the existing 879 * bytes. Then we loop around again and we have 880 * a nicely formed csum item that we can neatly 881 * truncate. 882 */ 883 offset = (bytenr - key.offset) >> blocksize_bits; 884 offset *= csum_size; 885 886 shift_len = (len >> blocksize_bits) * csum_size; 887 888 item_offset = btrfs_item_ptr_offset(leaf, 889 path->slots[0]); 890 891 memzero_extent_buffer(leaf, item_offset + offset, 892 shift_len); 893 key.offset = bytenr; 894 895 /* 896 * btrfs_split_item returns -EAGAIN when the 897 * item changed size or key 898 */ 899 ret = btrfs_split_item(trans, root, path, &key, offset); 900 if (ret && ret != -EAGAIN) { 901 btrfs_abort_transaction(trans, ret); 902 goto out; 903 } 904 905 key.offset = end_byte - 1; 906 } else { 907 truncate_one_csum(fs_info, path, &key, bytenr, len); 908 if (key.offset < bytenr) 909 break; 910 } 911 btrfs_release_path(path); 912 } 913 ret = 0; 914 out: 915 btrfs_free_path(path); 916 return ret; 917 } 918 919 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 920 struct btrfs_root *root, 921 struct btrfs_ordered_sum *sums) 922 { 923 struct btrfs_fs_info *fs_info = root->fs_info; 924 struct btrfs_key file_key; 925 struct btrfs_key found_key; 926 struct btrfs_path *path; 927 struct btrfs_csum_item *item; 928 struct btrfs_csum_item *item_end; 929 struct extent_buffer *leaf = NULL; 930 u64 next_offset; 931 u64 total_bytes = 0; 932 u64 csum_offset; 933 u64 bytenr; 934 u32 nritems; 935 u32 ins_size; 936 int index = 0; 937 int found_next; 938 int ret; 939 const u32 csum_size = fs_info->csum_size; 940 941 path = btrfs_alloc_path(); 942 if (!path) 943 return -ENOMEM; 944 again: 945 next_offset = (u64)-1; 946 found_next = 0; 947 bytenr = sums->bytenr + total_bytes; 948 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 949 file_key.offset = bytenr; 950 file_key.type = BTRFS_EXTENT_CSUM_KEY; 951 952 item = btrfs_lookup_csum(trans, root, path, bytenr, 1); 953 if (!IS_ERR(item)) { 954 ret = 0; 955 leaf = path->nodes[0]; 956 item_end = btrfs_item_ptr(leaf, path->slots[0], 957 struct btrfs_csum_item); 958 item_end = (struct btrfs_csum_item *)((char *)item_end + 959 btrfs_item_size_nr(leaf, path->slots[0])); 960 goto found; 961 } 962 ret = PTR_ERR(item); 963 if (ret != -EFBIG && ret != -ENOENT) 964 goto out; 965 966 if (ret == -EFBIG) { 967 u32 item_size; 968 /* we found one, but it isn't big enough yet */ 969 leaf = path->nodes[0]; 970 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 971 if ((item_size / csum_size) >= 972 MAX_CSUM_ITEMS(fs_info, csum_size)) { 973 /* already at max size, make a new one */ 974 goto insert; 975 } 976 } else { 977 int slot = path->slots[0] + 1; 978 /* we didn't find a csum item, insert one */ 979 nritems = btrfs_header_nritems(path->nodes[0]); 980 if (!nritems || (path->slots[0] >= nritems - 1)) { 981 ret = btrfs_next_leaf(root, path); 982 if (ret < 0) { 983 goto out; 984 } else if (ret > 0) { 985 found_next = 1; 986 goto insert; 987 } 988 slot = path->slots[0]; 989 } 990 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); 991 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 992 found_key.type != BTRFS_EXTENT_CSUM_KEY) { 993 found_next = 1; 994 goto insert; 995 } 996 next_offset = found_key.offset; 997 found_next = 1; 998 goto insert; 999 } 1000 1001 /* 1002 * At this point, we know the tree has a checksum item that ends at an 1003 * offset matching the start of the checksum range we want to insert. 1004 * We try to extend that item as much as possible and then add as many 1005 * checksums to it as they fit. 1006 * 1007 * First check if the leaf has enough free space for at least one 1008 * checksum. If it has go directly to the item extension code, otherwise 1009 * release the path and do a search for insertion before the extension. 1010 */ 1011 if (btrfs_leaf_free_space(leaf) >= csum_size) { 1012 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1013 csum_offset = (bytenr - found_key.offset) >> 1014 fs_info->sectorsize_bits; 1015 goto extend_csum; 1016 } 1017 1018 btrfs_release_path(path); 1019 path->search_for_extension = 1; 1020 ret = btrfs_search_slot(trans, root, &file_key, path, 1021 csum_size, 1); 1022 path->search_for_extension = 0; 1023 if (ret < 0) 1024 goto out; 1025 1026 if (ret > 0) { 1027 if (path->slots[0] == 0) 1028 goto insert; 1029 path->slots[0]--; 1030 } 1031 1032 leaf = path->nodes[0]; 1033 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1034 csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits; 1035 1036 if (found_key.type != BTRFS_EXTENT_CSUM_KEY || 1037 found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 1038 csum_offset >= MAX_CSUM_ITEMS(fs_info, csum_size)) { 1039 goto insert; 1040 } 1041 1042 extend_csum: 1043 if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) / 1044 csum_size) { 1045 int extend_nr; 1046 u64 tmp; 1047 u32 diff; 1048 1049 tmp = sums->len - total_bytes; 1050 tmp >>= fs_info->sectorsize_bits; 1051 WARN_ON(tmp < 1); 1052 1053 extend_nr = max_t(int, 1, (int)tmp); 1054 diff = (csum_offset + extend_nr) * csum_size; 1055 diff = min(diff, 1056 MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size); 1057 1058 diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); 1059 diff = min_t(u32, btrfs_leaf_free_space(leaf), diff); 1060 diff /= csum_size; 1061 diff *= csum_size; 1062 1063 btrfs_extend_item(path, diff); 1064 ret = 0; 1065 goto csum; 1066 } 1067 1068 insert: 1069 btrfs_release_path(path); 1070 csum_offset = 0; 1071 if (found_next) { 1072 u64 tmp; 1073 1074 tmp = sums->len - total_bytes; 1075 tmp >>= fs_info->sectorsize_bits; 1076 tmp = min(tmp, (next_offset - file_key.offset) >> 1077 fs_info->sectorsize_bits); 1078 1079 tmp = max_t(u64, 1, tmp); 1080 tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size)); 1081 ins_size = csum_size * tmp; 1082 } else { 1083 ins_size = csum_size; 1084 } 1085 ret = btrfs_insert_empty_item(trans, root, path, &file_key, 1086 ins_size); 1087 if (ret < 0) 1088 goto out; 1089 if (WARN_ON(ret != 0)) 1090 goto out; 1091 leaf = path->nodes[0]; 1092 csum: 1093 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 1094 item_end = (struct btrfs_csum_item *)((unsigned char *)item + 1095 btrfs_item_size_nr(leaf, path->slots[0])); 1096 item = (struct btrfs_csum_item *)((unsigned char *)item + 1097 csum_offset * csum_size); 1098 found: 1099 ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits; 1100 ins_size *= csum_size; 1101 ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item, 1102 ins_size); 1103 write_extent_buffer(leaf, sums->sums + index, (unsigned long)item, 1104 ins_size); 1105 1106 index += ins_size; 1107 ins_size /= csum_size; 1108 total_bytes += ins_size * fs_info->sectorsize; 1109 1110 btrfs_mark_buffer_dirty(path->nodes[0]); 1111 if (total_bytes < sums->len) { 1112 btrfs_release_path(path); 1113 cond_resched(); 1114 goto again; 1115 } 1116 out: 1117 btrfs_free_path(path); 1118 return ret; 1119 } 1120 1121 void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, 1122 const struct btrfs_path *path, 1123 struct btrfs_file_extent_item *fi, 1124 const bool new_inline, 1125 struct extent_map *em) 1126 { 1127 struct btrfs_fs_info *fs_info = inode->root->fs_info; 1128 struct btrfs_root *root = inode->root; 1129 struct extent_buffer *leaf = path->nodes[0]; 1130 const int slot = path->slots[0]; 1131 struct btrfs_key key; 1132 u64 extent_start, extent_end; 1133 u64 bytenr; 1134 u8 type = btrfs_file_extent_type(leaf, fi); 1135 int compress_type = btrfs_file_extent_compression(leaf, fi); 1136 1137 btrfs_item_key_to_cpu(leaf, &key, slot); 1138 extent_start = key.offset; 1139 extent_end = btrfs_file_extent_end(path); 1140 em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 1141 if (type == BTRFS_FILE_EXTENT_REG || 1142 type == BTRFS_FILE_EXTENT_PREALLOC) { 1143 em->start = extent_start; 1144 em->len = extent_end - extent_start; 1145 em->orig_start = extent_start - 1146 btrfs_file_extent_offset(leaf, fi); 1147 em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); 1148 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1149 if (bytenr == 0) { 1150 em->block_start = EXTENT_MAP_HOLE; 1151 return; 1152 } 1153 if (compress_type != BTRFS_COMPRESS_NONE) { 1154 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 1155 em->compress_type = compress_type; 1156 em->block_start = bytenr; 1157 em->block_len = em->orig_block_len; 1158 } else { 1159 bytenr += btrfs_file_extent_offset(leaf, fi); 1160 em->block_start = bytenr; 1161 em->block_len = em->len; 1162 if (type == BTRFS_FILE_EXTENT_PREALLOC) 1163 set_bit(EXTENT_FLAG_PREALLOC, &em->flags); 1164 } 1165 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 1166 em->block_start = EXTENT_MAP_INLINE; 1167 em->start = extent_start; 1168 em->len = extent_end - extent_start; 1169 /* 1170 * Initialize orig_start and block_len with the same values 1171 * as in inode.c:btrfs_get_extent(). 1172 */ 1173 em->orig_start = EXTENT_MAP_HOLE; 1174 em->block_len = (u64)-1; 1175 if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) { 1176 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 1177 em->compress_type = compress_type; 1178 } 1179 } else { 1180 btrfs_err(fs_info, 1181 "unknown file extent item type %d, inode %llu, offset %llu, " 1182 "root %llu", type, btrfs_ino(inode), extent_start, 1183 root->root_key.objectid); 1184 } 1185 } 1186 1187 /* 1188 * Returns the end offset (non inclusive) of the file extent item the given path 1189 * points to. If it points to an inline extent, the returned offset is rounded 1190 * up to the sector size. 1191 */ 1192 u64 btrfs_file_extent_end(const struct btrfs_path *path) 1193 { 1194 const struct extent_buffer *leaf = path->nodes[0]; 1195 const int slot = path->slots[0]; 1196 struct btrfs_file_extent_item *fi; 1197 struct btrfs_key key; 1198 u64 end; 1199 1200 btrfs_item_key_to_cpu(leaf, &key, slot); 1201 ASSERT(key.type == BTRFS_EXTENT_DATA_KEY); 1202 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 1203 1204 if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { 1205 end = btrfs_file_extent_ram_bytes(leaf, fi); 1206 end = ALIGN(key.offset + end, leaf->fs_info->sectorsize); 1207 } else { 1208 end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 1209 } 1210 1211 return end; 1212 } 1213