1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2008 Red Hat. All rights reserved. 4 */ 5 6 #include <linux/pagemap.h> 7 #include <linux/sched.h> 8 #include <linux/sched/signal.h> 9 #include <linux/slab.h> 10 #include <linux/math64.h> 11 #include <linux/ratelimit.h> 12 #include <linux/error-injection.h> 13 #include <linux/sched/mm.h> 14 #include "ctree.h" 15 #include "free-space-cache.h" 16 #include "transaction.h" 17 #include "disk-io.h" 18 #include "extent_io.h" 19 #include "inode-map.h" 20 #include "volumes.h" 21 #include "space-info.h" 22 #include "delalloc-space.h" 23 #include "block-group.h" 24 #include "discard.h" 25 26 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 27 #define MAX_CACHE_BYTES_PER_GIG SZ_64K 28 #define FORCE_EXTENT_THRESHOLD SZ_1M 29 30 struct btrfs_trim_range { 31 u64 start; 32 u64 bytes; 33 struct list_head list; 34 }; 35 36 static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl, 37 struct btrfs_free_space *bitmap_info); 38 static int link_free_space(struct btrfs_free_space_ctl *ctl, 39 struct btrfs_free_space *info); 40 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 41 struct btrfs_free_space *info); 42 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 43 struct btrfs_trans_handle *trans, 44 struct btrfs_io_ctl *io_ctl, 45 struct btrfs_path *path); 46 47 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 48 struct btrfs_path *path, 49 u64 offset) 50 { 51 struct btrfs_fs_info *fs_info = root->fs_info; 52 struct btrfs_key key; 53 struct btrfs_key location; 54 struct btrfs_disk_key disk_key; 55 struct btrfs_free_space_header *header; 56 struct extent_buffer *leaf; 57 struct inode *inode = NULL; 58 unsigned nofs_flag; 59 int ret; 60 61 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 62 key.offset = offset; 63 key.type = 0; 64 65 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 66 if (ret < 0) 67 return ERR_PTR(ret); 68 if (ret > 0) { 69 btrfs_release_path(path); 70 return ERR_PTR(-ENOENT); 71 } 72 73 leaf = path->nodes[0]; 74 header = btrfs_item_ptr(leaf, path->slots[0], 75 struct btrfs_free_space_header); 76 btrfs_free_space_key(leaf, header, &disk_key); 77 btrfs_disk_key_to_cpu(&location, &disk_key); 78 btrfs_release_path(path); 79 80 /* 81 * We are often under a trans handle at this point, so we need to make 82 * sure NOFS is set to keep us from deadlocking. 
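	 * memalloc_nofs_save() scopes GFP_NOFS onto every allocation made
	 * until the matching memalloc_nofs_restore() below, so the iget
	 * cannot recurse back into the filesystem via direct reclaim.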
83 */ 84 nofs_flag = memalloc_nofs_save(); 85 inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path); 86 btrfs_release_path(path); 87 memalloc_nofs_restore(nofs_flag); 88 if (IS_ERR(inode)) 89 return inode; 90 91 mapping_set_gfp_mask(inode->i_mapping, 92 mapping_gfp_constraint(inode->i_mapping, 93 ~(__GFP_FS | __GFP_HIGHMEM))); 94 95 return inode; 96 } 97 98 struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group, 99 struct btrfs_path *path) 100 { 101 struct btrfs_fs_info *fs_info = block_group->fs_info; 102 struct inode *inode = NULL; 103 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 104 105 spin_lock(&block_group->lock); 106 if (block_group->inode) 107 inode = igrab(block_group->inode); 108 spin_unlock(&block_group->lock); 109 if (inode) 110 return inode; 111 112 inode = __lookup_free_space_inode(fs_info->tree_root, path, 113 block_group->start); 114 if (IS_ERR(inode)) 115 return inode; 116 117 spin_lock(&block_group->lock); 118 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 119 btrfs_info(fs_info, "Old style space inode found, converting."); 120 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 121 BTRFS_INODE_NODATACOW; 122 block_group->disk_cache_state = BTRFS_DC_CLEAR; 123 } 124 125 if (!block_group->iref) { 126 block_group->inode = igrab(inode); 127 block_group->iref = 1; 128 } 129 spin_unlock(&block_group->lock); 130 131 return inode; 132 } 133 134 static int __create_free_space_inode(struct btrfs_root *root, 135 struct btrfs_trans_handle *trans, 136 struct btrfs_path *path, 137 u64 ino, u64 offset) 138 { 139 struct btrfs_key key; 140 struct btrfs_disk_key disk_key; 141 struct btrfs_free_space_header *header; 142 struct btrfs_inode_item *inode_item; 143 struct extent_buffer *leaf; 144 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 145 int ret; 146 147 ret = btrfs_insert_empty_inode(trans, root, path, ino); 148 if (ret) 149 return ret; 150 151 /* We inline crc's for the free disk space cache */ 152 if (ino != BTRFS_FREE_INO_OBJECTID) 153 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 154 155 leaf = path->nodes[0]; 156 inode_item = btrfs_item_ptr(leaf, path->slots[0], 157 struct btrfs_inode_item); 158 btrfs_item_key(leaf, &disk_key, path->slots[0]); 159 memzero_extent_buffer(leaf, (unsigned long)inode_item, 160 sizeof(*inode_item)); 161 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 162 btrfs_set_inode_size(leaf, inode_item, 0); 163 btrfs_set_inode_nbytes(leaf, inode_item, 0); 164 btrfs_set_inode_uid(leaf, inode_item, 0); 165 btrfs_set_inode_gid(leaf, inode_item, 0); 166 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 167 btrfs_set_inode_flags(leaf, inode_item, flags); 168 btrfs_set_inode_nlink(leaf, inode_item, 1); 169 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 170 btrfs_set_inode_block_group(leaf, inode_item, offset); 171 btrfs_mark_buffer_dirty(leaf); 172 btrfs_release_path(path); 173 174 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 175 key.offset = offset; 176 key.type = 0; 177 ret = btrfs_insert_empty_item(trans, root, path, &key, 178 sizeof(struct btrfs_free_space_header)); 179 if (ret < 0) { 180 btrfs_release_path(path); 181 return ret; 182 } 183 184 leaf = path->nodes[0]; 185 header = btrfs_item_ptr(leaf, path->slots[0], 186 struct btrfs_free_space_header); 187 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 188 btrfs_set_free_space_key(leaf, header, &disk_key); 189 btrfs_mark_buffer_dirty(leaf); 190 btrfs_release_path(path); 191 192 return 0; 193 } 
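
/*
 * Each block group's cache lives as two pieces in the tree root, both
 * created by __create_free_space_inode() above: an inode item whose data
 * pages hold the serialized entries, and a free space header item keyed
 * (BTRFS_FREE_SPACE_OBJECTID, 0, block group start) that records the
 * entry and bitmap counts plus the generation of the last cache write.
 */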

int create_free_space_inode(struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
					 ino, block_group->start);
}

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
		btrfs_calc_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
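	 * A non-zero return here is therefore a real error and ends up
	 * aborting the transaction in the fail: path below.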
269 */ 270 ret = btrfs_truncate_inode_items(trans, root, inode, 271 0, BTRFS_EXTENT_DATA_KEY); 272 if (ret) 273 goto fail; 274 275 ret = btrfs_update_inode(trans, root, inode); 276 277 fail: 278 if (locked) 279 mutex_unlock(&trans->transaction->cache_write_mutex); 280 if (ret) 281 btrfs_abort_transaction(trans, ret); 282 283 return ret; 284 } 285 286 static void readahead_cache(struct inode *inode) 287 { 288 struct file_ra_state *ra; 289 unsigned long last_index; 290 291 ra = kzalloc(sizeof(*ra), GFP_NOFS); 292 if (!ra) 293 return; 294 295 file_ra_state_init(ra, inode->i_mapping); 296 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 297 298 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 299 300 kfree(ra); 301 } 302 303 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 304 int write) 305 { 306 int num_pages; 307 int check_crcs = 0; 308 309 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 310 311 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID) 312 check_crcs = 1; 313 314 /* Make sure we can fit our crcs and generation into the first page */ 315 if (write && check_crcs && 316 (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE) 317 return -ENOSPC; 318 319 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 320 321 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 322 if (!io_ctl->pages) 323 return -ENOMEM; 324 325 io_ctl->num_pages = num_pages; 326 io_ctl->fs_info = btrfs_sb(inode->i_sb); 327 io_ctl->check_crcs = check_crcs; 328 io_ctl->inode = inode; 329 330 return 0; 331 } 332 ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO); 333 334 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 335 { 336 kfree(io_ctl->pages); 337 io_ctl->pages = NULL; 338 } 339 340 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 341 { 342 if (io_ctl->cur) { 343 io_ctl->cur = NULL; 344 io_ctl->orig = NULL; 345 } 346 } 347 348 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 349 { 350 ASSERT(io_ctl->index < io_ctl->num_pages); 351 io_ctl->page = io_ctl->pages[io_ctl->index++]; 352 io_ctl->cur = page_address(io_ctl->page); 353 io_ctl->orig = io_ctl->cur; 354 io_ctl->size = PAGE_SIZE; 355 if (clear) 356 clear_page(io_ctl->cur); 357 } 358 359 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 360 { 361 int i; 362 363 io_ctl_unmap_page(io_ctl); 364 365 for (i = 0; i < io_ctl->num_pages; i++) { 366 if (io_ctl->pages[i]) { 367 ClearPageChecked(io_ctl->pages[i]); 368 unlock_page(io_ctl->pages[i]); 369 put_page(io_ctl->pages[i]); 370 } 371 } 372 } 373 374 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate) 375 { 376 struct page *page; 377 struct inode *inode = io_ctl->inode; 378 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 379 int i; 380 381 for (i = 0; i < io_ctl->num_pages; i++) { 382 page = find_or_create_page(inode->i_mapping, i, mask); 383 if (!page) { 384 io_ctl_drop_pages(io_ctl); 385 return -ENOMEM; 386 } 387 io_ctl->pages[i] = page; 388 if (uptodate && !PageUptodate(page)) { 389 btrfs_readpage(NULL, page); 390 lock_page(page); 391 if (page->mapping != inode->i_mapping) { 392 btrfs_err(BTRFS_I(inode)->root->fs_info, 393 "free space cache page truncated"); 394 io_ctl_drop_pages(io_ctl); 395 return -EIO; 396 } 397 if (!PageUptodate(page)) { 398 btrfs_err(BTRFS_I(inode)->root->fs_info, 399 "error reading free space cache"); 400 io_ctl_drop_pages(io_ctl); 401 return -EIO; 402 } 403 } 404 } 405 406 for (i = 0; i < io_ctl->num_pages; i++) { 407 
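		/*
		 * The pages are locked from find_or_create_page() above; drop
		 * any stale dirty state and mark them extent mapped before
		 * they are filled in or written out.
		 */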
clear_page_dirty_for_io(io_ctl->pages[i]); 408 set_page_extent_mapped(io_ctl->pages[i]); 409 } 410 411 return 0; 412 } 413 414 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 415 { 416 io_ctl_map_page(io_ctl, 1); 417 418 /* 419 * Skip the csum areas. If we don't check crcs then we just have a 420 * 64bit chunk at the front of the first page. 421 */ 422 if (io_ctl->check_crcs) { 423 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 424 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 425 } else { 426 io_ctl->cur += sizeof(u64); 427 io_ctl->size -= sizeof(u64) * 2; 428 } 429 430 put_unaligned_le64(generation, io_ctl->cur); 431 io_ctl->cur += sizeof(u64); 432 } 433 434 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 435 { 436 u64 cache_gen; 437 438 /* 439 * Skip the crc area. If we don't check crcs then we just have a 64bit 440 * chunk at the front of the first page. 441 */ 442 if (io_ctl->check_crcs) { 443 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 444 io_ctl->size -= sizeof(u64) + 445 (sizeof(u32) * io_ctl->num_pages); 446 } else { 447 io_ctl->cur += sizeof(u64); 448 io_ctl->size -= sizeof(u64) * 2; 449 } 450 451 cache_gen = get_unaligned_le64(io_ctl->cur); 452 if (cache_gen != generation) { 453 btrfs_err_rl(io_ctl->fs_info, 454 "space cache generation (%llu) does not match inode (%llu)", 455 cache_gen, generation); 456 io_ctl_unmap_page(io_ctl); 457 return -EIO; 458 } 459 io_ctl->cur += sizeof(u64); 460 return 0; 461 } 462 463 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 464 { 465 u32 *tmp; 466 u32 crc = ~(u32)0; 467 unsigned offset = 0; 468 469 if (!io_ctl->check_crcs) { 470 io_ctl_unmap_page(io_ctl); 471 return; 472 } 473 474 if (index == 0) 475 offset = sizeof(u32) * io_ctl->num_pages; 476 477 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); 478 btrfs_crc32c_final(crc, (u8 *)&crc); 479 io_ctl_unmap_page(io_ctl); 480 tmp = page_address(io_ctl->pages[0]); 481 tmp += index; 482 *tmp = crc; 483 } 484 485 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 486 { 487 u32 *tmp, val; 488 u32 crc = ~(u32)0; 489 unsigned offset = 0; 490 491 if (!io_ctl->check_crcs) { 492 io_ctl_map_page(io_ctl, 0); 493 return 0; 494 } 495 496 if (index == 0) 497 offset = sizeof(u32) * io_ctl->num_pages; 498 499 tmp = page_address(io_ctl->pages[0]); 500 tmp += index; 501 val = *tmp; 502 503 io_ctl_map_page(io_ctl, 0); 504 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); 505 btrfs_crc32c_final(crc, (u8 *)&crc); 506 if (val != crc) { 507 btrfs_err_rl(io_ctl->fs_info, 508 "csum mismatch on free space cache"); 509 io_ctl_unmap_page(io_ctl); 510 return -EIO; 511 } 512 513 return 0; 514 } 515 516 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 517 void *bitmap) 518 { 519 struct btrfs_free_space_entry *entry; 520 521 if (!io_ctl->cur) 522 return -ENOSPC; 523 524 entry = io_ctl->cur; 525 put_unaligned_le64(offset, &entry->offset); 526 put_unaligned_le64(bytes, &entry->bytes); 527 entry->type = (bitmap) ? 
BTRFS_FREE_SPACE_BITMAP : 528 BTRFS_FREE_SPACE_EXTENT; 529 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 530 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 531 532 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 533 return 0; 534 535 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 536 537 /* No more pages to map */ 538 if (io_ctl->index >= io_ctl->num_pages) 539 return 0; 540 541 /* map the next page */ 542 io_ctl_map_page(io_ctl, 1); 543 return 0; 544 } 545 546 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 547 { 548 if (!io_ctl->cur) 549 return -ENOSPC; 550 551 /* 552 * If we aren't at the start of the current page, unmap this one and 553 * map the next one if there is any left. 554 */ 555 if (io_ctl->cur != io_ctl->orig) { 556 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 557 if (io_ctl->index >= io_ctl->num_pages) 558 return -ENOSPC; 559 io_ctl_map_page(io_ctl, 0); 560 } 561 562 copy_page(io_ctl->cur, bitmap); 563 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 564 if (io_ctl->index < io_ctl->num_pages) 565 io_ctl_map_page(io_ctl, 0); 566 return 0; 567 } 568 569 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 570 { 571 /* 572 * If we're not on the boundary we know we've modified the page and we 573 * need to crc the page. 574 */ 575 if (io_ctl->cur != io_ctl->orig) 576 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 577 else 578 io_ctl_unmap_page(io_ctl); 579 580 while (io_ctl->index < io_ctl->num_pages) { 581 io_ctl_map_page(io_ctl, 1); 582 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 583 } 584 } 585 586 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 587 struct btrfs_free_space *entry, u8 *type) 588 { 589 struct btrfs_free_space_entry *e; 590 int ret; 591 592 if (!io_ctl->cur) { 593 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 594 if (ret) 595 return ret; 596 } 597 598 e = io_ctl->cur; 599 entry->offset = get_unaligned_le64(&e->offset); 600 entry->bytes = get_unaligned_le64(&e->bytes); 601 *type = e->type; 602 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 603 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 604 605 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 606 return 0; 607 608 io_ctl_unmap_page(io_ctl); 609 610 return 0; 611 } 612 613 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 614 struct btrfs_free_space *entry) 615 { 616 int ret; 617 618 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 619 if (ret) 620 return ret; 621 622 copy_page(entry->bitmap, io_ctl->cur); 623 io_ctl_unmap_page(io_ctl); 624 625 return 0; 626 } 627 628 /* 629 * Since we attach pinned extents after the fact we can have contiguous sections 630 * of free space that are split up in entries. This poses a problem with the 631 * tree logging stuff since it could have allocated across what appears to be 2 632 * entries since we would have merged the entries when adding the pinned extents 633 * back to the free space cache. So run through the space cache that we just 634 * loaded and merge contiguous entries. This will make the log replay stuff not 635 * blow up and it will make for nicer allocator behavior. 
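 *
 * The walk below restarts from the beginning of the rbtree after every
 * merge because it drops tree_lock, so it is quadratic in the worst case,
 * but it only runs once per cache load.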
636 */ 637 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 638 { 639 struct btrfs_free_space *e, *prev = NULL; 640 struct rb_node *n; 641 642 again: 643 spin_lock(&ctl->tree_lock); 644 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 645 e = rb_entry(n, struct btrfs_free_space, offset_index); 646 if (!prev) 647 goto next; 648 if (e->bitmap || prev->bitmap) 649 goto next; 650 if (prev->offset + prev->bytes == e->offset) { 651 unlink_free_space(ctl, prev); 652 unlink_free_space(ctl, e); 653 prev->bytes += e->bytes; 654 kmem_cache_free(btrfs_free_space_cachep, e); 655 link_free_space(ctl, prev); 656 prev = NULL; 657 spin_unlock(&ctl->tree_lock); 658 goto again; 659 } 660 next: 661 prev = e; 662 } 663 spin_unlock(&ctl->tree_lock); 664 } 665 666 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 667 struct btrfs_free_space_ctl *ctl, 668 struct btrfs_path *path, u64 offset) 669 { 670 struct btrfs_fs_info *fs_info = root->fs_info; 671 struct btrfs_free_space_header *header; 672 struct extent_buffer *leaf; 673 struct btrfs_io_ctl io_ctl; 674 struct btrfs_key key; 675 struct btrfs_free_space *e, *n; 676 LIST_HEAD(bitmaps); 677 u64 num_entries; 678 u64 num_bitmaps; 679 u64 generation; 680 u8 type; 681 int ret = 0; 682 683 /* Nothing in the space cache, goodbye */ 684 if (!i_size_read(inode)) 685 return 0; 686 687 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 688 key.offset = offset; 689 key.type = 0; 690 691 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 692 if (ret < 0) 693 return 0; 694 else if (ret > 0) { 695 btrfs_release_path(path); 696 return 0; 697 } 698 699 ret = -1; 700 701 leaf = path->nodes[0]; 702 header = btrfs_item_ptr(leaf, path->slots[0], 703 struct btrfs_free_space_header); 704 num_entries = btrfs_free_space_entries(leaf, header); 705 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 706 generation = btrfs_free_space_generation(leaf, header); 707 btrfs_release_path(path); 708 709 if (!BTRFS_I(inode)->generation) { 710 btrfs_info(fs_info, 711 "the free space cache file (%llu) is invalid, skip it", 712 offset); 713 return 0; 714 } 715 716 if (BTRFS_I(inode)->generation != generation) { 717 btrfs_err(fs_info, 718 "free space inode generation (%llu) did not match free space cache generation (%llu)", 719 BTRFS_I(inode)->generation, generation); 720 return 0; 721 } 722 723 if (!num_entries) 724 return 0; 725 726 ret = io_ctl_init(&io_ctl, inode, 0); 727 if (ret) 728 return ret; 729 730 readahead_cache(inode); 731 732 ret = io_ctl_prepare_pages(&io_ctl, true); 733 if (ret) 734 goto out; 735 736 ret = io_ctl_check_crc(&io_ctl, 0); 737 if (ret) 738 goto free_cache; 739 740 ret = io_ctl_check_generation(&io_ctl, generation); 741 if (ret) 742 goto free_cache; 743 744 while (num_entries) { 745 e = kmem_cache_zalloc(btrfs_free_space_cachep, 746 GFP_NOFS); 747 if (!e) 748 goto free_cache; 749 750 ret = io_ctl_read_entry(&io_ctl, e, &type); 751 if (ret) { 752 kmem_cache_free(btrfs_free_space_cachep, e); 753 goto free_cache; 754 } 755 756 /* 757 * Sync discard ensures that the free space cache is always 758 * trimmed. So when reading this in, the state should reflect 759 * that. We also do this for async as a stop gap for lack of 760 * persistence. 
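		 * Without either mount option the entry simply keeps the
		 * untrimmed state it got from the zeroed allocation above.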
761 */ 762 if (btrfs_test_opt(fs_info, DISCARD_SYNC) || 763 btrfs_test_opt(fs_info, DISCARD_ASYNC)) 764 e->trim_state = BTRFS_TRIM_STATE_TRIMMED; 765 766 if (!e->bytes) { 767 kmem_cache_free(btrfs_free_space_cachep, e); 768 goto free_cache; 769 } 770 771 if (type == BTRFS_FREE_SPACE_EXTENT) { 772 spin_lock(&ctl->tree_lock); 773 ret = link_free_space(ctl, e); 774 spin_unlock(&ctl->tree_lock); 775 if (ret) { 776 btrfs_err(fs_info, 777 "Duplicate entries in free space cache, dumping"); 778 kmem_cache_free(btrfs_free_space_cachep, e); 779 goto free_cache; 780 } 781 } else { 782 ASSERT(num_bitmaps); 783 num_bitmaps--; 784 e->bitmap = kmem_cache_zalloc( 785 btrfs_free_space_bitmap_cachep, GFP_NOFS); 786 if (!e->bitmap) { 787 kmem_cache_free( 788 btrfs_free_space_cachep, e); 789 goto free_cache; 790 } 791 spin_lock(&ctl->tree_lock); 792 ret = link_free_space(ctl, e); 793 ctl->total_bitmaps++; 794 ctl->op->recalc_thresholds(ctl); 795 spin_unlock(&ctl->tree_lock); 796 if (ret) { 797 btrfs_err(fs_info, 798 "Duplicate entries in free space cache, dumping"); 799 kmem_cache_free(btrfs_free_space_cachep, e); 800 goto free_cache; 801 } 802 list_add_tail(&e->list, &bitmaps); 803 } 804 805 num_entries--; 806 } 807 808 io_ctl_unmap_page(&io_ctl); 809 810 /* 811 * We add the bitmaps at the end of the entries in order that 812 * the bitmap entries are added to the cache. 813 */ 814 list_for_each_entry_safe(e, n, &bitmaps, list) { 815 list_del_init(&e->list); 816 ret = io_ctl_read_bitmap(&io_ctl, e); 817 if (ret) 818 goto free_cache; 819 e->bitmap_extents = count_bitmap_extents(ctl, e); 820 if (!btrfs_free_space_trimmed(e)) { 821 ctl->discardable_extents[BTRFS_STAT_CURR] += 822 e->bitmap_extents; 823 ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes; 824 } 825 } 826 827 io_ctl_drop_pages(&io_ctl); 828 merge_space_tree(ctl); 829 ret = 1; 830 out: 831 btrfs_discard_update_discardable(ctl->private, ctl); 832 io_ctl_free(&io_ctl); 833 return ret; 834 free_cache: 835 io_ctl_drop_pages(&io_ctl); 836 __btrfs_remove_free_space_cache(ctl); 837 goto out; 838 } 839 840 int load_free_space_cache(struct btrfs_block_group *block_group) 841 { 842 struct btrfs_fs_info *fs_info = block_group->fs_info; 843 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 844 struct inode *inode; 845 struct btrfs_path *path; 846 int ret = 0; 847 bool matched; 848 u64 used = block_group->used; 849 850 /* 851 * If this block group has been marked to be cleared for one reason or 852 * another then we can't trust the on disk cache, so just return. 853 */ 854 spin_lock(&block_group->lock); 855 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 856 spin_unlock(&block_group->lock); 857 return 0; 858 } 859 spin_unlock(&block_group->lock); 860 861 path = btrfs_alloc_path(); 862 if (!path) 863 return 0; 864 path->search_commit_root = 1; 865 path->skip_locking = 1; 866 867 /* 868 * We must pass a path with search_commit_root set to btrfs_iget in 869 * order to avoid a deadlock when allocating extents for the tree root. 870 * 871 * When we are COWing an extent buffer from the tree root, when looking 872 * for a free extent, at extent-tree.c:find_free_extent(), we can find 873 * block group without its free space cache loaded. When we find one 874 * we must load its space cache which requires reading its free space 875 * cache's inode item from the root tree. 
If this inode item is located 876 * in the same leaf that we started COWing before, then we end up in 877 * deadlock on the extent buffer (trying to read lock it when we 878 * previously write locked it). 879 * 880 * It's safe to read the inode item using the commit root because 881 * block groups, once loaded, stay in memory forever (until they are 882 * removed) as well as their space caches once loaded. New block groups 883 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so 884 * we will never try to read their inode item while the fs is mounted. 885 */ 886 inode = lookup_free_space_inode(block_group, path); 887 if (IS_ERR(inode)) { 888 btrfs_free_path(path); 889 return 0; 890 } 891 892 /* We may have converted the inode and made the cache invalid. */ 893 spin_lock(&block_group->lock); 894 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 895 spin_unlock(&block_group->lock); 896 btrfs_free_path(path); 897 goto out; 898 } 899 spin_unlock(&block_group->lock); 900 901 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 902 path, block_group->start); 903 btrfs_free_path(path); 904 if (ret <= 0) 905 goto out; 906 907 spin_lock(&ctl->tree_lock); 908 matched = (ctl->free_space == (block_group->length - used - 909 block_group->bytes_super)); 910 spin_unlock(&ctl->tree_lock); 911 912 if (!matched) { 913 __btrfs_remove_free_space_cache(ctl); 914 btrfs_warn(fs_info, 915 "block group %llu has wrong amount of free space", 916 block_group->start); 917 ret = -1; 918 } 919 out: 920 if (ret < 0) { 921 /* This cache is bogus, make sure it gets cleared */ 922 spin_lock(&block_group->lock); 923 block_group->disk_cache_state = BTRFS_DC_CLEAR; 924 spin_unlock(&block_group->lock); 925 ret = 0; 926 927 btrfs_warn(fs_info, 928 "failed to load free space cache for block group %llu, rebuilding it now", 929 block_group->start); 930 } 931 932 iput(inode); 933 return ret; 934 } 935 936 static noinline_for_stack 937 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 938 struct btrfs_free_space_ctl *ctl, 939 struct btrfs_block_group *block_group, 940 int *entries, int *bitmaps, 941 struct list_head *bitmap_list) 942 { 943 int ret; 944 struct btrfs_free_cluster *cluster = NULL; 945 struct btrfs_free_cluster *cluster_locked = NULL; 946 struct rb_node *node = rb_first(&ctl->free_space_offset); 947 struct btrfs_trim_range *trim_entry; 948 949 /* Get the cluster for this block_group if it exists */ 950 if (block_group && !list_empty(&block_group->cluster_list)) { 951 cluster = list_entry(block_group->cluster_list.next, 952 struct btrfs_free_cluster, 953 block_group_list); 954 } 955 956 if (!node && cluster) { 957 cluster_locked = cluster; 958 spin_lock(&cluster_locked->lock); 959 node = rb_first(&cluster->root); 960 cluster = NULL; 961 } 962 963 /* Write out the extent entries */ 964 while (node) { 965 struct btrfs_free_space *e; 966 967 e = rb_entry(node, struct btrfs_free_space, offset_index); 968 *entries += 1; 969 970 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 971 e->bitmap); 972 if (ret) 973 goto fail; 974 975 if (e->bitmap) { 976 list_add_tail(&e->list, bitmap_list); 977 *bitmaps += 1; 978 } 979 node = rb_next(node); 980 if (!node && cluster) { 981 node = rb_first(&cluster->root); 982 cluster_locked = cluster; 983 spin_lock(&cluster_locked->lock); 984 cluster = NULL; 985 } 986 } 987 if (cluster_locked) { 988 spin_unlock(&cluster_locked->lock); 989 cluster_locked = NULL; 990 } 991 992 /* 993 * Make sure we don't miss any range that was removed from our rbtree 994 
* because trimming is running. Otherwise after a umount+mount (or crash 995 * after committing the transaction) we would leak free space and get 996 * an inconsistent free space cache report from fsck. 997 */ 998 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 999 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 1000 trim_entry->bytes, NULL); 1001 if (ret) 1002 goto fail; 1003 *entries += 1; 1004 } 1005 1006 return 0; 1007 fail: 1008 if (cluster_locked) 1009 spin_unlock(&cluster_locked->lock); 1010 return -ENOSPC; 1011 } 1012 1013 static noinline_for_stack int 1014 update_cache_item(struct btrfs_trans_handle *trans, 1015 struct btrfs_root *root, 1016 struct inode *inode, 1017 struct btrfs_path *path, u64 offset, 1018 int entries, int bitmaps) 1019 { 1020 struct btrfs_key key; 1021 struct btrfs_free_space_header *header; 1022 struct extent_buffer *leaf; 1023 int ret; 1024 1025 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 1026 key.offset = offset; 1027 key.type = 0; 1028 1029 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 1030 if (ret < 0) { 1031 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1032 EXTENT_DELALLOC, 0, 0, NULL); 1033 goto fail; 1034 } 1035 leaf = path->nodes[0]; 1036 if (ret > 0) { 1037 struct btrfs_key found_key; 1038 ASSERT(path->slots[0]); 1039 path->slots[0]--; 1040 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1041 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1042 found_key.offset != offset) { 1043 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1044 inode->i_size - 1, EXTENT_DELALLOC, 0, 1045 0, NULL); 1046 btrfs_release_path(path); 1047 goto fail; 1048 } 1049 } 1050 1051 BTRFS_I(inode)->generation = trans->transid; 1052 header = btrfs_item_ptr(leaf, path->slots[0], 1053 struct btrfs_free_space_header); 1054 btrfs_set_free_space_entries(leaf, header, entries); 1055 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1056 btrfs_set_free_space_generation(leaf, header, trans->transid); 1057 btrfs_mark_buffer_dirty(leaf); 1058 btrfs_release_path(path); 1059 1060 return 0; 1061 1062 fail: 1063 return -1; 1064 } 1065 1066 static noinline_for_stack int write_pinned_extent_entries( 1067 struct btrfs_trans_handle *trans, 1068 struct btrfs_block_group *block_group, 1069 struct btrfs_io_ctl *io_ctl, 1070 int *entries) 1071 { 1072 u64 start, extent_start, extent_end, len; 1073 struct extent_io_tree *unpin = NULL; 1074 int ret; 1075 1076 if (!block_group) 1077 return 0; 1078 1079 /* 1080 * We want to add any pinned extents to our free space cache 1081 * so we don't leak the space 1082 * 1083 * We shouldn't have switched the pinned extents yet so this is the 1084 * right one 1085 */ 1086 unpin = &trans->transaction->pinned_extents; 1087 1088 start = block_group->start; 1089 1090 while (start < block_group->start + block_group->length) { 1091 ret = find_first_extent_bit(unpin, start, 1092 &extent_start, &extent_end, 1093 EXTENT_DIRTY, NULL); 1094 if (ret) 1095 return 0; 1096 1097 /* This pinned extent is out of our range */ 1098 if (extent_start >= block_group->start + block_group->length) 1099 return 0; 1100 1101 extent_start = max(extent_start, start); 1102 extent_end = min(block_group->start + block_group->length, 1103 extent_end + 1); 1104 len = extent_end - extent_start; 1105 1106 *entries += 1; 1107 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1108 if (ret) 1109 return -ENOSPC; 1110 1111 start = extent_end; 1112 } 1113 1114 return 0; 1115 } 1116 1117 static noinline_for_stack int 1118 
write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1119 { 1120 struct btrfs_free_space *entry, *next; 1121 int ret; 1122 1123 /* Write out the bitmaps */ 1124 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1125 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1126 if (ret) 1127 return -ENOSPC; 1128 list_del_init(&entry->list); 1129 } 1130 1131 return 0; 1132 } 1133 1134 static int flush_dirty_cache(struct inode *inode) 1135 { 1136 int ret; 1137 1138 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1139 if (ret) 1140 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1141 EXTENT_DELALLOC, 0, 0, NULL); 1142 1143 return ret; 1144 } 1145 1146 static void noinline_for_stack 1147 cleanup_bitmap_list(struct list_head *bitmap_list) 1148 { 1149 struct btrfs_free_space *entry, *next; 1150 1151 list_for_each_entry_safe(entry, next, bitmap_list, list) 1152 list_del_init(&entry->list); 1153 } 1154 1155 static void noinline_for_stack 1156 cleanup_write_cache_enospc(struct inode *inode, 1157 struct btrfs_io_ctl *io_ctl, 1158 struct extent_state **cached_state) 1159 { 1160 io_ctl_drop_pages(io_ctl); 1161 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1162 i_size_read(inode) - 1, cached_state); 1163 } 1164 1165 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1166 struct btrfs_trans_handle *trans, 1167 struct btrfs_block_group *block_group, 1168 struct btrfs_io_ctl *io_ctl, 1169 struct btrfs_path *path, u64 offset) 1170 { 1171 int ret; 1172 struct inode *inode = io_ctl->inode; 1173 1174 if (!inode) 1175 return 0; 1176 1177 /* Flush the dirty pages in the cache file. */ 1178 ret = flush_dirty_cache(inode); 1179 if (ret) 1180 goto out; 1181 1182 /* Update the cache item to tell everyone this cache file is valid. */ 1183 ret = update_cache_item(trans, root, inode, path, offset, 1184 io_ctl->entries, io_ctl->bitmaps); 1185 out: 1186 if (ret) { 1187 invalidate_inode_pages2(inode->i_mapping); 1188 BTRFS_I(inode)->generation = 0; 1189 if (block_group) 1190 btrfs_debug(root->fs_info, 1191 "failed to write free space cache for block group %llu error %d", 1192 block_group->start, ret); 1193 } 1194 btrfs_update_inode(trans, root, inode); 1195 1196 if (block_group) { 1197 /* the dirty list is protected by the dirty_bgs_lock */ 1198 spin_lock(&trans->transaction->dirty_bgs_lock); 1199 1200 /* the disk_cache_state is protected by the block group lock */ 1201 spin_lock(&block_group->lock); 1202 1203 /* 1204 * only mark this as written if we didn't get put back on 1205 * the dirty list while waiting for IO. 
Otherwise our 1206 * cache state won't be right, and we won't get written again 1207 */ 1208 if (!ret && list_empty(&block_group->dirty_list)) 1209 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1210 else if (ret) 1211 block_group->disk_cache_state = BTRFS_DC_ERROR; 1212 1213 spin_unlock(&block_group->lock); 1214 spin_unlock(&trans->transaction->dirty_bgs_lock); 1215 io_ctl->inode = NULL; 1216 iput(inode); 1217 } 1218 1219 return ret; 1220 1221 } 1222 1223 static int btrfs_wait_cache_io_root(struct btrfs_root *root, 1224 struct btrfs_trans_handle *trans, 1225 struct btrfs_io_ctl *io_ctl, 1226 struct btrfs_path *path) 1227 { 1228 return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0); 1229 } 1230 1231 int btrfs_wait_cache_io(struct btrfs_trans_handle *trans, 1232 struct btrfs_block_group *block_group, 1233 struct btrfs_path *path) 1234 { 1235 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans, 1236 block_group, &block_group->io_ctl, 1237 path, block_group->start); 1238 } 1239 1240 /** 1241 * __btrfs_write_out_cache - write out cached info to an inode 1242 * @root - the root the inode belongs to 1243 * @ctl - the free space cache we are going to write out 1244 * @block_group - the block_group for this cache if it belongs to a block_group 1245 * @trans - the trans handle 1246 * 1247 * This function writes out a free space cache struct to disk for quick recovery 1248 * on mount. This will return 0 if it was successful in writing the cache out, 1249 * or an errno if it was not. 1250 */ 1251 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1252 struct btrfs_free_space_ctl *ctl, 1253 struct btrfs_block_group *block_group, 1254 struct btrfs_io_ctl *io_ctl, 1255 struct btrfs_trans_handle *trans) 1256 { 1257 struct extent_state *cached_state = NULL; 1258 LIST_HEAD(bitmap_list); 1259 int entries = 0; 1260 int bitmaps = 0; 1261 int ret; 1262 int must_iput = 0; 1263 1264 if (!i_size_read(inode)) 1265 return -EIO; 1266 1267 WARN_ON(io_ctl->pages); 1268 ret = io_ctl_init(io_ctl, inode, 1); 1269 if (ret) 1270 return ret; 1271 1272 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1273 down_write(&block_group->data_rwsem); 1274 spin_lock(&block_group->lock); 1275 if (block_group->delalloc_bytes) { 1276 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1277 spin_unlock(&block_group->lock); 1278 up_write(&block_group->data_rwsem); 1279 BTRFS_I(inode)->generation = 0; 1280 ret = 0; 1281 must_iput = 1; 1282 goto out; 1283 } 1284 spin_unlock(&block_group->lock); 1285 } 1286 1287 /* Lock all pages first so we can lock the extent safely. */ 1288 ret = io_ctl_prepare_pages(io_ctl, false); 1289 if (ret) 1290 goto out_unlock; 1291 1292 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1293 &cached_state); 1294 1295 io_ctl_set_generation(io_ctl, trans->transid); 1296 1297 mutex_lock(&ctl->cache_writeout_mutex); 1298 /* Write out the extent entries in the free space cache */ 1299 spin_lock(&ctl->tree_lock); 1300 ret = write_cache_extent_entries(io_ctl, ctl, 1301 block_group, &entries, &bitmaps, 1302 &bitmap_list); 1303 if (ret) 1304 goto out_nospc_locked; 1305 1306 /* 1307 * Some spaces that are freed in the current transaction are pinned, 1308 * they will be added into free space cache after the transaction is 1309 * committed, we shouldn't lose them. 1310 * 1311 * If this changes while we are working we'll get added back to 1312 * the dirty list and redo it. 
No locking needed 1313 */ 1314 ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries); 1315 if (ret) 1316 goto out_nospc_locked; 1317 1318 /* 1319 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1320 * locked while doing it because a concurrent trim can be manipulating 1321 * or freeing the bitmap. 1322 */ 1323 ret = write_bitmap_entries(io_ctl, &bitmap_list); 1324 spin_unlock(&ctl->tree_lock); 1325 mutex_unlock(&ctl->cache_writeout_mutex); 1326 if (ret) 1327 goto out_nospc; 1328 1329 /* Zero out the rest of the pages just to make sure */ 1330 io_ctl_zero_remaining_pages(io_ctl); 1331 1332 /* Everything is written out, now we dirty the pages in the file. */ 1333 ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages, 1334 io_ctl->num_pages, 0, i_size_read(inode), 1335 &cached_state); 1336 if (ret) 1337 goto out_nospc; 1338 1339 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1340 up_write(&block_group->data_rwsem); 1341 /* 1342 * Release the pages and unlock the extent, we will flush 1343 * them out later 1344 */ 1345 io_ctl_drop_pages(io_ctl); 1346 io_ctl_free(io_ctl); 1347 1348 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1349 i_size_read(inode) - 1, &cached_state); 1350 1351 /* 1352 * at this point the pages are under IO and we're happy, 1353 * The caller is responsible for waiting on them and updating 1354 * the cache and the inode 1355 */ 1356 io_ctl->entries = entries; 1357 io_ctl->bitmaps = bitmaps; 1358 1359 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); 1360 if (ret) 1361 goto out; 1362 1363 return 0; 1364 1365 out_nospc_locked: 1366 cleanup_bitmap_list(&bitmap_list); 1367 spin_unlock(&ctl->tree_lock); 1368 mutex_unlock(&ctl->cache_writeout_mutex); 1369 1370 out_nospc: 1371 cleanup_write_cache_enospc(inode, io_ctl, &cached_state); 1372 1373 out_unlock: 1374 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1375 up_write(&block_group->data_rwsem); 1376 1377 out: 1378 io_ctl->inode = NULL; 1379 io_ctl_free(io_ctl); 1380 if (ret) { 1381 invalidate_inode_pages2(inode->i_mapping); 1382 BTRFS_I(inode)->generation = 0; 1383 } 1384 btrfs_update_inode(trans, root, inode); 1385 if (must_iput) 1386 iput(inode); 1387 return ret; 1388 } 1389 1390 int btrfs_write_out_cache(struct btrfs_trans_handle *trans, 1391 struct btrfs_block_group *block_group, 1392 struct btrfs_path *path) 1393 { 1394 struct btrfs_fs_info *fs_info = trans->fs_info; 1395 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1396 struct inode *inode; 1397 int ret = 0; 1398 1399 spin_lock(&block_group->lock); 1400 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1401 spin_unlock(&block_group->lock); 1402 return 0; 1403 } 1404 spin_unlock(&block_group->lock); 1405 1406 inode = lookup_free_space_inode(block_group, path); 1407 if (IS_ERR(inode)) 1408 return 0; 1409 1410 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, 1411 block_group, &block_group->io_ctl, trans); 1412 if (ret) { 1413 btrfs_debug(fs_info, 1414 "failed to write free space cache for block group %llu error %d", 1415 block_group->start, ret); 1416 spin_lock(&block_group->lock); 1417 block_group->disk_cache_state = BTRFS_DC_ERROR; 1418 spin_unlock(&block_group->lock); 1419 1420 block_group->io_ctl.inode = NULL; 1421 iput(inode); 1422 } 1423 1424 /* 1425 * if ret == 0 the caller is expected to call btrfs_wait_cache_io 1426 * to wait for IO and put the inode 1427 */ 1428 1429 return ret; 1430 } 1431 1432 static inline unsigned long offset_to_bit(u64 
bitmap_start, u32 unit, 1433 u64 offset) 1434 { 1435 ASSERT(offset >= bitmap_start); 1436 offset -= bitmap_start; 1437 return (unsigned long)(div_u64(offset, unit)); 1438 } 1439 1440 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1441 { 1442 return (unsigned long)(div_u64(bytes, unit)); 1443 } 1444 1445 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1446 u64 offset) 1447 { 1448 u64 bitmap_start; 1449 u64 bytes_per_bitmap; 1450 1451 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1452 bitmap_start = offset - ctl->start; 1453 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1454 bitmap_start *= bytes_per_bitmap; 1455 bitmap_start += ctl->start; 1456 1457 return bitmap_start; 1458 } 1459 1460 static int tree_insert_offset(struct rb_root *root, u64 offset, 1461 struct rb_node *node, int bitmap) 1462 { 1463 struct rb_node **p = &root->rb_node; 1464 struct rb_node *parent = NULL; 1465 struct btrfs_free_space *info; 1466 1467 while (*p) { 1468 parent = *p; 1469 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1470 1471 if (offset < info->offset) { 1472 p = &(*p)->rb_left; 1473 } else if (offset > info->offset) { 1474 p = &(*p)->rb_right; 1475 } else { 1476 /* 1477 * we could have a bitmap entry and an extent entry 1478 * share the same offset. If this is the case, we want 1479 * the extent entry to always be found first if we do a 1480 * linear search through the tree, since we want to have 1481 * the quickest allocation time, and allocating from an 1482 * extent is faster than allocating from a bitmap. So 1483 * if we're inserting a bitmap and we find an entry at 1484 * this offset, we want to go right, or after this entry 1485 * logically. If we are inserting an extent and we've 1486 * found a bitmap, we want to go left, or before 1487 * logically. 1488 */ 1489 if (bitmap) { 1490 if (info->bitmap) { 1491 WARN_ON_ONCE(1); 1492 return -EEXIST; 1493 } 1494 p = &(*p)->rb_right; 1495 } else { 1496 if (!info->bitmap) { 1497 WARN_ON_ONCE(1); 1498 return -EEXIST; 1499 } 1500 p = &(*p)->rb_left; 1501 } 1502 } 1503 } 1504 1505 rb_link_node(node, parent, p); 1506 rb_insert_color(node, root); 1507 1508 return 0; 1509 } 1510 1511 /* 1512 * searches the tree for the given offset. 1513 * 1514 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1515 * want a section that has at least bytes size and comes at or after the given 1516 * offset. 1517 */ 1518 static struct btrfs_free_space * 1519 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1520 u64 offset, int bitmap_only, int fuzzy) 1521 { 1522 struct rb_node *n = ctl->free_space_offset.rb_node; 1523 struct btrfs_free_space *entry, *prev = NULL; 1524 1525 /* find entry that is closest to the 'offset' */ 1526 while (1) { 1527 if (!n) { 1528 entry = NULL; 1529 break; 1530 } 1531 1532 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1533 prev = entry; 1534 1535 if (offset < entry->offset) 1536 n = n->rb_left; 1537 else if (offset > entry->offset) 1538 n = n->rb_right; 1539 else 1540 break; 1541 } 1542 1543 if (bitmap_only) { 1544 if (!entry) 1545 return NULL; 1546 if (entry->bitmap) 1547 return entry; 1548 1549 /* 1550 * bitmap entry and extent entry may share same offset, 1551 * in that case, bitmap entry comes after extent entry. 
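		 * This is the ordering tree_insert_offset() enforces, so a
		 * single rb_next() is enough to land on the bitmap entry.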
1552 */ 1553 n = rb_next(n); 1554 if (!n) 1555 return NULL; 1556 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1557 if (entry->offset != offset) 1558 return NULL; 1559 1560 WARN_ON(!entry->bitmap); 1561 return entry; 1562 } else if (entry) { 1563 if (entry->bitmap) { 1564 /* 1565 * if previous extent entry covers the offset, 1566 * we should return it instead of the bitmap entry 1567 */ 1568 n = rb_prev(&entry->offset_index); 1569 if (n) { 1570 prev = rb_entry(n, struct btrfs_free_space, 1571 offset_index); 1572 if (!prev->bitmap && 1573 prev->offset + prev->bytes > offset) 1574 entry = prev; 1575 } 1576 } 1577 return entry; 1578 } 1579 1580 if (!prev) 1581 return NULL; 1582 1583 /* find last entry before the 'offset' */ 1584 entry = prev; 1585 if (entry->offset > offset) { 1586 n = rb_prev(&entry->offset_index); 1587 if (n) { 1588 entry = rb_entry(n, struct btrfs_free_space, 1589 offset_index); 1590 ASSERT(entry->offset <= offset); 1591 } else { 1592 if (fuzzy) 1593 return entry; 1594 else 1595 return NULL; 1596 } 1597 } 1598 1599 if (entry->bitmap) { 1600 n = rb_prev(&entry->offset_index); 1601 if (n) { 1602 prev = rb_entry(n, struct btrfs_free_space, 1603 offset_index); 1604 if (!prev->bitmap && 1605 prev->offset + prev->bytes > offset) 1606 return prev; 1607 } 1608 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1609 return entry; 1610 } else if (entry->offset + entry->bytes > offset) 1611 return entry; 1612 1613 if (!fuzzy) 1614 return NULL; 1615 1616 while (1) { 1617 if (entry->bitmap) { 1618 if (entry->offset + BITS_PER_BITMAP * 1619 ctl->unit > offset) 1620 break; 1621 } else { 1622 if (entry->offset + entry->bytes > offset) 1623 break; 1624 } 1625 1626 n = rb_next(&entry->offset_index); 1627 if (!n) 1628 return NULL; 1629 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1630 } 1631 return entry; 1632 } 1633 1634 static inline void 1635 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1636 struct btrfs_free_space *info) 1637 { 1638 rb_erase(&info->offset_index, &ctl->free_space_offset); 1639 ctl->free_extents--; 1640 1641 if (!info->bitmap && !btrfs_free_space_trimmed(info)) { 1642 ctl->discardable_extents[BTRFS_STAT_CURR]--; 1643 ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes; 1644 } 1645 } 1646 1647 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1648 struct btrfs_free_space *info) 1649 { 1650 __unlink_free_space(ctl, info); 1651 ctl->free_space -= info->bytes; 1652 } 1653 1654 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1655 struct btrfs_free_space *info) 1656 { 1657 int ret = 0; 1658 1659 ASSERT(info->bytes || info->bitmap); 1660 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1661 &info->offset_index, (info->bitmap != NULL)); 1662 if (ret) 1663 return ret; 1664 1665 if (!info->bitmap && !btrfs_free_space_trimmed(info)) { 1666 ctl->discardable_extents[BTRFS_STAT_CURR]++; 1667 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; 1668 } 1669 1670 ctl->free_space += info->bytes; 1671 ctl->free_extents++; 1672 return ret; 1673 } 1674 1675 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1676 { 1677 struct btrfs_block_group *block_group = ctl->private; 1678 u64 max_bytes; 1679 u64 bitmap_bytes; 1680 u64 extent_bytes; 1681 u64 size = block_group->length; 1682 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1683 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1684 1685 max_bitmaps = max_t(u64, max_bitmaps, 1); 1686 1687 ASSERT(ctl->total_bitmaps 
<= max_bitmaps); 1688 1689 /* 1690 * We are trying to keep the total amount of memory used per 1GiB of 1691 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation 1692 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of 1693 * bitmaps, we may end up using more memory than this. 1694 */ 1695 if (size < SZ_1G) 1696 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1697 else 1698 max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G); 1699 1700 bitmap_bytes = ctl->total_bitmaps * ctl->unit; 1701 1702 /* 1703 * we want the extent entry threshold to always be at most 1/2 the max 1704 * bytes we can have, or whatever is less than that. 1705 */ 1706 extent_bytes = max_bytes - bitmap_bytes; 1707 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 1708 1709 ctl->extents_thresh = 1710 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 1711 } 1712 1713 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1714 struct btrfs_free_space *info, 1715 u64 offset, u64 bytes) 1716 { 1717 unsigned long start, count, end; 1718 int extent_delta = -1; 1719 1720 start = offset_to_bit(info->offset, ctl->unit, offset); 1721 count = bytes_to_bits(bytes, ctl->unit); 1722 end = start + count; 1723 ASSERT(end <= BITS_PER_BITMAP); 1724 1725 bitmap_clear(info->bitmap, start, count); 1726 1727 info->bytes -= bytes; 1728 if (info->max_extent_size > ctl->unit) 1729 info->max_extent_size = 0; 1730 1731 if (start && test_bit(start - 1, info->bitmap)) 1732 extent_delta++; 1733 1734 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap)) 1735 extent_delta++; 1736 1737 info->bitmap_extents += extent_delta; 1738 if (!btrfs_free_space_trimmed(info)) { 1739 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta; 1740 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 1741 } 1742 } 1743 1744 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1745 struct btrfs_free_space *info, u64 offset, 1746 u64 bytes) 1747 { 1748 __bitmap_clear_bits(ctl, info, offset, bytes); 1749 ctl->free_space -= bytes; 1750 } 1751 1752 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1753 struct btrfs_free_space *info, u64 offset, 1754 u64 bytes) 1755 { 1756 unsigned long start, count, end; 1757 int extent_delta = 1; 1758 1759 start = offset_to_bit(info->offset, ctl->unit, offset); 1760 count = bytes_to_bits(bytes, ctl->unit); 1761 end = start + count; 1762 ASSERT(end <= BITS_PER_BITMAP); 1763 1764 bitmap_set(info->bitmap, start, count); 1765 1766 info->bytes += bytes; 1767 ctl->free_space += bytes; 1768 1769 if (start && test_bit(start - 1, info->bitmap)) 1770 extent_delta--; 1771 1772 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap)) 1773 extent_delta--; 1774 1775 info->bitmap_extents += extent_delta; 1776 if (!btrfs_free_space_trimmed(info)) { 1777 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta; 1778 ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes; 1779 } 1780 } 1781 1782 /* 1783 * If we can not find suitable extent, we will use bytes to record 1784 * the size of the max extent. 1785 */ 1786 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1787 struct btrfs_free_space *bitmap_info, u64 *offset, 1788 u64 *bytes, bool for_alloc) 1789 { 1790 unsigned long found_bits = 0; 1791 unsigned long max_bits = 0; 1792 unsigned long bits, i; 1793 unsigned long next_zero; 1794 unsigned long extent_bits; 1795 1796 /* 1797 * Skip searching the bitmap if we don't have a contiguous section that 1798 * is large enough for this allocation. 
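	 *
	 * max_extent_size is only a cached upper bound: it is zeroed whenever
	 * bits are set in this bitmap, so a value of zero simply means we have
	 * to fall through and do the full scan below.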
	 */
	if (for_alloc &&
	    bitmap_info->max_extent_size &&
	    bitmap_info->max_extent_size < *bytes) {
		*bytes = bitmap_info->max_extent_size;
		return -1;
	}

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
		if (for_alloc && bits == 1) {
			found_bits = 1;
			break;
		}
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		extent_bits = next_zero - i;
		if (extent_bits >= bits) {
			found_bits = extent_bits;
			break;
		} else if (extent_bits > max_bits) {
			max_bits = extent_bits;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	*bytes = (u64)(max_bits) * ctl->unit;
	bitmap_info->max_extent_size = *bytes;
	return -1;
}

static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
{
	if (entry->bitmap)
		return entry->max_extent_size;
	return entry->bytes;
}

/* Cache the size of the max extent in bytes */
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
		unsigned long align, u64 *max_extent_size)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	u64 tmp;
	u64 align_off;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		goto out;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		goto out;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes) {
			*max_extent_size = max(get_max_extent_size(entry),
					       *max_extent_size);
			continue;
		}

		/*
		 * Make sure the space returned is big enough to match our
		 * requested alignment.
		 */
		if (*bytes >= align) {
			tmp = entry->offset - ctl->start + align - 1;
			tmp = div64_u64(tmp, align);
			tmp = tmp * align + ctl->start;
			align_off = tmp - entry->offset;
		} else {
			align_off = 0;
			tmp = entry->offset;
		}

		if (entry->bytes < *bytes + align_off) {
			*max_extent_size = max(get_max_extent_size(entry),
					       *max_extent_size);
			continue;
		}

		if (entry->bitmap) {
			u64 size = *bytes;

			ret = search_bitmap(ctl, entry, &tmp, &size, true);
			if (!ret) {
				*offset = tmp;
				*bytes = size;
				return entry;
			} else {
				*max_extent_size =
					max(get_max_extent_size(entry),
					    *max_extent_size);
			}
			continue;
		}

		*offset = tmp;
		*bytes = entry->bytes - align_off;
		return entry;
	}
out:
	return NULL;
}

static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
				struct btrfs_free_space *bitmap_info)
{
	struct btrfs_block_group *block_group = ctl->private;
	u64 bytes = bitmap_info->bytes;
	unsigned int rs, re;
	int count = 0;

	if (!block_group || !bytes)
		return count;

	bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
				   BITS_PER_BITMAP) {
		bytes -= (re - rs) * ctl->unit;
		count++;

		if (!bytes)
			break;
	}

	return count;
}

static void
add_new_bitmap(struct btrfs_free_space_ctl *ctl,
	       struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	info->bitmap_extents = 0;
	INIT_LIST_HEAD(&info->list);
	link_free_space(ctl, info);
	ctl->total_bitmaps++;

	ctl->op->recalc_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	/*
	 * Normally when this is called, the bitmap is completely empty. However,
	 * if we are blowing up the free space cache for one reason or another
	 * via __btrfs_remove_free_space_cache(), then it may not be freed and
	 * we may leave stats on the table.
	 */
	if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] -=
			bitmap_info->bitmap_extents;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
	}
	unlink_free_space(ctl, bitmap_info);
	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap. This bitmap may only
	 * cover part of the extent, thanks to how we add space, so we need to
	 * search for as much of it as we can, clear that amount, and then go
	 * searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * No entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * If the next entry isn't a bitmap we need to return to let
		 * the extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
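		 * Returning -EAGAIN here means the caller has to redo the
		 * lookup from the top, the same as when the next entry turns
		 * out not to be a bitmap.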
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes, enum btrfs_trim_state trim_state)
{
	u64 bytes_to_set = 0;
	u64 end;

	/*
	 * This is a tradeoff to make bitmap trim state minimal. We mark the
	 * whole bitmap untrimmed if at any point we add untrimmed regions.
	 */
	if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
		if (btrfs_free_space_trimmed(info)) {
			ctl->discardable_extents[BTRFS_STAT_CURR] +=
				info->bitmap_extents;
			ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
		}
		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
	}

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	/*
	 * We set some bytes, we have no idea what the max extent size is
	 * anymore.
	 */
	info->max_extent_size = 0;

	return bytes_to_set;
}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group *block_group = ctl->private;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group))
		forced = true;
#endif

	/* This is a way to reclaim large regions from the bitmaps. */
	if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
		return false;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap.
	 */
	if (!forced && ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them; we want
		 * to reserve the slots for larger extents. However, if we have
		 * plenty of cache left then go ahead and add them, no sense in
		 * adding the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= fs_info->sectorsize * 8) {
			if (ctl->free_extents * 3 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries. However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory
	 * limit, so still allow those block groups to have a bitmap
	 * entry.
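	 * For example, with 4K pages and a 4K sectorsize a single bitmap spans
	 * BITS_PER_BITMAP * ctl->unit = 32768 * 4K = 128M, so only block
	 * groups of at least 64M can use bitmap entries at all.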
2129 */ 2130 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length) 2131 return false; 2132 2133 return true; 2134 } 2135 2136 static const struct btrfs_free_space_op free_space_op = { 2137 .recalc_thresholds = recalculate_thresholds, 2138 .use_bitmap = use_bitmap, 2139 }; 2140 2141 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 2142 struct btrfs_free_space *info) 2143 { 2144 struct btrfs_free_space *bitmap_info; 2145 struct btrfs_block_group *block_group = NULL; 2146 int added = 0; 2147 u64 bytes, offset, bytes_added; 2148 enum btrfs_trim_state trim_state; 2149 int ret; 2150 2151 bytes = info->bytes; 2152 offset = info->offset; 2153 trim_state = info->trim_state; 2154 2155 if (!ctl->op->use_bitmap(ctl, info)) 2156 return 0; 2157 2158 if (ctl->op == &free_space_op) 2159 block_group = ctl->private; 2160 again: 2161 /* 2162 * Since we link bitmaps right into the cluster we need to see if we 2163 * have a cluster here, and if so and it has our bitmap we need to add 2164 * the free space to that bitmap. 2165 */ 2166 if (block_group && !list_empty(&block_group->cluster_list)) { 2167 struct btrfs_free_cluster *cluster; 2168 struct rb_node *node; 2169 struct btrfs_free_space *entry; 2170 2171 cluster = list_entry(block_group->cluster_list.next, 2172 struct btrfs_free_cluster, 2173 block_group_list); 2174 spin_lock(&cluster->lock); 2175 node = rb_first(&cluster->root); 2176 if (!node) { 2177 spin_unlock(&cluster->lock); 2178 goto no_cluster_bitmap; 2179 } 2180 2181 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2182 if (!entry->bitmap) { 2183 spin_unlock(&cluster->lock); 2184 goto no_cluster_bitmap; 2185 } 2186 2187 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2188 bytes_added = add_bytes_to_bitmap(ctl, entry, offset, 2189 bytes, trim_state); 2190 bytes -= bytes_added; 2191 offset += bytes_added; 2192 } 2193 spin_unlock(&cluster->lock); 2194 if (!bytes) { 2195 ret = 1; 2196 goto out; 2197 } 2198 } 2199 2200 no_cluster_bitmap: 2201 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2202 1, 0); 2203 if (!bitmap_info) { 2204 ASSERT(added == 0); 2205 goto new_bitmap; 2206 } 2207 2208 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, 2209 trim_state); 2210 bytes -= bytes_added; 2211 offset += bytes_added; 2212 added = 0; 2213 2214 if (!bytes) { 2215 ret = 1; 2216 goto out; 2217 } else 2218 goto again; 2219 2220 new_bitmap: 2221 if (info && info->bitmap) { 2222 add_new_bitmap(ctl, info, offset); 2223 added = 1; 2224 info = NULL; 2225 goto again; 2226 } else { 2227 spin_unlock(&ctl->tree_lock); 2228 2229 /* no pre-allocated info, allocate a new one */ 2230 if (!info) { 2231 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2232 GFP_NOFS); 2233 if (!info) { 2234 spin_lock(&ctl->tree_lock); 2235 ret = -ENOMEM; 2236 goto out; 2237 } 2238 } 2239 2240 /* allocate the bitmap */ 2241 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, 2242 GFP_NOFS); 2243 info->trim_state = BTRFS_TRIM_STATE_TRIMMED; 2244 spin_lock(&ctl->tree_lock); 2245 if (!info->bitmap) { 2246 ret = -ENOMEM; 2247 goto out; 2248 } 2249 goto again; 2250 } 2251 2252 out: 2253 if (info) { 2254 if (info->bitmap) 2255 kmem_cache_free(btrfs_free_space_bitmap_cachep, 2256 info->bitmap); 2257 kmem_cache_free(btrfs_free_space_cachep, info); 2258 } 2259 2260 return ret; 2261 } 2262 2263 /* 2264 * Free space merging rules: 2265 * 1) Merge trimmed areas together 2266 * 2) Let untrimmed areas coalesce with trimmed areas 2267 * 3) Always pull neighboring 
regions from bitmaps 2268 * 2269 * The above rules are for when we merge free space based on btrfs_trim_state. 2270 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the 2271 * same reason: to promote larger extent regions which makes life easier for 2272 * find_free_extent(). Rule 2 enables coalescing based on the common path 2273 * being returning free space from btrfs_finish_extent_commit(). So when free 2274 * space is trimmed, it will prevent aggregating trimmed new region and 2275 * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents 2276 * and provide find_free_extent() with the largest extents possible hoping for 2277 * the reuse path. 2278 */ 2279 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2280 struct btrfs_free_space *info, bool update_stat) 2281 { 2282 struct btrfs_free_space *left_info = NULL; 2283 struct btrfs_free_space *right_info; 2284 bool merged = false; 2285 u64 offset = info->offset; 2286 u64 bytes = info->bytes; 2287 const bool is_trimmed = btrfs_free_space_trimmed(info); 2288 2289 /* 2290 * first we want to see if there is free space adjacent to the range we 2291 * are adding, if there is remove that struct and add a new one to 2292 * cover the entire range 2293 */ 2294 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2295 if (right_info && rb_prev(&right_info->offset_index)) 2296 left_info = rb_entry(rb_prev(&right_info->offset_index), 2297 struct btrfs_free_space, offset_index); 2298 else if (!right_info) 2299 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2300 2301 /* See try_merge_free_space() comment. */ 2302 if (right_info && !right_info->bitmap && 2303 (!is_trimmed || btrfs_free_space_trimmed(right_info))) { 2304 if (update_stat) 2305 unlink_free_space(ctl, right_info); 2306 else 2307 __unlink_free_space(ctl, right_info); 2308 info->bytes += right_info->bytes; 2309 kmem_cache_free(btrfs_free_space_cachep, right_info); 2310 merged = true; 2311 } 2312 2313 /* See try_merge_free_space() comment. */ 2314 if (left_info && !left_info->bitmap && 2315 left_info->offset + left_info->bytes == offset && 2316 (!is_trimmed || btrfs_free_space_trimmed(left_info))) { 2317 if (update_stat) 2318 unlink_free_space(ctl, left_info); 2319 else 2320 __unlink_free_space(ctl, left_info); 2321 info->offset = left_info->offset; 2322 info->bytes += left_info->bytes; 2323 kmem_cache_free(btrfs_free_space_cachep, left_info); 2324 merged = true; 2325 } 2326 2327 return merged; 2328 } 2329 2330 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2331 struct btrfs_free_space *info, 2332 bool update_stat) 2333 { 2334 struct btrfs_free_space *bitmap; 2335 unsigned long i; 2336 unsigned long j; 2337 const u64 end = info->offset + info->bytes; 2338 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2339 u64 bytes; 2340 2341 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2342 if (!bitmap) 2343 return false; 2344 2345 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2346 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2347 if (j == i) 2348 return false; 2349 bytes = (j - i) * ctl->unit; 2350 info->bytes += bytes; 2351 2352 /* See try_merge_free_space() comment. 
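	 * Stealing bits from an untrimmed bitmap must leave the grown extent
	 * marked untrimmed as well.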
*/ 2353 if (!btrfs_free_space_trimmed(bitmap)) 2354 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2355 2356 if (update_stat) 2357 bitmap_clear_bits(ctl, bitmap, end, bytes); 2358 else 2359 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2360 2361 if (!bitmap->bytes) 2362 free_bitmap(ctl, bitmap); 2363 2364 return true; 2365 } 2366 2367 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2368 struct btrfs_free_space *info, 2369 bool update_stat) 2370 { 2371 struct btrfs_free_space *bitmap; 2372 u64 bitmap_offset; 2373 unsigned long i; 2374 unsigned long j; 2375 unsigned long prev_j; 2376 u64 bytes; 2377 2378 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2379 /* If we're on a boundary, try the previous logical bitmap. */ 2380 if (bitmap_offset == info->offset) { 2381 if (info->offset == 0) 2382 return false; 2383 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2384 } 2385 2386 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2387 if (!bitmap) 2388 return false; 2389 2390 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2391 j = 0; 2392 prev_j = (unsigned long)-1; 2393 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2394 if (j > i) 2395 break; 2396 prev_j = j; 2397 } 2398 if (prev_j == i) 2399 return false; 2400 2401 if (prev_j == (unsigned long)-1) 2402 bytes = (i + 1) * ctl->unit; 2403 else 2404 bytes = (i - prev_j) * ctl->unit; 2405 2406 info->offset -= bytes; 2407 info->bytes += bytes; 2408 2409 /* See try_merge_free_space() comment. */ 2410 if (!btrfs_free_space_trimmed(bitmap)) 2411 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2412 2413 if (update_stat) 2414 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2415 else 2416 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2417 2418 if (!bitmap->bytes) 2419 free_bitmap(ctl, bitmap); 2420 2421 return true; 2422 } 2423 2424 /* 2425 * We prefer always to allocate from extent entries, both for clustered and 2426 * non-clustered allocation requests. So when attempting to add a new extent 2427 * entry, try to see if there's adjacent free space in bitmap entries, and if 2428 * there is, migrate that space from the bitmaps to the extent. 2429 * Like this we get better chances of satisfying space allocation requests 2430 * because we attempt to satisfy them based on a single cache entry, and never 2431 * on 2 or more entries - even if the entries represent a contiguous free space 2432 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2433 * ends). 2434 */ 2435 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2436 struct btrfs_free_space *info, 2437 bool update_stat) 2438 { 2439 /* 2440 * Only work with disconnected entries, as we can change their offset, 2441 * and must be extent entries. 
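	 * (i.e. info->bitmap is NULL and the entry's offset_index is not yet
	 * linked into any rbtree), which is what the ASSERTs below verify.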
2442 */ 2443 ASSERT(!info->bitmap); 2444 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2445 2446 if (ctl->total_bitmaps > 0) { 2447 bool stole_end; 2448 bool stole_front = false; 2449 2450 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2451 if (ctl->total_bitmaps > 0) 2452 stole_front = steal_from_bitmap_to_front(ctl, info, 2453 update_stat); 2454 2455 if (stole_end || stole_front) 2456 try_merge_free_space(ctl, info, update_stat); 2457 } 2458 } 2459 2460 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2461 struct btrfs_free_space_ctl *ctl, 2462 u64 offset, u64 bytes, 2463 enum btrfs_trim_state trim_state) 2464 { 2465 struct btrfs_block_group *block_group = ctl->private; 2466 struct btrfs_free_space *info; 2467 int ret = 0; 2468 u64 filter_bytes = bytes; 2469 2470 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2471 if (!info) 2472 return -ENOMEM; 2473 2474 info->offset = offset; 2475 info->bytes = bytes; 2476 info->trim_state = trim_state; 2477 RB_CLEAR_NODE(&info->offset_index); 2478 2479 spin_lock(&ctl->tree_lock); 2480 2481 if (try_merge_free_space(ctl, info, true)) 2482 goto link; 2483 2484 /* 2485 * There was no extent directly to the left or right of this new 2486 * extent then we know we're going to have to allocate a new extent, so 2487 * before we do that see if we need to drop this into a bitmap 2488 */ 2489 ret = insert_into_bitmap(ctl, info); 2490 if (ret < 0) { 2491 goto out; 2492 } else if (ret) { 2493 ret = 0; 2494 goto out; 2495 } 2496 link: 2497 /* 2498 * Only steal free space from adjacent bitmaps if we're sure we're not 2499 * going to add the new free space to existing bitmap entries - because 2500 * that would mean unnecessary work that would be reverted. Therefore 2501 * attempt to steal space from bitmaps if we're adding an extent entry. 2502 */ 2503 steal_from_bitmap(ctl, info, true); 2504 2505 filter_bytes = max(filter_bytes, info->bytes); 2506 2507 ret = link_free_space(ctl, info); 2508 if (ret) 2509 kmem_cache_free(btrfs_free_space_cachep, info); 2510 out: 2511 btrfs_discard_update_discardable(block_group, ctl); 2512 spin_unlock(&ctl->tree_lock); 2513 2514 if (ret) { 2515 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2516 ASSERT(ret != -EEXIST); 2517 } 2518 2519 if (trim_state != BTRFS_TRIM_STATE_TRIMMED) { 2520 btrfs_discard_check_filter(block_group, filter_bytes); 2521 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); 2522 } 2523 2524 return ret; 2525 } 2526 2527 int btrfs_add_free_space(struct btrfs_block_group *block_group, 2528 u64 bytenr, u64 size) 2529 { 2530 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2531 2532 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC)) 2533 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2534 2535 return __btrfs_add_free_space(block_group->fs_info, 2536 block_group->free_space_ctl, 2537 bytenr, size, trim_state); 2538 } 2539 2540 /* 2541 * This is a subtle distinction because when adding free space back in general, 2542 * we want it to be added as untrimmed for async. But in the case where we add 2543 * it on loading of a block group, we want to consider it trimmed. 
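 * With DISCARD_SYNC or DISCARD_ASYNC enabled we therefore report the space as
 * already trimmed so it is not queued up for discard a second time.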
2544 */ 2545 int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group, 2546 u64 bytenr, u64 size) 2547 { 2548 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2549 2550 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) || 2551 btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 2552 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2553 2554 return __btrfs_add_free_space(block_group->fs_info, 2555 block_group->free_space_ctl, 2556 bytenr, size, trim_state); 2557 } 2558 2559 int btrfs_remove_free_space(struct btrfs_block_group *block_group, 2560 u64 offset, u64 bytes) 2561 { 2562 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2563 struct btrfs_free_space *info; 2564 int ret; 2565 bool re_search = false; 2566 2567 spin_lock(&ctl->tree_lock); 2568 2569 again: 2570 ret = 0; 2571 if (!bytes) 2572 goto out_lock; 2573 2574 info = tree_search_offset(ctl, offset, 0, 0); 2575 if (!info) { 2576 /* 2577 * oops didn't find an extent that matched the space we wanted 2578 * to remove, look for a bitmap instead 2579 */ 2580 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2581 1, 0); 2582 if (!info) { 2583 /* 2584 * If we found a partial bit of our free space in a 2585 * bitmap but then couldn't find the other part this may 2586 * be a problem, so WARN about it. 2587 */ 2588 WARN_ON(re_search); 2589 goto out_lock; 2590 } 2591 } 2592 2593 re_search = false; 2594 if (!info->bitmap) { 2595 unlink_free_space(ctl, info); 2596 if (offset == info->offset) { 2597 u64 to_free = min(bytes, info->bytes); 2598 2599 info->bytes -= to_free; 2600 info->offset += to_free; 2601 if (info->bytes) { 2602 ret = link_free_space(ctl, info); 2603 WARN_ON(ret); 2604 } else { 2605 kmem_cache_free(btrfs_free_space_cachep, info); 2606 } 2607 2608 offset += to_free; 2609 bytes -= to_free; 2610 goto again; 2611 } else { 2612 u64 old_end = info->bytes + info->offset; 2613 2614 info->bytes = offset - info->offset; 2615 ret = link_free_space(ctl, info); 2616 WARN_ON(ret); 2617 if (ret) 2618 goto out_lock; 2619 2620 /* Not enough bytes in this entry to satisfy us */ 2621 if (old_end < offset + bytes) { 2622 bytes -= old_end - offset; 2623 offset = old_end; 2624 goto again; 2625 } else if (old_end == offset + bytes) { 2626 /* all done */ 2627 goto out_lock; 2628 } 2629 spin_unlock(&ctl->tree_lock); 2630 2631 ret = __btrfs_add_free_space(block_group->fs_info, ctl, 2632 offset + bytes, 2633 old_end - (offset + bytes), 2634 info->trim_state); 2635 WARN_ON(ret); 2636 goto out; 2637 } 2638 } 2639 2640 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2641 if (ret == -EAGAIN) { 2642 re_search = true; 2643 goto again; 2644 } 2645 out_lock: 2646 btrfs_discard_update_discardable(block_group, ctl); 2647 spin_unlock(&ctl->tree_lock); 2648 out: 2649 return ret; 2650 } 2651 2652 void btrfs_dump_free_space(struct btrfs_block_group *block_group, 2653 u64 bytes) 2654 { 2655 struct btrfs_fs_info *fs_info = block_group->fs_info; 2656 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2657 struct btrfs_free_space *info; 2658 struct rb_node *n; 2659 int count = 0; 2660 2661 spin_lock(&ctl->tree_lock); 2662 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2663 info = rb_entry(n, struct btrfs_free_space, offset_index); 2664 if (info->bytes >= bytes && !block_group->ro) 2665 count++; 2666 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2667 info->offset, info->bytes, 2668 (info->bitmap) ? 
"yes" : "no"); 2669 } 2670 spin_unlock(&ctl->tree_lock); 2671 btrfs_info(fs_info, "block group has cluster?: %s", 2672 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2673 btrfs_info(fs_info, 2674 "%d blocks of free space at or bigger than bytes is", count); 2675 } 2676 2677 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group) 2678 { 2679 struct btrfs_fs_info *fs_info = block_group->fs_info; 2680 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2681 2682 spin_lock_init(&ctl->tree_lock); 2683 ctl->unit = fs_info->sectorsize; 2684 ctl->start = block_group->start; 2685 ctl->private = block_group; 2686 ctl->op = &free_space_op; 2687 INIT_LIST_HEAD(&ctl->trimming_ranges); 2688 mutex_init(&ctl->cache_writeout_mutex); 2689 2690 /* 2691 * we only want to have 32k of ram per block group for keeping 2692 * track of free space, and if we pass 1/2 of that we want to 2693 * start converting things over to using bitmaps 2694 */ 2695 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2696 } 2697 2698 /* 2699 * for a given cluster, put all of its extents back into the free 2700 * space cache. If the block group passed doesn't match the block group 2701 * pointed to by the cluster, someone else raced in and freed the 2702 * cluster already. In that case, we just return without changing anything 2703 */ 2704 static void __btrfs_return_cluster_to_free_space( 2705 struct btrfs_block_group *block_group, 2706 struct btrfs_free_cluster *cluster) 2707 { 2708 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2709 struct btrfs_free_space *entry; 2710 struct rb_node *node; 2711 2712 spin_lock(&cluster->lock); 2713 if (cluster->block_group != block_group) 2714 goto out; 2715 2716 cluster->block_group = NULL; 2717 cluster->window_start = 0; 2718 list_del_init(&cluster->block_group_list); 2719 2720 node = rb_first(&cluster->root); 2721 while (node) { 2722 bool bitmap; 2723 2724 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2725 node = rb_next(&entry->offset_index); 2726 rb_erase(&entry->offset_index, &cluster->root); 2727 RB_CLEAR_NODE(&entry->offset_index); 2728 2729 bitmap = (entry->bitmap != NULL); 2730 if (!bitmap) { 2731 /* Merging treats extents as if they were new */ 2732 if (!btrfs_free_space_trimmed(entry)) { 2733 ctl->discardable_extents[BTRFS_STAT_CURR]--; 2734 ctl->discardable_bytes[BTRFS_STAT_CURR] -= 2735 entry->bytes; 2736 } 2737 2738 try_merge_free_space(ctl, entry, false); 2739 steal_from_bitmap(ctl, entry, false); 2740 2741 /* As we insert directly, update these statistics */ 2742 if (!btrfs_free_space_trimmed(entry)) { 2743 ctl->discardable_extents[BTRFS_STAT_CURR]++; 2744 ctl->discardable_bytes[BTRFS_STAT_CURR] += 2745 entry->bytes; 2746 } 2747 } 2748 tree_insert_offset(&ctl->free_space_offset, 2749 entry->offset, &entry->offset_index, bitmap); 2750 } 2751 cluster->root = RB_ROOT; 2752 2753 out: 2754 spin_unlock(&cluster->lock); 2755 btrfs_put_block_group(block_group); 2756 } 2757 2758 static void __btrfs_remove_free_space_cache_locked( 2759 struct btrfs_free_space_ctl *ctl) 2760 { 2761 struct btrfs_free_space *info; 2762 struct rb_node *node; 2763 2764 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2765 info = rb_entry(node, struct btrfs_free_space, offset_index); 2766 if (!info->bitmap) { 2767 unlink_free_space(ctl, info); 2768 kmem_cache_free(btrfs_free_space_cachep, info); 2769 } else { 2770 free_bitmap(ctl, info); 2771 } 2772 2773 cond_resched_lock(&ctl->tree_lock); 2774 } 2775 } 2776 2777 
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2778 { 2779 spin_lock(&ctl->tree_lock); 2780 __btrfs_remove_free_space_cache_locked(ctl); 2781 if (ctl->private) 2782 btrfs_discard_update_discardable(ctl->private, ctl); 2783 spin_unlock(&ctl->tree_lock); 2784 } 2785 2786 void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) 2787 { 2788 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2789 struct btrfs_free_cluster *cluster; 2790 struct list_head *head; 2791 2792 spin_lock(&ctl->tree_lock); 2793 while ((head = block_group->cluster_list.next) != 2794 &block_group->cluster_list) { 2795 cluster = list_entry(head, struct btrfs_free_cluster, 2796 block_group_list); 2797 2798 WARN_ON(cluster->block_group != block_group); 2799 __btrfs_return_cluster_to_free_space(block_group, cluster); 2800 2801 cond_resched_lock(&ctl->tree_lock); 2802 } 2803 __btrfs_remove_free_space_cache_locked(ctl); 2804 btrfs_discard_update_discardable(block_group, ctl); 2805 spin_unlock(&ctl->tree_lock); 2806 2807 } 2808 2809 /** 2810 * btrfs_is_free_space_trimmed - see if everything is trimmed 2811 * @block_group: block_group of interest 2812 * 2813 * Walk @block_group's free space rb_tree to determine if everything is trimmed. 2814 */ 2815 bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) 2816 { 2817 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2818 struct btrfs_free_space *info; 2819 struct rb_node *node; 2820 bool ret = true; 2821 2822 spin_lock(&ctl->tree_lock); 2823 node = rb_first(&ctl->free_space_offset); 2824 2825 while (node) { 2826 info = rb_entry(node, struct btrfs_free_space, offset_index); 2827 2828 if (!btrfs_free_space_trimmed(info)) { 2829 ret = false; 2830 break; 2831 } 2832 2833 node = rb_next(node); 2834 } 2835 2836 spin_unlock(&ctl->tree_lock); 2837 return ret; 2838 } 2839 2840 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, 2841 u64 offset, u64 bytes, u64 empty_size, 2842 u64 *max_extent_size) 2843 { 2844 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2845 struct btrfs_discard_ctl *discard_ctl = 2846 &block_group->fs_info->discard_ctl; 2847 struct btrfs_free_space *entry = NULL; 2848 u64 bytes_search = bytes + empty_size; 2849 u64 ret = 0; 2850 u64 align_gap = 0; 2851 u64 align_gap_len = 0; 2852 enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2853 2854 spin_lock(&ctl->tree_lock); 2855 entry = find_free_space(ctl, &offset, &bytes_search, 2856 block_group->full_stripe_len, max_extent_size); 2857 if (!entry) 2858 goto out; 2859 2860 ret = offset; 2861 if (entry->bitmap) { 2862 bitmap_clear_bits(ctl, entry, offset, bytes); 2863 2864 if (!btrfs_free_space_trimmed(entry)) 2865 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2866 2867 if (!entry->bytes) 2868 free_bitmap(ctl, entry); 2869 } else { 2870 unlink_free_space(ctl, entry); 2871 align_gap_len = offset - entry->offset; 2872 align_gap = entry->offset; 2873 align_gap_trim_state = entry->trim_state; 2874 2875 if (!btrfs_free_space_trimmed(entry)) 2876 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2877 2878 entry->offset = offset + bytes; 2879 WARN_ON(entry->bytes < bytes + align_gap_len); 2880 2881 entry->bytes -= bytes + align_gap_len; 2882 if (!entry->bytes) 2883 kmem_cache_free(btrfs_free_space_cachep, entry); 2884 else 2885 link_free_space(ctl, entry); 2886 } 2887 out: 2888 btrfs_discard_update_discardable(block_group, ctl); 2889 spin_unlock(&ctl->tree_lock); 2890 2891 if 
(align_gap_len) 2892 __btrfs_add_free_space(block_group->fs_info, ctl, 2893 align_gap, align_gap_len, 2894 align_gap_trim_state); 2895 return ret; 2896 } 2897 2898 /* 2899 * given a cluster, put all of its extents back into the free space 2900 * cache. If a block group is passed, this function will only free 2901 * a cluster that belongs to the passed block group. 2902 * 2903 * Otherwise, it'll get a reference on the block group pointed to by the 2904 * cluster and remove the cluster from it. 2905 */ 2906 void btrfs_return_cluster_to_free_space( 2907 struct btrfs_block_group *block_group, 2908 struct btrfs_free_cluster *cluster) 2909 { 2910 struct btrfs_free_space_ctl *ctl; 2911 2912 /* first, get a safe pointer to the block group */ 2913 spin_lock(&cluster->lock); 2914 if (!block_group) { 2915 block_group = cluster->block_group; 2916 if (!block_group) { 2917 spin_unlock(&cluster->lock); 2918 return; 2919 } 2920 } else if (cluster->block_group != block_group) { 2921 /* someone else has already freed it don't redo their work */ 2922 spin_unlock(&cluster->lock); 2923 return; 2924 } 2925 btrfs_get_block_group(block_group); 2926 spin_unlock(&cluster->lock); 2927 2928 ctl = block_group->free_space_ctl; 2929 2930 /* now return any extents the cluster had on it */ 2931 spin_lock(&ctl->tree_lock); 2932 __btrfs_return_cluster_to_free_space(block_group, cluster); 2933 spin_unlock(&ctl->tree_lock); 2934 2935 btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group); 2936 2937 /* finally drop our ref */ 2938 btrfs_put_block_group(block_group); 2939 } 2940 2941 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group, 2942 struct btrfs_free_cluster *cluster, 2943 struct btrfs_free_space *entry, 2944 u64 bytes, u64 min_start, 2945 u64 *max_extent_size) 2946 { 2947 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2948 int err; 2949 u64 search_start = cluster->window_start; 2950 u64 search_bytes = bytes; 2951 u64 ret = 0; 2952 2953 search_start = min_start; 2954 search_bytes = bytes; 2955 2956 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2957 if (err) { 2958 *max_extent_size = max(get_max_extent_size(entry), 2959 *max_extent_size); 2960 return 0; 2961 } 2962 2963 ret = search_start; 2964 __bitmap_clear_bits(ctl, entry, ret, bytes); 2965 2966 return ret; 2967 } 2968 2969 /* 2970 * given a cluster, try to allocate 'bytes' from it, returns 0 2971 * if it couldn't find anything suitably large, or a logical disk offset 2972 * if things worked out 2973 */ 2974 u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, 2975 struct btrfs_free_cluster *cluster, u64 bytes, 2976 u64 min_start, u64 *max_extent_size) 2977 { 2978 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2979 struct btrfs_discard_ctl *discard_ctl = 2980 &block_group->fs_info->discard_ctl; 2981 struct btrfs_free_space *entry = NULL; 2982 struct rb_node *node; 2983 u64 ret = 0; 2984 2985 spin_lock(&cluster->lock); 2986 if (bytes > cluster->max_size) 2987 goto out; 2988 2989 if (cluster->block_group != block_group) 2990 goto out; 2991 2992 node = rb_first(&cluster->root); 2993 if (!node) 2994 goto out; 2995 2996 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2997 while (1) { 2998 if (entry->bytes < bytes) 2999 *max_extent_size = max(get_max_extent_size(entry), 3000 *max_extent_size); 3001 3002 if (entry->bytes < bytes || 3003 (!entry->bitmap && entry->offset < min_start)) { 3004 node = rb_next(&entry->offset_index); 3005 if (!node) 3006 
break; 3007 entry = rb_entry(node, struct btrfs_free_space, 3008 offset_index); 3009 continue; 3010 } 3011 3012 if (entry->bitmap) { 3013 ret = btrfs_alloc_from_bitmap(block_group, 3014 cluster, entry, bytes, 3015 cluster->window_start, 3016 max_extent_size); 3017 if (ret == 0) { 3018 node = rb_next(&entry->offset_index); 3019 if (!node) 3020 break; 3021 entry = rb_entry(node, struct btrfs_free_space, 3022 offset_index); 3023 continue; 3024 } 3025 cluster->window_start += bytes; 3026 } else { 3027 ret = entry->offset; 3028 3029 entry->offset += bytes; 3030 entry->bytes -= bytes; 3031 } 3032 3033 if (entry->bytes == 0) 3034 rb_erase(&entry->offset_index, &cluster->root); 3035 break; 3036 } 3037 out: 3038 spin_unlock(&cluster->lock); 3039 3040 if (!ret) 3041 return 0; 3042 3043 spin_lock(&ctl->tree_lock); 3044 3045 if (!btrfs_free_space_trimmed(entry)) 3046 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 3047 3048 ctl->free_space -= bytes; 3049 if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) 3050 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 3051 if (entry->bytes == 0) { 3052 ctl->free_extents--; 3053 if (entry->bitmap) { 3054 kmem_cache_free(btrfs_free_space_bitmap_cachep, 3055 entry->bitmap); 3056 ctl->total_bitmaps--; 3057 ctl->op->recalc_thresholds(ctl); 3058 } else if (!btrfs_free_space_trimmed(entry)) { 3059 ctl->discardable_extents[BTRFS_STAT_CURR]--; 3060 } 3061 kmem_cache_free(btrfs_free_space_cachep, entry); 3062 } 3063 3064 spin_unlock(&ctl->tree_lock); 3065 3066 return ret; 3067 } 3068 3069 static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group, 3070 struct btrfs_free_space *entry, 3071 struct btrfs_free_cluster *cluster, 3072 u64 offset, u64 bytes, 3073 u64 cont1_bytes, u64 min_bytes) 3074 { 3075 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3076 unsigned long next_zero; 3077 unsigned long i; 3078 unsigned long want_bits; 3079 unsigned long min_bits; 3080 unsigned long found_bits; 3081 unsigned long max_bits = 0; 3082 unsigned long start = 0; 3083 unsigned long total_found = 0; 3084 int ret; 3085 3086 i = offset_to_bit(entry->offset, ctl->unit, 3087 max_t(u64, offset, entry->offset)); 3088 want_bits = bytes_to_bits(bytes, ctl->unit); 3089 min_bits = bytes_to_bits(min_bytes, ctl->unit); 3090 3091 /* 3092 * Don't bother looking for a cluster in this bitmap if it's heavily 3093 * fragmented. 
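	 * entry->max_extent_size caches the largest free run a previous failed
	 * search saw, so if that is already smaller than cont1_bytes a rescan
	 * of the bits cannot succeed either.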
3094 */ 3095 if (entry->max_extent_size && 3096 entry->max_extent_size < cont1_bytes) 3097 return -ENOSPC; 3098 again: 3099 found_bits = 0; 3100 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 3101 next_zero = find_next_zero_bit(entry->bitmap, 3102 BITS_PER_BITMAP, i); 3103 if (next_zero - i >= min_bits) { 3104 found_bits = next_zero - i; 3105 if (found_bits > max_bits) 3106 max_bits = found_bits; 3107 break; 3108 } 3109 if (next_zero - i > max_bits) 3110 max_bits = next_zero - i; 3111 i = next_zero; 3112 } 3113 3114 if (!found_bits) { 3115 entry->max_extent_size = (u64)max_bits * ctl->unit; 3116 return -ENOSPC; 3117 } 3118 3119 if (!total_found) { 3120 start = i; 3121 cluster->max_size = 0; 3122 } 3123 3124 total_found += found_bits; 3125 3126 if (cluster->max_size < found_bits * ctl->unit) 3127 cluster->max_size = found_bits * ctl->unit; 3128 3129 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 3130 i = next_zero + 1; 3131 goto again; 3132 } 3133 3134 cluster->window_start = start * ctl->unit + entry->offset; 3135 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3136 ret = tree_insert_offset(&cluster->root, entry->offset, 3137 &entry->offset_index, 1); 3138 ASSERT(!ret); /* -EEXIST; Logic error */ 3139 3140 trace_btrfs_setup_cluster(block_group, cluster, 3141 total_found * ctl->unit, 1); 3142 return 0; 3143 } 3144 3145 /* 3146 * This searches the block group for just extents to fill the cluster with. 3147 * Try to find a cluster with at least bytes total bytes, at least one 3148 * extent of cont1_bytes, and other clusters of at least min_bytes. 3149 */ 3150 static noinline int 3151 setup_cluster_no_bitmap(struct btrfs_block_group *block_group, 3152 struct btrfs_free_cluster *cluster, 3153 struct list_head *bitmaps, u64 offset, u64 bytes, 3154 u64 cont1_bytes, u64 min_bytes) 3155 { 3156 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3157 struct btrfs_free_space *first = NULL; 3158 struct btrfs_free_space *entry = NULL; 3159 struct btrfs_free_space *last; 3160 struct rb_node *node; 3161 u64 window_free; 3162 u64 max_extent; 3163 u64 total_size = 0; 3164 3165 entry = tree_search_offset(ctl, offset, 0, 1); 3166 if (!entry) 3167 return -ENOSPC; 3168 3169 /* 3170 * We don't want bitmaps, so just move along until we find a normal 3171 * extent entry. 
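	 * Bitmaps we walk past are stashed on the caller's @bitmaps list so
	 * that setup_cluster_bitmap() can try them without another tree search.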
3172 */ 3173 while (entry->bitmap || entry->bytes < min_bytes) { 3174 if (entry->bitmap && list_empty(&entry->list)) 3175 list_add_tail(&entry->list, bitmaps); 3176 node = rb_next(&entry->offset_index); 3177 if (!node) 3178 return -ENOSPC; 3179 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3180 } 3181 3182 window_free = entry->bytes; 3183 max_extent = entry->bytes; 3184 first = entry; 3185 last = entry; 3186 3187 for (node = rb_next(&entry->offset_index); node; 3188 node = rb_next(&entry->offset_index)) { 3189 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3190 3191 if (entry->bitmap) { 3192 if (list_empty(&entry->list)) 3193 list_add_tail(&entry->list, bitmaps); 3194 continue; 3195 } 3196 3197 if (entry->bytes < min_bytes) 3198 continue; 3199 3200 last = entry; 3201 window_free += entry->bytes; 3202 if (entry->bytes > max_extent) 3203 max_extent = entry->bytes; 3204 } 3205 3206 if (window_free < bytes || max_extent < cont1_bytes) 3207 return -ENOSPC; 3208 3209 cluster->window_start = first->offset; 3210 3211 node = &first->offset_index; 3212 3213 /* 3214 * now we've found our entries, pull them out of the free space 3215 * cache and put them into the cluster rbtree 3216 */ 3217 do { 3218 int ret; 3219 3220 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3221 node = rb_next(&entry->offset_index); 3222 if (entry->bitmap || entry->bytes < min_bytes) 3223 continue; 3224 3225 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3226 ret = tree_insert_offset(&cluster->root, entry->offset, 3227 &entry->offset_index, 0); 3228 total_size += entry->bytes; 3229 ASSERT(!ret); /* -EEXIST; Logic error */ 3230 } while (node && entry != last); 3231 3232 cluster->max_size = max_extent; 3233 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 3234 return 0; 3235 } 3236 3237 /* 3238 * This specifically looks for bitmaps that may work in the cluster, we assume 3239 * that we have already failed to find extents that will work. 3240 */ 3241 static noinline int 3242 setup_cluster_bitmap(struct btrfs_block_group *block_group, 3243 struct btrfs_free_cluster *cluster, 3244 struct list_head *bitmaps, u64 offset, u64 bytes, 3245 u64 cont1_bytes, u64 min_bytes) 3246 { 3247 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3248 struct btrfs_free_space *entry = NULL; 3249 int ret = -ENOSPC; 3250 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 3251 3252 if (ctl->total_bitmaps == 0) 3253 return -ENOSPC; 3254 3255 /* 3256 * The bitmap that covers offset won't be in the list unless offset 3257 * is just its start offset. 3258 */ 3259 if (!list_empty(bitmaps)) 3260 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 3261 3262 if (!entry || entry->offset != bitmap_offset) { 3263 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 3264 if (entry && list_empty(&entry->list)) 3265 list_add(&entry->list, bitmaps); 3266 } 3267 3268 list_for_each_entry(entry, bitmaps, list) { 3269 if (entry->bytes < bytes) 3270 continue; 3271 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3272 bytes, cont1_bytes, min_bytes); 3273 if (!ret) 3274 return 0; 3275 } 3276 3277 /* 3278 * The bitmaps list has all the bitmaps that record free space 3279 * starting after offset, so no more search is required. 3280 */ 3281 return -ENOSPC; 3282 } 3283 3284 /* 3285 * here we try to find a cluster of blocks in a block group. The goal 3286 * is to find at least bytes+empty_size. 3287 * We might not find them all in one contiguous area. 
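 * cont1_bytes is the largest single extent we require inside the cluster and
 * min_bytes is the smallest piece of free space we will bother adding to it.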
3288 * 3289 * returns zero and sets up cluster if things worked out, otherwise 3290 * it returns -enospc 3291 */ 3292 int btrfs_find_space_cluster(struct btrfs_block_group *block_group, 3293 struct btrfs_free_cluster *cluster, 3294 u64 offset, u64 bytes, u64 empty_size) 3295 { 3296 struct btrfs_fs_info *fs_info = block_group->fs_info; 3297 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3298 struct btrfs_free_space *entry, *tmp; 3299 LIST_HEAD(bitmaps); 3300 u64 min_bytes; 3301 u64 cont1_bytes; 3302 int ret; 3303 3304 /* 3305 * Choose the minimum extent size we'll require for this 3306 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3307 * For metadata, allow allocates with smaller extents. For 3308 * data, keep it dense. 3309 */ 3310 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3311 cont1_bytes = min_bytes = bytes + empty_size; 3312 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3313 cont1_bytes = bytes; 3314 min_bytes = fs_info->sectorsize; 3315 } else { 3316 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3317 min_bytes = fs_info->sectorsize; 3318 } 3319 3320 spin_lock(&ctl->tree_lock); 3321 3322 /* 3323 * If we know we don't have enough space to make a cluster don't even 3324 * bother doing all the work to try and find one. 3325 */ 3326 if (ctl->free_space < bytes) { 3327 spin_unlock(&ctl->tree_lock); 3328 return -ENOSPC; 3329 } 3330 3331 spin_lock(&cluster->lock); 3332 3333 /* someone already found a cluster, hooray */ 3334 if (cluster->block_group) { 3335 ret = 0; 3336 goto out; 3337 } 3338 3339 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3340 min_bytes); 3341 3342 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3343 bytes + empty_size, 3344 cont1_bytes, min_bytes); 3345 if (ret) 3346 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3347 offset, bytes + empty_size, 3348 cont1_bytes, min_bytes); 3349 3350 /* Clear our temporary list */ 3351 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3352 list_del_init(&entry->list); 3353 3354 if (!ret) { 3355 btrfs_get_block_group(block_group); 3356 list_add_tail(&cluster->block_group_list, 3357 &block_group->cluster_list); 3358 cluster->block_group = block_group; 3359 } else { 3360 trace_btrfs_failed_cluster_setup(block_group); 3361 } 3362 out: 3363 spin_unlock(&cluster->lock); 3364 spin_unlock(&ctl->tree_lock); 3365 3366 return ret; 3367 } 3368 3369 /* 3370 * simple code to zero out a cluster 3371 */ 3372 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3373 { 3374 spin_lock_init(&cluster->lock); 3375 spin_lock_init(&cluster->refill_lock); 3376 cluster->root = RB_ROOT; 3377 cluster->max_size = 0; 3378 cluster->fragmented = false; 3379 INIT_LIST_HEAD(&cluster->block_group_list); 3380 cluster->block_group = NULL; 3381 } 3382 3383 static int do_trimming(struct btrfs_block_group *block_group, 3384 u64 *total_trimmed, u64 start, u64 bytes, 3385 u64 reserved_start, u64 reserved_bytes, 3386 enum btrfs_trim_state reserved_trim_state, 3387 struct btrfs_trim_range *trim_entry) 3388 { 3389 struct btrfs_space_info *space_info = block_group->space_info; 3390 struct btrfs_fs_info *fs_info = block_group->fs_info; 3391 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3392 int ret; 3393 int update = 0; 3394 const u64 end = start + bytes; 3395 const u64 reserved_end = reserved_start + reserved_bytes; 3396 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3397 u64 trimmed = 0; 3398 3399 spin_lock(&space_info->lock); 
3400 spin_lock(&block_group->lock); 3401 if (!block_group->ro) { 3402 block_group->reserved += reserved_bytes; 3403 space_info->bytes_reserved += reserved_bytes; 3404 update = 1; 3405 } 3406 spin_unlock(&block_group->lock); 3407 spin_unlock(&space_info->lock); 3408 3409 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3410 if (!ret) { 3411 *total_trimmed += trimmed; 3412 trim_state = BTRFS_TRIM_STATE_TRIMMED; 3413 } 3414 3415 mutex_lock(&ctl->cache_writeout_mutex); 3416 if (reserved_start < start) 3417 __btrfs_add_free_space(fs_info, ctl, reserved_start, 3418 start - reserved_start, 3419 reserved_trim_state); 3420 if (start + bytes < reserved_start + reserved_bytes) 3421 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end, 3422 reserved_trim_state); 3423 __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state); 3424 list_del(&trim_entry->list); 3425 mutex_unlock(&ctl->cache_writeout_mutex); 3426 3427 if (update) { 3428 spin_lock(&space_info->lock); 3429 spin_lock(&block_group->lock); 3430 if (block_group->ro) 3431 space_info->bytes_readonly += reserved_bytes; 3432 block_group->reserved -= reserved_bytes; 3433 space_info->bytes_reserved -= reserved_bytes; 3434 spin_unlock(&block_group->lock); 3435 spin_unlock(&space_info->lock); 3436 } 3437 3438 return ret; 3439 } 3440 3441 /* 3442 * If @async is set, then we will trim 1 region and return. 3443 */ 3444 static int trim_no_bitmap(struct btrfs_block_group *block_group, 3445 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3446 bool async) 3447 { 3448 struct btrfs_discard_ctl *discard_ctl = 3449 &block_group->fs_info->discard_ctl; 3450 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3451 struct btrfs_free_space *entry; 3452 struct rb_node *node; 3453 int ret = 0; 3454 u64 extent_start; 3455 u64 extent_bytes; 3456 enum btrfs_trim_state extent_trim_state; 3457 u64 bytes; 3458 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3459 3460 while (start < end) { 3461 struct btrfs_trim_range trim_entry; 3462 3463 mutex_lock(&ctl->cache_writeout_mutex); 3464 spin_lock(&ctl->tree_lock); 3465 3466 if (ctl->free_space < minlen) 3467 goto out_unlock; 3468 3469 entry = tree_search_offset(ctl, start, 0, 1); 3470 if (!entry) 3471 goto out_unlock; 3472 3473 /* Skip bitmaps and if async, already trimmed entries */ 3474 while (entry->bitmap || 3475 (async && btrfs_free_space_trimmed(entry))) { 3476 node = rb_next(&entry->offset_index); 3477 if (!node) 3478 goto out_unlock; 3479 entry = rb_entry(node, struct btrfs_free_space, 3480 offset_index); 3481 } 3482 3483 if (entry->offset >= end) 3484 goto out_unlock; 3485 3486 extent_start = entry->offset; 3487 extent_bytes = entry->bytes; 3488 extent_trim_state = entry->trim_state; 3489 if (async) { 3490 start = entry->offset; 3491 bytes = entry->bytes; 3492 if (bytes < minlen) { 3493 spin_unlock(&ctl->tree_lock); 3494 mutex_unlock(&ctl->cache_writeout_mutex); 3495 goto next; 3496 } 3497 unlink_free_space(ctl, entry); 3498 /* 3499 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3500 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim 3501 * X when we come back around. So trim it now. 
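			 * Splitting the entry here keeps each discard at or
			 * below max_discard_size while the remainder is
			 * relinked for a later pass.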
			 */
			if (max_discard_size &&
			    bytes >= (max_discard_size +
				      BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
				bytes = max_discard_size;
				extent_bytes = max_discard_size;
				entry->offset += max_discard_size;
				entry->bytes -= max_discard_size;
				link_free_space(ctl, entry);
			} else {
				kmem_cache_free(btrfs_free_space_cachep, entry);
			}
		} else {
			start = max(start, extent_start);
			bytes = min(extent_start + extent_bytes, end) - start;
			if (bytes < minlen) {
				spin_unlock(&ctl->tree_lock);
				mutex_unlock(&ctl->cache_writeout_mutex);
				goto next;
			}

			unlink_free_space(ctl, entry);
			kmem_cache_free(btrfs_free_space_cachep, entry);
		}

		spin_unlock(&ctl->tree_lock);
		trim_entry.start = extent_start;
		trim_entry.bytes = extent_bytes;
		list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
		mutex_unlock(&ctl->cache_writeout_mutex);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  extent_start, extent_bytes,
				  extent_trim_state, &trim_entry);
		if (ret) {
			block_group->discard_cursor = start + bytes;
			break;
		}
next:
		start += bytes;
		block_group->discard_cursor = start;
		if (async && *total_trimmed)
			break;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;

out_unlock:
	block_group->discard_cursor = btrfs_block_group_end(block_group);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

	return ret;
}

/*
 * If we break out of trimming a bitmap prematurely, we should reset the
 * trimming bit. In a rather contrived case, it's possible to race here so
 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
 *
 * start = start of bitmap
 * end = near end of bitmap
 *
 * Thread 1:			Thread 2:
 * trim_bitmaps(start)
 *				trim_bitmaps(end)
 *				end_trimming_bitmap()
 * reset_trimming_bitmap()
 */
static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
{
	struct btrfs_free_space *entry;

	spin_lock(&ctl->tree_lock);
	entry = tree_search_offset(ctl, offset, 1, 0);
	if (entry) {
		if (btrfs_free_space_trimmed(entry)) {
			ctl->discardable_extents[BTRFS_STAT_CURR] +=
				entry->bitmap_extents;
			ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
		}
		entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
	}

	spin_unlock(&ctl->tree_lock);
}

static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
				struct btrfs_free_space *entry)
{
	if (btrfs_free_space_trimming_bitmap(entry)) {
		entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
		ctl->discardable_extents[BTRFS_STAT_CURR] -=
			entry->bitmap_extents;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
	}
}

/*
 * If @async is set, then we will trim 1 region and return.
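 * Progress is tracked per bitmap: if we bail out of one part way through,
 * reset_trimming_bitmap() flips it back to BTRFS_TRIM_STATE_UNTRIMMED.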
3609 */ 3610 static int trim_bitmaps(struct btrfs_block_group *block_group, 3611 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3612 u64 maxlen, bool async) 3613 { 3614 struct btrfs_discard_ctl *discard_ctl = 3615 &block_group->fs_info->discard_ctl; 3616 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3617 struct btrfs_free_space *entry; 3618 int ret = 0; 3619 int ret2; 3620 u64 bytes; 3621 u64 offset = offset_to_bitmap(ctl, start); 3622 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3623 3624 while (offset < end) { 3625 bool next_bitmap = false; 3626 struct btrfs_trim_range trim_entry; 3627 3628 mutex_lock(&ctl->cache_writeout_mutex); 3629 spin_lock(&ctl->tree_lock); 3630 3631 if (ctl->free_space < minlen) { 3632 block_group->discard_cursor = 3633 btrfs_block_group_end(block_group); 3634 spin_unlock(&ctl->tree_lock); 3635 mutex_unlock(&ctl->cache_writeout_mutex); 3636 break; 3637 } 3638 3639 entry = tree_search_offset(ctl, offset, 1, 0); 3640 /* 3641 * Bitmaps are marked trimmed lossily now to prevent constant 3642 * discarding of the same bitmap (the reason why we are bound 3643 * by the filters). So, retrim the block group bitmaps when we 3644 * are preparing to punt to the unused_bgs list. This uses 3645 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED 3646 * which is the only discard index which sets minlen to 0. 3647 */ 3648 if (!entry || (async && minlen && start == offset && 3649 btrfs_free_space_trimmed(entry))) { 3650 spin_unlock(&ctl->tree_lock); 3651 mutex_unlock(&ctl->cache_writeout_mutex); 3652 next_bitmap = true; 3653 goto next; 3654 } 3655 3656 /* 3657 * Async discard bitmap trimming begins at by setting the start 3658 * to be key.objectid and the offset_to_bitmap() aligns to the 3659 * start of the bitmap. This lets us know we are fully 3660 * scanning the bitmap rather than only some portion of it. 3661 */ 3662 if (start == offset) 3663 entry->trim_state = BTRFS_TRIM_STATE_TRIMMING; 3664 3665 bytes = minlen; 3666 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3667 if (ret2 || start >= end) { 3668 /* 3669 * We lossily consider a bitmap trimmed if we only skip 3670 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER. 3671 */ 3672 if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER) 3673 end_trimming_bitmap(ctl, entry); 3674 else 3675 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3676 spin_unlock(&ctl->tree_lock); 3677 mutex_unlock(&ctl->cache_writeout_mutex); 3678 next_bitmap = true; 3679 goto next; 3680 } 3681 3682 /* 3683 * We already trimmed a region, but are using the locking above 3684 * to reset the trim_state. 3685 */ 3686 if (async && *total_trimmed) { 3687 spin_unlock(&ctl->tree_lock); 3688 mutex_unlock(&ctl->cache_writeout_mutex); 3689 goto out; 3690 } 3691 3692 bytes = min(bytes, end - start); 3693 if (bytes < minlen || (async && maxlen && bytes > maxlen)) { 3694 spin_unlock(&ctl->tree_lock); 3695 mutex_unlock(&ctl->cache_writeout_mutex); 3696 goto next; 3697 } 3698 3699 /* 3700 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3701 * If X < @minlen, we won't trim X when we come back around. 3702 * So trim it now. We differ here from trimming extents as we 3703 * don't keep individual state per bit. 
3704 */ 3705 if (async && 3706 max_discard_size && 3707 bytes > (max_discard_size + minlen)) 3708 bytes = max_discard_size; 3709 3710 bitmap_clear_bits(ctl, entry, start, bytes); 3711 if (entry->bytes == 0) 3712 free_bitmap(ctl, entry); 3713 3714 spin_unlock(&ctl->tree_lock); 3715 trim_entry.start = start; 3716 trim_entry.bytes = bytes; 3717 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3718 mutex_unlock(&ctl->cache_writeout_mutex); 3719 3720 ret = do_trimming(block_group, total_trimmed, start, bytes, 3721 start, bytes, 0, &trim_entry); 3722 if (ret) { 3723 reset_trimming_bitmap(ctl, offset); 3724 block_group->discard_cursor = 3725 btrfs_block_group_end(block_group); 3726 break; 3727 } 3728 next: 3729 if (next_bitmap) { 3730 offset += BITS_PER_BITMAP * ctl->unit; 3731 start = offset; 3732 } else { 3733 start += bytes; 3734 } 3735 block_group->discard_cursor = start; 3736 3737 if (fatal_signal_pending(current)) { 3738 if (start != offset) 3739 reset_trimming_bitmap(ctl, offset); 3740 ret = -ERESTARTSYS; 3741 break; 3742 } 3743 3744 cond_resched(); 3745 } 3746 3747 if (offset >= end) 3748 block_group->discard_cursor = end; 3749 3750 out: 3751 return ret; 3752 } 3753 3754 int btrfs_trim_block_group(struct btrfs_block_group *block_group, 3755 u64 *trimmed, u64 start, u64 end, u64 minlen) 3756 { 3757 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3758 int ret; 3759 u64 rem = 0; 3760 3761 *trimmed = 0; 3762 3763 spin_lock(&block_group->lock); 3764 if (block_group->removed) { 3765 spin_unlock(&block_group->lock); 3766 return 0; 3767 } 3768 btrfs_freeze_block_group(block_group); 3769 spin_unlock(&block_group->lock); 3770 3771 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false); 3772 if (ret) 3773 goto out; 3774 3775 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false); 3776 div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem); 3777 /* If we ended in the middle of a bitmap, reset the trimming flag */ 3778 if (rem) 3779 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end)); 3780 out: 3781 btrfs_unfreeze_block_group(block_group); 3782 return ret; 3783 } 3784 3785 int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group, 3786 u64 *trimmed, u64 start, u64 end, u64 minlen, 3787 bool async) 3788 { 3789 int ret; 3790 3791 *trimmed = 0; 3792 3793 spin_lock(&block_group->lock); 3794 if (block_group->removed) { 3795 spin_unlock(&block_group->lock); 3796 return 0; 3797 } 3798 btrfs_freeze_block_group(block_group); 3799 spin_unlock(&block_group->lock); 3800 3801 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async); 3802 btrfs_unfreeze_block_group(block_group); 3803 3804 return ret; 3805 } 3806 3807 int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group, 3808 u64 *trimmed, u64 start, u64 end, u64 minlen, 3809 u64 maxlen, bool async) 3810 { 3811 int ret; 3812 3813 *trimmed = 0; 3814 3815 spin_lock(&block_group->lock); 3816 if (block_group->removed) { 3817 spin_unlock(&block_group->lock); 3818 return 0; 3819 } 3820 btrfs_freeze_block_group(block_group); 3821 spin_unlock(&block_group->lock); 3822 3823 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen, 3824 async); 3825 3826 btrfs_unfreeze_block_group(block_group); 3827 3828 return ret; 3829 } 3830 3831 /* 3832 * Find the left-most item in the cache tree, and then return the 3833 * smallest inode number in the item. 
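 * This is used by the inode number cache (free_ino_ctl) to hand out the next
 * free objectid.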
3834 * 3835 * Note: the returned inode number may not be the smallest one in 3836 * the tree, if the left-most item is a bitmap. 3837 */ 3838 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3839 { 3840 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3841 struct btrfs_free_space *entry = NULL; 3842 u64 ino = 0; 3843 3844 spin_lock(&ctl->tree_lock); 3845 3846 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3847 goto out; 3848 3849 entry = rb_entry(rb_first(&ctl->free_space_offset), 3850 struct btrfs_free_space, offset_index); 3851 3852 if (!entry->bitmap) { 3853 ino = entry->offset; 3854 3855 unlink_free_space(ctl, entry); 3856 entry->offset++; 3857 entry->bytes--; 3858 if (!entry->bytes) 3859 kmem_cache_free(btrfs_free_space_cachep, entry); 3860 else 3861 link_free_space(ctl, entry); 3862 } else { 3863 u64 offset = 0; 3864 u64 count = 1; 3865 int ret; 3866 3867 ret = search_bitmap(ctl, entry, &offset, &count, true); 3868 /* Logic error; Should be empty if it can't find anything */ 3869 ASSERT(!ret); 3870 3871 ino = offset; 3872 bitmap_clear_bits(ctl, entry, offset, 1); 3873 if (entry->bytes == 0) 3874 free_bitmap(ctl, entry); 3875 } 3876 out: 3877 spin_unlock(&ctl->tree_lock); 3878 3879 return ino; 3880 } 3881 3882 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3883 struct btrfs_path *path) 3884 { 3885 struct inode *inode = NULL; 3886 3887 spin_lock(&root->ino_cache_lock); 3888 if (root->ino_cache_inode) 3889 inode = igrab(root->ino_cache_inode); 3890 spin_unlock(&root->ino_cache_lock); 3891 if (inode) 3892 return inode; 3893 3894 inode = __lookup_free_space_inode(root, path, 0); 3895 if (IS_ERR(inode)) 3896 return inode; 3897 3898 spin_lock(&root->ino_cache_lock); 3899 if (!btrfs_fs_closing(root->fs_info)) 3900 root->ino_cache_inode = igrab(inode); 3901 spin_unlock(&root->ino_cache_lock); 3902 3903 return inode; 3904 } 3905 3906 int create_free_ino_inode(struct btrfs_root *root, 3907 struct btrfs_trans_handle *trans, 3908 struct btrfs_path *path) 3909 { 3910 return __create_free_space_inode(root, trans, path, 3911 BTRFS_FREE_INO_OBJECTID, 0); 3912 } 3913 3914 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3915 { 3916 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3917 struct btrfs_path *path; 3918 struct inode *inode; 3919 int ret = 0; 3920 u64 root_gen = btrfs_root_generation(&root->root_item); 3921 3922 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3923 return 0; 3924 3925 /* 3926 * If we're unmounting then just return, since this does a search on the 3927 * normal root and not the commit root and we could deadlock. 

struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&root->ino_cache_lock);
	if (root->ino_cache_inode)
		inode = igrab(root->ino_cache_inode);
	spin_unlock(&root->ino_cache_lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path, 0);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&root->ino_cache_lock);
	if (!btrfs_fs_closing(root->fs_info))
		root->ino_cache_inode = igrab(inode);
	spin_unlock(&root->ino_cache_lock);

	return inode;
}

int create_free_ino_inode(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_path *path)
{
	return __create_free_space_inode(root, trans, path,
					 BTRFS_FREE_INO_OBJECTID, 0);
}

int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
		return 0;

	/*
	 * If we're unmounting then just return, since this does a search on
	 * the normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		btrfs_err(fs_info,
			  "failed to load free ino cache for root %llu",
			  root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path,
			      struct inode *inode)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	int ret;
	struct btrfs_io_ctl io_ctl;
	bool release_metadata = true;

	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
		return 0;

	memset(&io_ctl, 0, sizeof(io_ctl));
	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
	if (!ret) {
		/*
		 * At this point writepages() didn't error out, so our metadata
		 * reservation is released when the writeback finishes, at
		 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
		 * with or without an error.
		 */
		release_metadata = false;
		ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path);
	}

	if (ret) {
		if (release_metadata)
			btrfs_delalloc_release_metadata(BTRFS_I(inode),
					inode->i_size, true);
		btrfs_debug(fs_info,
			    "failed to write free ino cache for root %llu error %d",
			    root->root_key.objectid, ret);
	}

	return ret;
}
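
/*
 * Illustrative sketch, not part of the original file: a hypothetical caller
 * of btrfs_write_out_ino_cache().  The real write-out path also preallocates
 * space for the cache file and runs inside a transaction it already holds;
 * only the lookup + write + cleanup steps are shown here.
 */
#if 0	/* example only */
static int example_write_ino_cache(struct btrfs_root *root,
				   struct btrfs_trans_handle *trans)
{
	struct btrfs_path *path;
	struct inode *inode;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto out;
	}

	ret = btrfs_write_out_ino_cache(root, trans, path, inode);
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}
#endif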

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Use this if you need to make a bitmap or extent entry specifically.  It
 * doesn't do any of the merging that add_free_space does, and it acts a lot
 * like how the free space cache loading works, so you can get really weird
 * configurations.
 */
int test_add_free_space_entry(struct btrfs_block_group *cache,
			      u64 offset, u64 bytes, bool bitmap)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info = NULL, *bitmap_info;
	void *map = NULL;
	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
	u64 bytes_added;
	int ret;

again:
	if (!info) {
		info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
		if (!info)
			return -ENOMEM;
	}

	if (!bitmap) {
		spin_lock(&ctl->tree_lock);
		info->offset = offset;
		info->bytes = bytes;
		info->max_extent_size = 0;
		ret = link_free_space(ctl, info);
		spin_unlock(&ctl->tree_lock);
		if (ret)
			kmem_cache_free(btrfs_free_space_cachep, info);
		return ret;
	}

	if (!map) {
		map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
		if (!map) {
			kmem_cache_free(btrfs_free_space_cachep, info);
			return -ENOMEM;
		}
	}

	spin_lock(&ctl->tree_lock);
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		info->bitmap = map;
		map = NULL;
		add_new_bitmap(ctl, info, offset);
		bitmap_info = info;
		info = NULL;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
					  trim_state);

	bytes -= bytes_added;
	offset += bytes_added;
	spin_unlock(&ctl->tree_lock);

	if (bytes)
		goto again;

	if (info)
		kmem_cache_free(btrfs_free_space_cachep, info);
	if (map)
		kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
	return 0;
}
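
/*
 * Illustrative sketch, not part of the original file: how a self test might
 * seed a dummy block group with one extent entry and one bitmap entry using
 * the helper above, skipping the merging done by the normal add path.  The
 * helper name and the offsets/sizes are made up for the example.
 */
#if 0	/* example only */
static int example_seed_free_space(struct btrfs_block_group *cache)
{
	int ret;

	/* A 4M extent entry at the start of the block group. */
	ret = test_add_free_space_entry(cache, cache->start, SZ_4M, false);
	if (ret)
		return ret;

	/* 1M of free space tracked in a bitmap right after it. */
	return test_add_free_space_entry(cache, cache->start + SZ_4M, SZ_1M,
					 true);
}
#endif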

/*
 * Checks to see if the given range is in the free space cache.  This is really
 * just used to check the absence of space, so if there is free space in the
 * range at all we will return 1.
 */
int test_check_exists(struct btrfs_block_group *cache,
		      u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info;
	int ret = 0;

	spin_lock(&ctl->tree_lock);
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info)
			goto out;
	}

have_info:
	if (info->bitmap) {
		u64 bit_off, bit_bytes;
		struct rb_node *n;
		struct btrfs_free_space *tmp;

		bit_off = offset;
		bit_bytes = ctl->unit;
		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
		if (!ret) {
			if (bit_off == offset) {
				ret = 1;
				goto out;
			} else if (bit_off > offset &&
				   offset + bytes > bit_off) {
				ret = 1;
				goto out;
			}
		}

		n = rb_prev(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (tmp->offset + tmp->bytes < offset)
				break;
			if (offset + bytes < tmp->offset) {
				n = rb_prev(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		n = rb_next(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (offset + bytes < tmp->offset)
				break;
			if (tmp->offset + tmp->bytes < offset) {
				n = rb_next(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		ret = 0;
		goto out;
	}

	if (info->offset == offset) {
		ret = 1;
		goto out;
	}

	if (offset > info->offset && offset < info->offset + info->bytes)
		ret = 1;
out:
	spin_unlock(&ctl->tree_lock);
	return ret;
}
#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
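
/*
 * Illustrative sketch, not part of the original file (it would belong with
 * the self-test helpers above): test_check_exists() returns 1 if any free
 * space overlaps the queried range and 0 if none does, so a test that
 * expects a range to have been fully allocated or removed asserts on a zero
 * return.  The helper name is made up for the example.
 */
#if 0	/* example only */
static int example_expect_no_free_space(struct btrfs_block_group *cache,
					u64 offset, u64 bytes)
{
	if (test_check_exists(cache, offset, bytes))
		return -EINVAL;	/* free space still present in the range */
	return 0;
}
#endif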