// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include <linux/error-injection.h>
#include <linux/sched/mm.h>
#include "misc.h"
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "volumes.h"
#include "space-info.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"

#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG	SZ_64K
#define FORCE_EXTENT_THRESHOLD	SZ_1M

struct btrfs_trim_range {
	u64 start;
	u64 bytes;
	struct list_head list;
};

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info);
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes, bool for_alloc);
static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info);
static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	unsigned nofs_flag;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	/*
	 * We are often under a trans handle at this point, so we need to make
	 * sure NOFS is set to keep us from deadlocking.
85 */ 86 nofs_flag = memalloc_nofs_save(); 87 inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path); 88 btrfs_release_path(path); 89 memalloc_nofs_restore(nofs_flag); 90 if (IS_ERR(inode)) 91 return inode; 92 93 mapping_set_gfp_mask(inode->i_mapping, 94 mapping_gfp_constraint(inode->i_mapping, 95 ~(__GFP_FS | __GFP_HIGHMEM))); 96 97 return inode; 98 } 99 100 struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group, 101 struct btrfs_path *path) 102 { 103 struct btrfs_fs_info *fs_info = block_group->fs_info; 104 struct inode *inode = NULL; 105 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 106 107 spin_lock(&block_group->lock); 108 if (block_group->inode) 109 inode = igrab(block_group->inode); 110 spin_unlock(&block_group->lock); 111 if (inode) 112 return inode; 113 114 inode = __lookup_free_space_inode(fs_info->tree_root, path, 115 block_group->start); 116 if (IS_ERR(inode)) 117 return inode; 118 119 spin_lock(&block_group->lock); 120 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 121 btrfs_info(fs_info, "Old style space inode found, converting."); 122 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 123 BTRFS_INODE_NODATACOW; 124 block_group->disk_cache_state = BTRFS_DC_CLEAR; 125 } 126 127 if (!block_group->iref) { 128 block_group->inode = igrab(inode); 129 block_group->iref = 1; 130 } 131 spin_unlock(&block_group->lock); 132 133 return inode; 134 } 135 136 static int __create_free_space_inode(struct btrfs_root *root, 137 struct btrfs_trans_handle *trans, 138 struct btrfs_path *path, 139 u64 ino, u64 offset) 140 { 141 struct btrfs_key key; 142 struct btrfs_disk_key disk_key; 143 struct btrfs_free_space_header *header; 144 struct btrfs_inode_item *inode_item; 145 struct extent_buffer *leaf; 146 /* We inline CRCs for the free disk space cache */ 147 const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC | 148 BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 149 int ret; 150 151 ret = btrfs_insert_empty_inode(trans, root, path, ino); 152 if (ret) 153 return ret; 154 155 leaf = path->nodes[0]; 156 inode_item = btrfs_item_ptr(leaf, path->slots[0], 157 struct btrfs_inode_item); 158 btrfs_item_key(leaf, &disk_key, path->slots[0]); 159 memzero_extent_buffer(leaf, (unsigned long)inode_item, 160 sizeof(*inode_item)); 161 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 162 btrfs_set_inode_size(leaf, inode_item, 0); 163 btrfs_set_inode_nbytes(leaf, inode_item, 0); 164 btrfs_set_inode_uid(leaf, inode_item, 0); 165 btrfs_set_inode_gid(leaf, inode_item, 0); 166 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 167 btrfs_set_inode_flags(leaf, inode_item, flags); 168 btrfs_set_inode_nlink(leaf, inode_item, 1); 169 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 170 btrfs_set_inode_block_group(leaf, inode_item, offset); 171 btrfs_mark_buffer_dirty(leaf); 172 btrfs_release_path(path); 173 174 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 175 key.offset = offset; 176 key.type = 0; 177 ret = btrfs_insert_empty_item(trans, root, path, &key, 178 sizeof(struct btrfs_free_space_header)); 179 if (ret < 0) { 180 btrfs_release_path(path); 181 return ret; 182 } 183 184 leaf = path->nodes[0]; 185 header = btrfs_item_ptr(leaf, path->slots[0], 186 struct btrfs_free_space_header); 187 memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); 188 btrfs_set_free_space_key(leaf, header, &disk_key); 189 btrfs_mark_buffer_dirty(leaf); 190 btrfs_release_path(path); 191 192 return 0; 193 } 194 195 int create_free_space_inode(struct 
				 btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_get_free_objectid(trans->fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
					 ino, block_group->start);
}

/*
 * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode()
 * handles lookup, otherwise it takes ownership and iputs the inode.
 * Don't reuse an inode pointer after passing it into this function.
 */
int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
				  struct inode *inode,
				  struct btrfs_block_group *block_group)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!inode)
		inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		if (PTR_ERR(inode) != -ENOENT)
			ret = PTR_ERR(inode);
		goto out;
	}
	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (ret) {
		btrfs_add_delayed_iput(inode);
		goto out;
	}
	clear_nlink(inode);
	/* One for the block groups ref */
	spin_lock(&block_group->lock);
	if (block_group->iref) {
		block_group->iref = 0;
		block_group->inode = NULL;
		spin_unlock(&block_group->lock);
		iput(inode);
	} else {
		spin_unlock(&block_group->lock);
	}
	/* One for the lookup ref */
	btrfs_add_delayed_iput(inode);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.type = 0;
	key.offset = block_group->start;
	ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
				-1, 1);
	if (ret) {
		if (ret > 0)
			ret = 0;
		goto out;
	}
	ret = btrfs_del_item(trans, trans->fs_info->tree_root, path);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
		       btrfs_calc_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written out.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
328 */ 329 ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode), 330 0, BTRFS_EXTENT_DATA_KEY); 331 if (ret) 332 goto fail; 333 334 ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); 335 336 fail: 337 if (locked) 338 mutex_unlock(&trans->transaction->cache_write_mutex); 339 if (ret) 340 btrfs_abort_transaction(trans, ret); 341 342 return ret; 343 } 344 345 static void readahead_cache(struct inode *inode) 346 { 347 struct file_ra_state *ra; 348 unsigned long last_index; 349 350 ra = kzalloc(sizeof(*ra), GFP_NOFS); 351 if (!ra) 352 return; 353 354 file_ra_state_init(ra, inode->i_mapping); 355 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 356 357 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 358 359 kfree(ra); 360 } 361 362 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 363 int write) 364 { 365 int num_pages; 366 367 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 368 369 /* Make sure we can fit our crcs and generation into the first page */ 370 if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE) 371 return -ENOSPC; 372 373 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 374 375 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 376 if (!io_ctl->pages) 377 return -ENOMEM; 378 379 io_ctl->num_pages = num_pages; 380 io_ctl->fs_info = btrfs_sb(inode->i_sb); 381 io_ctl->inode = inode; 382 383 return 0; 384 } 385 ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO); 386 387 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 388 { 389 kfree(io_ctl->pages); 390 io_ctl->pages = NULL; 391 } 392 393 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 394 { 395 if (io_ctl->cur) { 396 io_ctl->cur = NULL; 397 io_ctl->orig = NULL; 398 } 399 } 400 401 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 402 { 403 ASSERT(io_ctl->index < io_ctl->num_pages); 404 io_ctl->page = io_ctl->pages[io_ctl->index++]; 405 io_ctl->cur = page_address(io_ctl->page); 406 io_ctl->orig = io_ctl->cur; 407 io_ctl->size = PAGE_SIZE; 408 if (clear) 409 clear_page(io_ctl->cur); 410 } 411 412 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 413 { 414 int i; 415 416 io_ctl_unmap_page(io_ctl); 417 418 for (i = 0; i < io_ctl->num_pages; i++) { 419 if (io_ctl->pages[i]) { 420 ClearPageChecked(io_ctl->pages[i]); 421 unlock_page(io_ctl->pages[i]); 422 put_page(io_ctl->pages[i]); 423 } 424 } 425 } 426 427 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate) 428 { 429 struct page *page; 430 struct inode *inode = io_ctl->inode; 431 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 432 int i; 433 434 for (i = 0; i < io_ctl->num_pages; i++) { 435 int ret; 436 437 page = find_or_create_page(inode->i_mapping, i, mask); 438 if (!page) { 439 io_ctl_drop_pages(io_ctl); 440 return -ENOMEM; 441 } 442 443 ret = set_page_extent_mapped(page); 444 if (ret < 0) { 445 unlock_page(page); 446 put_page(page); 447 io_ctl_drop_pages(io_ctl); 448 return ret; 449 } 450 451 io_ctl->pages[i] = page; 452 if (uptodate && !PageUptodate(page)) { 453 btrfs_readpage(NULL, page); 454 lock_page(page); 455 if (page->mapping != inode->i_mapping) { 456 btrfs_err(BTRFS_I(inode)->root->fs_info, 457 "free space cache page truncated"); 458 io_ctl_drop_pages(io_ctl); 459 return -EIO; 460 } 461 if (!PageUptodate(page)) { 462 btrfs_err(BTRFS_I(inode)->root->fs_info, 463 "error reading free space cache"); 464 io_ctl_drop_pages(io_ctl); 465 return -EIO; 466 } 467 } 468 } 469 470 for (i = 0; i < io_ctl->num_pages; i++) 471 
clear_page_dirty_for_io(io_ctl->pages[i]); 472 473 return 0; 474 } 475 476 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 477 { 478 io_ctl_map_page(io_ctl, 1); 479 480 /* 481 * Skip the csum areas. If we don't check crcs then we just have a 482 * 64bit chunk at the front of the first page. 483 */ 484 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 485 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 486 487 put_unaligned_le64(generation, io_ctl->cur); 488 io_ctl->cur += sizeof(u64); 489 } 490 491 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 492 { 493 u64 cache_gen; 494 495 /* 496 * Skip the crc area. If we don't check crcs then we just have a 64bit 497 * chunk at the front of the first page. 498 */ 499 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 500 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 501 502 cache_gen = get_unaligned_le64(io_ctl->cur); 503 if (cache_gen != generation) { 504 btrfs_err_rl(io_ctl->fs_info, 505 "space cache generation (%llu) does not match inode (%llu)", 506 cache_gen, generation); 507 io_ctl_unmap_page(io_ctl); 508 return -EIO; 509 } 510 io_ctl->cur += sizeof(u64); 511 return 0; 512 } 513 514 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 515 { 516 u32 *tmp; 517 u32 crc = ~(u32)0; 518 unsigned offset = 0; 519 520 if (index == 0) 521 offset = sizeof(u32) * io_ctl->num_pages; 522 523 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); 524 btrfs_crc32c_final(crc, (u8 *)&crc); 525 io_ctl_unmap_page(io_ctl); 526 tmp = page_address(io_ctl->pages[0]); 527 tmp += index; 528 *tmp = crc; 529 } 530 531 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 532 { 533 u32 *tmp, val; 534 u32 crc = ~(u32)0; 535 unsigned offset = 0; 536 537 if (index == 0) 538 offset = sizeof(u32) * io_ctl->num_pages; 539 540 tmp = page_address(io_ctl->pages[0]); 541 tmp += index; 542 val = *tmp; 543 544 io_ctl_map_page(io_ctl, 0); 545 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); 546 btrfs_crc32c_final(crc, (u8 *)&crc); 547 if (val != crc) { 548 btrfs_err_rl(io_ctl->fs_info, 549 "csum mismatch on free space cache"); 550 io_ctl_unmap_page(io_ctl); 551 return -EIO; 552 } 553 554 return 0; 555 } 556 557 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 558 void *bitmap) 559 { 560 struct btrfs_free_space_entry *entry; 561 562 if (!io_ctl->cur) 563 return -ENOSPC; 564 565 entry = io_ctl->cur; 566 put_unaligned_le64(offset, &entry->offset); 567 put_unaligned_le64(bytes, &entry->bytes); 568 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 569 BTRFS_FREE_SPACE_EXTENT; 570 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 571 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 572 573 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 574 return 0; 575 576 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 577 578 /* No more pages to map */ 579 if (io_ctl->index >= io_ctl->num_pages) 580 return 0; 581 582 /* map the next page */ 583 io_ctl_map_page(io_ctl, 1); 584 return 0; 585 } 586 587 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 588 { 589 if (!io_ctl->cur) 590 return -ENOSPC; 591 592 /* 593 * If we aren't at the start of the current page, unmap this one and 594 * map the next one if there is any left. 
595 */ 596 if (io_ctl->cur != io_ctl->orig) { 597 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 598 if (io_ctl->index >= io_ctl->num_pages) 599 return -ENOSPC; 600 io_ctl_map_page(io_ctl, 0); 601 } 602 603 copy_page(io_ctl->cur, bitmap); 604 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 605 if (io_ctl->index < io_ctl->num_pages) 606 io_ctl_map_page(io_ctl, 0); 607 return 0; 608 } 609 610 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 611 { 612 /* 613 * If we're not on the boundary we know we've modified the page and we 614 * need to crc the page. 615 */ 616 if (io_ctl->cur != io_ctl->orig) 617 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 618 else 619 io_ctl_unmap_page(io_ctl); 620 621 while (io_ctl->index < io_ctl->num_pages) { 622 io_ctl_map_page(io_ctl, 1); 623 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 624 } 625 } 626 627 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 628 struct btrfs_free_space *entry, u8 *type) 629 { 630 struct btrfs_free_space_entry *e; 631 int ret; 632 633 if (!io_ctl->cur) { 634 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 635 if (ret) 636 return ret; 637 } 638 639 e = io_ctl->cur; 640 entry->offset = get_unaligned_le64(&e->offset); 641 entry->bytes = get_unaligned_le64(&e->bytes); 642 *type = e->type; 643 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 644 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 645 646 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 647 return 0; 648 649 io_ctl_unmap_page(io_ctl); 650 651 return 0; 652 } 653 654 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 655 struct btrfs_free_space *entry) 656 { 657 int ret; 658 659 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 660 if (ret) 661 return ret; 662 663 copy_page(entry->bitmap, io_ctl->cur); 664 io_ctl_unmap_page(io_ctl); 665 666 return 0; 667 } 668 669 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 670 { 671 struct btrfs_block_group *block_group = ctl->private; 672 u64 max_bytes; 673 u64 bitmap_bytes; 674 u64 extent_bytes; 675 u64 size = block_group->length; 676 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 677 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 678 679 max_bitmaps = max_t(u64, max_bitmaps, 1); 680 681 ASSERT(ctl->total_bitmaps <= max_bitmaps); 682 683 /* 684 * We are trying to keep the total amount of memory used per 1GiB of 685 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation 686 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of 687 * bitmaps, we may end up using more memory than this. 688 */ 689 if (size < SZ_1G) 690 max_bytes = MAX_CACHE_BYTES_PER_GIG; 691 else 692 max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G); 693 694 bitmap_bytes = ctl->total_bitmaps * ctl->unit; 695 696 /* 697 * we want the extent entry threshold to always be at most 1/2 the max 698 * bytes we can have, or whatever is less than that. 
699 */ 700 extent_bytes = max_bytes - bitmap_bytes; 701 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); 702 703 ctl->extents_thresh = 704 div_u64(extent_bytes, sizeof(struct btrfs_free_space)); 705 } 706 707 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 708 struct btrfs_free_space_ctl *ctl, 709 struct btrfs_path *path, u64 offset) 710 { 711 struct btrfs_fs_info *fs_info = root->fs_info; 712 struct btrfs_free_space_header *header; 713 struct extent_buffer *leaf; 714 struct btrfs_io_ctl io_ctl; 715 struct btrfs_key key; 716 struct btrfs_free_space *e, *n; 717 LIST_HEAD(bitmaps); 718 u64 num_entries; 719 u64 num_bitmaps; 720 u64 generation; 721 u8 type; 722 int ret = 0; 723 724 /* Nothing in the space cache, goodbye */ 725 if (!i_size_read(inode)) 726 return 0; 727 728 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 729 key.offset = offset; 730 key.type = 0; 731 732 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 733 if (ret < 0) 734 return 0; 735 else if (ret > 0) { 736 btrfs_release_path(path); 737 return 0; 738 } 739 740 ret = -1; 741 742 leaf = path->nodes[0]; 743 header = btrfs_item_ptr(leaf, path->slots[0], 744 struct btrfs_free_space_header); 745 num_entries = btrfs_free_space_entries(leaf, header); 746 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 747 generation = btrfs_free_space_generation(leaf, header); 748 btrfs_release_path(path); 749 750 if (!BTRFS_I(inode)->generation) { 751 btrfs_info(fs_info, 752 "the free space cache file (%llu) is invalid, skip it", 753 offset); 754 return 0; 755 } 756 757 if (BTRFS_I(inode)->generation != generation) { 758 btrfs_err(fs_info, 759 "free space inode generation (%llu) did not match free space cache generation (%llu)", 760 BTRFS_I(inode)->generation, generation); 761 return 0; 762 } 763 764 if (!num_entries) 765 return 0; 766 767 ret = io_ctl_init(&io_ctl, inode, 0); 768 if (ret) 769 return ret; 770 771 readahead_cache(inode); 772 773 ret = io_ctl_prepare_pages(&io_ctl, true); 774 if (ret) 775 goto out; 776 777 ret = io_ctl_check_crc(&io_ctl, 0); 778 if (ret) 779 goto free_cache; 780 781 ret = io_ctl_check_generation(&io_ctl, generation); 782 if (ret) 783 goto free_cache; 784 785 while (num_entries) { 786 e = kmem_cache_zalloc(btrfs_free_space_cachep, 787 GFP_NOFS); 788 if (!e) { 789 ret = -ENOMEM; 790 goto free_cache; 791 } 792 793 ret = io_ctl_read_entry(&io_ctl, e, &type); 794 if (ret) { 795 kmem_cache_free(btrfs_free_space_cachep, e); 796 goto free_cache; 797 } 798 799 if (!e->bytes) { 800 ret = -1; 801 kmem_cache_free(btrfs_free_space_cachep, e); 802 goto free_cache; 803 } 804 805 if (type == BTRFS_FREE_SPACE_EXTENT) { 806 spin_lock(&ctl->tree_lock); 807 ret = link_free_space(ctl, e); 808 spin_unlock(&ctl->tree_lock); 809 if (ret) { 810 btrfs_err(fs_info, 811 "Duplicate entries in free space cache, dumping"); 812 kmem_cache_free(btrfs_free_space_cachep, e); 813 goto free_cache; 814 } 815 } else { 816 ASSERT(num_bitmaps); 817 num_bitmaps--; 818 e->bitmap = kmem_cache_zalloc( 819 btrfs_free_space_bitmap_cachep, GFP_NOFS); 820 if (!e->bitmap) { 821 ret = -ENOMEM; 822 kmem_cache_free( 823 btrfs_free_space_cachep, e); 824 goto free_cache; 825 } 826 spin_lock(&ctl->tree_lock); 827 ret = link_free_space(ctl, e); 828 ctl->total_bitmaps++; 829 recalculate_thresholds(ctl); 830 spin_unlock(&ctl->tree_lock); 831 if (ret) { 832 btrfs_err(fs_info, 833 "Duplicate entries in free space cache, dumping"); 834 kmem_cache_free(btrfs_free_space_cachep, e); 835 goto free_cache; 836 } 837 
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * The bitmap pages are stored after all the entries in the cache
	 * file, so read them now, in the same order the bitmap entries were
	 * added to the cache above.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

static int copy_free_space_cache(struct btrfs_block_group *block_group,
				 struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int ret = 0;

	while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			ret = btrfs_add_free_space(block_group, info->offset,
						   info->bytes);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			u64 offset = info->offset;
			u64 bytes = ctl->unit;

			while (search_bitmap(ctl, info, &offset, &bytes,
					     false) == 0) {
				ret = btrfs_add_free_space(block_group, offset,
							   bytes);
				if (ret)
					break;
				bitmap_clear_bits(ctl, info, offset, bytes);
				offset = info->offset;
				bytes = ctl->unit;
			}
			free_bitmap(ctl, info);
		}
		cond_resched();
	}
	return ret;
}

int load_free_space_cache(struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space_ctl tmp_ctl = {};
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = block_group->used;

	/*
	 * Because we could potentially discard our loaded free space, we want
	 * to load everything into a temporary structure first, and then if it's
	 * valid copy it all into the actual free space ctl.
	 */
	btrfs_init_free_space_ctl(block_group, &tmp_ctl);

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	/*
	 * We must pass a path with search_commit_root set to btrfs_iget in
	 * order to avoid a deadlock when allocating extents for the tree root.
	 *
	 * When we are COWing an extent buffer from the tree root, when looking
	 * for a free extent, at extent-tree.c:find_free_extent(), we can find a
	 * block group without its free space cache loaded. When we find one
	 * we must load its space cache which requires reading its free space
	 * cache's inode item from the root tree. If this inode item is located
	 * in the same leaf that we started COWing before, then we end up in
	 * deadlock on the extent buffer (trying to read lock it when we
	 * previously write locked it).
949 * 950 * It's safe to read the inode item using the commit root because 951 * block groups, once loaded, stay in memory forever (until they are 952 * removed) as well as their space caches once loaded. New block groups 953 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so 954 * we will never try to read their inode item while the fs is mounted. 955 */ 956 inode = lookup_free_space_inode(block_group, path); 957 if (IS_ERR(inode)) { 958 btrfs_free_path(path); 959 return 0; 960 } 961 962 /* We may have converted the inode and made the cache invalid. */ 963 spin_lock(&block_group->lock); 964 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 965 spin_unlock(&block_group->lock); 966 btrfs_free_path(path); 967 goto out; 968 } 969 spin_unlock(&block_group->lock); 970 971 ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl, 972 path, block_group->start); 973 btrfs_free_path(path); 974 if (ret <= 0) 975 goto out; 976 977 matched = (tmp_ctl.free_space == (block_group->length - used - 978 block_group->bytes_super)); 979 980 if (matched) { 981 ret = copy_free_space_cache(block_group, &tmp_ctl); 982 /* 983 * ret == 1 means we successfully loaded the free space cache, 984 * so we need to re-set it here. 985 */ 986 if (ret == 0) 987 ret = 1; 988 } else { 989 __btrfs_remove_free_space_cache(&tmp_ctl); 990 btrfs_warn(fs_info, 991 "block group %llu has wrong amount of free space", 992 block_group->start); 993 ret = -1; 994 } 995 out: 996 if (ret < 0) { 997 /* This cache is bogus, make sure it gets cleared */ 998 spin_lock(&block_group->lock); 999 block_group->disk_cache_state = BTRFS_DC_CLEAR; 1000 spin_unlock(&block_group->lock); 1001 ret = 0; 1002 1003 btrfs_warn(fs_info, 1004 "failed to load free space cache for block group %llu, rebuilding it now", 1005 block_group->start); 1006 } 1007 1008 spin_lock(&ctl->tree_lock); 1009 btrfs_discard_update_discardable(block_group); 1010 spin_unlock(&ctl->tree_lock); 1011 iput(inode); 1012 return ret; 1013 } 1014 1015 static noinline_for_stack 1016 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 1017 struct btrfs_free_space_ctl *ctl, 1018 struct btrfs_block_group *block_group, 1019 int *entries, int *bitmaps, 1020 struct list_head *bitmap_list) 1021 { 1022 int ret; 1023 struct btrfs_free_cluster *cluster = NULL; 1024 struct btrfs_free_cluster *cluster_locked = NULL; 1025 struct rb_node *node = rb_first(&ctl->free_space_offset); 1026 struct btrfs_trim_range *trim_entry; 1027 1028 /* Get the cluster for this block_group if it exists */ 1029 if (block_group && !list_empty(&block_group->cluster_list)) { 1030 cluster = list_entry(block_group->cluster_list.next, 1031 struct btrfs_free_cluster, 1032 block_group_list); 1033 } 1034 1035 if (!node && cluster) { 1036 cluster_locked = cluster; 1037 spin_lock(&cluster_locked->lock); 1038 node = rb_first(&cluster->root); 1039 cluster = NULL; 1040 } 1041 1042 /* Write out the extent entries */ 1043 while (node) { 1044 struct btrfs_free_space *e; 1045 1046 e = rb_entry(node, struct btrfs_free_space, offset_index); 1047 *entries += 1; 1048 1049 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 1050 e->bitmap); 1051 if (ret) 1052 goto fail; 1053 1054 if (e->bitmap) { 1055 list_add_tail(&e->list, bitmap_list); 1056 *bitmaps += 1; 1057 } 1058 node = rb_next(node); 1059 if (!node && cluster) { 1060 node = rb_first(&cluster->root); 1061 cluster_locked = cluster; 1062 spin_lock(&cluster_locked->lock); 1063 cluster = NULL; 1064 } 1065 } 1066 if (cluster_locked) { 1067 
spin_unlock(&cluster_locked->lock); 1068 cluster_locked = NULL; 1069 } 1070 1071 /* 1072 * Make sure we don't miss any range that was removed from our rbtree 1073 * because trimming is running. Otherwise after a umount+mount (or crash 1074 * after committing the transaction) we would leak free space and get 1075 * an inconsistent free space cache report from fsck. 1076 */ 1077 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 1078 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 1079 trim_entry->bytes, NULL); 1080 if (ret) 1081 goto fail; 1082 *entries += 1; 1083 } 1084 1085 return 0; 1086 fail: 1087 if (cluster_locked) 1088 spin_unlock(&cluster_locked->lock); 1089 return -ENOSPC; 1090 } 1091 1092 static noinline_for_stack int 1093 update_cache_item(struct btrfs_trans_handle *trans, 1094 struct btrfs_root *root, 1095 struct inode *inode, 1096 struct btrfs_path *path, u64 offset, 1097 int entries, int bitmaps) 1098 { 1099 struct btrfs_key key; 1100 struct btrfs_free_space_header *header; 1101 struct extent_buffer *leaf; 1102 int ret; 1103 1104 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 1105 key.offset = offset; 1106 key.type = 0; 1107 1108 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 1109 if (ret < 0) { 1110 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1111 EXTENT_DELALLOC, 0, 0, NULL); 1112 goto fail; 1113 } 1114 leaf = path->nodes[0]; 1115 if (ret > 0) { 1116 struct btrfs_key found_key; 1117 ASSERT(path->slots[0]); 1118 path->slots[0]--; 1119 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1120 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1121 found_key.offset != offset) { 1122 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1123 inode->i_size - 1, EXTENT_DELALLOC, 0, 1124 0, NULL); 1125 btrfs_release_path(path); 1126 goto fail; 1127 } 1128 } 1129 1130 BTRFS_I(inode)->generation = trans->transid; 1131 header = btrfs_item_ptr(leaf, path->slots[0], 1132 struct btrfs_free_space_header); 1133 btrfs_set_free_space_entries(leaf, header, entries); 1134 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1135 btrfs_set_free_space_generation(leaf, header, trans->transid); 1136 btrfs_mark_buffer_dirty(leaf); 1137 btrfs_release_path(path); 1138 1139 return 0; 1140 1141 fail: 1142 return -1; 1143 } 1144 1145 static noinline_for_stack int write_pinned_extent_entries( 1146 struct btrfs_trans_handle *trans, 1147 struct btrfs_block_group *block_group, 1148 struct btrfs_io_ctl *io_ctl, 1149 int *entries) 1150 { 1151 u64 start, extent_start, extent_end, len; 1152 struct extent_io_tree *unpin = NULL; 1153 int ret; 1154 1155 if (!block_group) 1156 return 0; 1157 1158 /* 1159 * We want to add any pinned extents to our free space cache 1160 * so we don't leak the space 1161 * 1162 * We shouldn't have switched the pinned extents yet so this is the 1163 * right one 1164 */ 1165 unpin = &trans->transaction->pinned_extents; 1166 1167 start = block_group->start; 1168 1169 while (start < block_group->start + block_group->length) { 1170 ret = find_first_extent_bit(unpin, start, 1171 &extent_start, &extent_end, 1172 EXTENT_DIRTY, NULL); 1173 if (ret) 1174 return 0; 1175 1176 /* This pinned extent is out of our range */ 1177 if (extent_start >= block_group->start + block_group->length) 1178 return 0; 1179 1180 extent_start = max(extent_start, start); 1181 extent_end = min(block_group->start + block_group->length, 1182 extent_end + 1); 1183 len = extent_end - extent_start; 1184 1185 *entries += 1; 1186 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 
1187 if (ret) 1188 return -ENOSPC; 1189 1190 start = extent_end; 1191 } 1192 1193 return 0; 1194 } 1195 1196 static noinline_for_stack int 1197 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1198 { 1199 struct btrfs_free_space *entry, *next; 1200 int ret; 1201 1202 /* Write out the bitmaps */ 1203 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1204 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1205 if (ret) 1206 return -ENOSPC; 1207 list_del_init(&entry->list); 1208 } 1209 1210 return 0; 1211 } 1212 1213 static int flush_dirty_cache(struct inode *inode) 1214 { 1215 int ret; 1216 1217 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1218 if (ret) 1219 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1220 EXTENT_DELALLOC, 0, 0, NULL); 1221 1222 return ret; 1223 } 1224 1225 static void noinline_for_stack 1226 cleanup_bitmap_list(struct list_head *bitmap_list) 1227 { 1228 struct btrfs_free_space *entry, *next; 1229 1230 list_for_each_entry_safe(entry, next, bitmap_list, list) 1231 list_del_init(&entry->list); 1232 } 1233 1234 static void noinline_for_stack 1235 cleanup_write_cache_enospc(struct inode *inode, 1236 struct btrfs_io_ctl *io_ctl, 1237 struct extent_state **cached_state) 1238 { 1239 io_ctl_drop_pages(io_ctl); 1240 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1241 i_size_read(inode) - 1, cached_state); 1242 } 1243 1244 static int __btrfs_wait_cache_io(struct btrfs_root *root, 1245 struct btrfs_trans_handle *trans, 1246 struct btrfs_block_group *block_group, 1247 struct btrfs_io_ctl *io_ctl, 1248 struct btrfs_path *path, u64 offset) 1249 { 1250 int ret; 1251 struct inode *inode = io_ctl->inode; 1252 1253 if (!inode) 1254 return 0; 1255 1256 /* Flush the dirty pages in the cache file. */ 1257 ret = flush_dirty_cache(inode); 1258 if (ret) 1259 goto out; 1260 1261 /* Update the cache item to tell everyone this cache file is valid. */ 1262 ret = update_cache_item(trans, root, inode, path, offset, 1263 io_ctl->entries, io_ctl->bitmaps); 1264 out: 1265 if (ret) { 1266 invalidate_inode_pages2(inode->i_mapping); 1267 BTRFS_I(inode)->generation = 0; 1268 if (block_group) 1269 btrfs_debug(root->fs_info, 1270 "failed to write free space cache for block group %llu error %d", 1271 block_group->start, ret); 1272 } 1273 btrfs_update_inode(trans, root, BTRFS_I(inode)); 1274 1275 if (block_group) { 1276 /* the dirty list is protected by the dirty_bgs_lock */ 1277 spin_lock(&trans->transaction->dirty_bgs_lock); 1278 1279 /* the disk_cache_state is protected by the block group lock */ 1280 spin_lock(&block_group->lock); 1281 1282 /* 1283 * only mark this as written if we didn't get put back on 1284 * the dirty list while waiting for IO. 
		 * Otherwise our cache state won't be right, and we won't get
		 * written again.
		 */
		if (!ret && list_empty(&block_group->dirty_list))
			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
		else if (ret)
			block_group->disk_cache_state = BTRFS_DC_ERROR;

		spin_unlock(&block_group->lock);
		spin_unlock(&trans->transaction->dirty_bgs_lock);
		io_ctl->inode = NULL;
		iput(inode);
	}

	return ret;
}

int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
			struct btrfs_block_group *block_group,
			struct btrfs_path *path)
{
	return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
				     block_group, &block_group->io_ctl,
				     path, block_group->start);
}
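
/*
 * A sketch of the on-disk layout produced by the write path below, derived
 * only from the helpers in this file: the first page starts with one u32 crc
 * per page followed by a u64 generation, the remaining space holds
 * btrfs_free_space_entry records, and one full page per
 * BTRFS_FREE_SPACE_BITMAP entry is appended after all of the entries:
 *
 *   page 0:      [crc 0][crc 1]...[crc n-1][generation][entries...]
 *   page 1..k:   [entries...]
 *   page k+1..n: [bitmap pages, in the order their entries were written]
 *
 * See io_ctl_set_generation(), io_ctl_set_crc(), io_ctl_add_entry() and
 * io_ctl_add_bitmap() for the writers, and io_ctl_check_generation(),
 * io_ctl_check_crc(), io_ctl_read_entry() and io_ctl_read_bitmap() above for
 * the corresponding readers.
 */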
No locking needed 1387 */ 1388 ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries); 1389 if (ret) 1390 goto out_nospc_locked; 1391 1392 /* 1393 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1394 * locked while doing it because a concurrent trim can be manipulating 1395 * or freeing the bitmap. 1396 */ 1397 ret = write_bitmap_entries(io_ctl, &bitmap_list); 1398 spin_unlock(&ctl->tree_lock); 1399 mutex_unlock(&ctl->cache_writeout_mutex); 1400 if (ret) 1401 goto out_nospc; 1402 1403 /* Zero out the rest of the pages just to make sure */ 1404 io_ctl_zero_remaining_pages(io_ctl); 1405 1406 /* Everything is written out, now we dirty the pages in the file. */ 1407 ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages, 1408 io_ctl->num_pages, 0, i_size_read(inode), 1409 &cached_state, false); 1410 if (ret) 1411 goto out_nospc; 1412 1413 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1414 up_write(&block_group->data_rwsem); 1415 /* 1416 * Release the pages and unlock the extent, we will flush 1417 * them out later 1418 */ 1419 io_ctl_drop_pages(io_ctl); 1420 io_ctl_free(io_ctl); 1421 1422 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1423 i_size_read(inode) - 1, &cached_state); 1424 1425 /* 1426 * at this point the pages are under IO and we're happy, 1427 * The caller is responsible for waiting on them and updating 1428 * the cache and the inode 1429 */ 1430 io_ctl->entries = entries; 1431 io_ctl->bitmaps = bitmaps; 1432 1433 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); 1434 if (ret) 1435 goto out; 1436 1437 return 0; 1438 1439 out_nospc_locked: 1440 cleanup_bitmap_list(&bitmap_list); 1441 spin_unlock(&ctl->tree_lock); 1442 mutex_unlock(&ctl->cache_writeout_mutex); 1443 1444 out_nospc: 1445 cleanup_write_cache_enospc(inode, io_ctl, &cached_state); 1446 1447 out_unlock: 1448 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1449 up_write(&block_group->data_rwsem); 1450 1451 out: 1452 io_ctl->inode = NULL; 1453 io_ctl_free(io_ctl); 1454 if (ret) { 1455 invalidate_inode_pages2(inode->i_mapping); 1456 BTRFS_I(inode)->generation = 0; 1457 } 1458 btrfs_update_inode(trans, root, BTRFS_I(inode)); 1459 if (must_iput) 1460 iput(inode); 1461 return ret; 1462 } 1463 1464 int btrfs_write_out_cache(struct btrfs_trans_handle *trans, 1465 struct btrfs_block_group *block_group, 1466 struct btrfs_path *path) 1467 { 1468 struct btrfs_fs_info *fs_info = trans->fs_info; 1469 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1470 struct inode *inode; 1471 int ret = 0; 1472 1473 spin_lock(&block_group->lock); 1474 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1475 spin_unlock(&block_group->lock); 1476 return 0; 1477 } 1478 spin_unlock(&block_group->lock); 1479 1480 inode = lookup_free_space_inode(block_group, path); 1481 if (IS_ERR(inode)) 1482 return 0; 1483 1484 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl, 1485 block_group, &block_group->io_ctl, trans); 1486 if (ret) { 1487 btrfs_debug(fs_info, 1488 "failed to write free space cache for block group %llu error %d", 1489 block_group->start, ret); 1490 spin_lock(&block_group->lock); 1491 block_group->disk_cache_state = BTRFS_DC_ERROR; 1492 spin_unlock(&block_group->lock); 1493 1494 block_group->io_ctl.inode = NULL; 1495 iput(inode); 1496 } 1497 1498 /* 1499 * if ret == 0 the caller is expected to call btrfs_wait_cache_io 1500 * to wait for IO and put the inode 1501 */ 1502 1503 return ret; 1504 } 1505 1506 static inline unsigned long 
offset_to_bit(u64 bitmap_start, u32 unit, 1507 u64 offset) 1508 { 1509 ASSERT(offset >= bitmap_start); 1510 offset -= bitmap_start; 1511 return (unsigned long)(div_u64(offset, unit)); 1512 } 1513 1514 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1515 { 1516 return (unsigned long)(div_u64(bytes, unit)); 1517 } 1518 1519 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1520 u64 offset) 1521 { 1522 u64 bitmap_start; 1523 u64 bytes_per_bitmap; 1524 1525 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1526 bitmap_start = offset - ctl->start; 1527 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1528 bitmap_start *= bytes_per_bitmap; 1529 bitmap_start += ctl->start; 1530 1531 return bitmap_start; 1532 } 1533 1534 static int tree_insert_offset(struct rb_root *root, u64 offset, 1535 struct rb_node *node, int bitmap) 1536 { 1537 struct rb_node **p = &root->rb_node; 1538 struct rb_node *parent = NULL; 1539 struct btrfs_free_space *info; 1540 1541 while (*p) { 1542 parent = *p; 1543 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1544 1545 if (offset < info->offset) { 1546 p = &(*p)->rb_left; 1547 } else if (offset > info->offset) { 1548 p = &(*p)->rb_right; 1549 } else { 1550 /* 1551 * we could have a bitmap entry and an extent entry 1552 * share the same offset. If this is the case, we want 1553 * the extent entry to always be found first if we do a 1554 * linear search through the tree, since we want to have 1555 * the quickest allocation time, and allocating from an 1556 * extent is faster than allocating from a bitmap. So 1557 * if we're inserting a bitmap and we find an entry at 1558 * this offset, we want to go right, or after this entry 1559 * logically. If we are inserting an extent and we've 1560 * found a bitmap, we want to go left, or before 1561 * logically. 1562 */ 1563 if (bitmap) { 1564 if (info->bitmap) { 1565 WARN_ON_ONCE(1); 1566 return -EEXIST; 1567 } 1568 p = &(*p)->rb_right; 1569 } else { 1570 if (!info->bitmap) { 1571 WARN_ON_ONCE(1); 1572 return -EEXIST; 1573 } 1574 p = &(*p)->rb_left; 1575 } 1576 } 1577 } 1578 1579 rb_link_node(node, parent, p); 1580 rb_insert_color(node, root); 1581 1582 return 0; 1583 } 1584 1585 /* 1586 * searches the tree for the given offset. 1587 * 1588 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1589 * want a section that has at least bytes size and comes at or after the given 1590 * offset. 1591 */ 1592 static struct btrfs_free_space * 1593 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1594 u64 offset, int bitmap_only, int fuzzy) 1595 { 1596 struct rb_node *n = ctl->free_space_offset.rb_node; 1597 struct btrfs_free_space *entry, *prev = NULL; 1598 1599 /* find entry that is closest to the 'offset' */ 1600 while (1) { 1601 if (!n) { 1602 entry = NULL; 1603 break; 1604 } 1605 1606 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1607 prev = entry; 1608 1609 if (offset < entry->offset) 1610 n = n->rb_left; 1611 else if (offset > entry->offset) 1612 n = n->rb_right; 1613 else 1614 break; 1615 } 1616 1617 if (bitmap_only) { 1618 if (!entry) 1619 return NULL; 1620 if (entry->bitmap) 1621 return entry; 1622 1623 /* 1624 * bitmap entry and extent entry may share same offset, 1625 * in that case, bitmap entry comes after extent entry. 
1626 */ 1627 n = rb_next(n); 1628 if (!n) 1629 return NULL; 1630 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1631 if (entry->offset != offset) 1632 return NULL; 1633 1634 WARN_ON(!entry->bitmap); 1635 return entry; 1636 } else if (entry) { 1637 if (entry->bitmap) { 1638 /* 1639 * if previous extent entry covers the offset, 1640 * we should return it instead of the bitmap entry 1641 */ 1642 n = rb_prev(&entry->offset_index); 1643 if (n) { 1644 prev = rb_entry(n, struct btrfs_free_space, 1645 offset_index); 1646 if (!prev->bitmap && 1647 prev->offset + prev->bytes > offset) 1648 entry = prev; 1649 } 1650 } 1651 return entry; 1652 } 1653 1654 if (!prev) 1655 return NULL; 1656 1657 /* find last entry before the 'offset' */ 1658 entry = prev; 1659 if (entry->offset > offset) { 1660 n = rb_prev(&entry->offset_index); 1661 if (n) { 1662 entry = rb_entry(n, struct btrfs_free_space, 1663 offset_index); 1664 ASSERT(entry->offset <= offset); 1665 } else { 1666 if (fuzzy) 1667 return entry; 1668 else 1669 return NULL; 1670 } 1671 } 1672 1673 if (entry->bitmap) { 1674 n = rb_prev(&entry->offset_index); 1675 if (n) { 1676 prev = rb_entry(n, struct btrfs_free_space, 1677 offset_index); 1678 if (!prev->bitmap && 1679 prev->offset + prev->bytes > offset) 1680 return prev; 1681 } 1682 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1683 return entry; 1684 } else if (entry->offset + entry->bytes > offset) 1685 return entry; 1686 1687 if (!fuzzy) 1688 return NULL; 1689 1690 while (1) { 1691 if (entry->bitmap) { 1692 if (entry->offset + BITS_PER_BITMAP * 1693 ctl->unit > offset) 1694 break; 1695 } else { 1696 if (entry->offset + entry->bytes > offset) 1697 break; 1698 } 1699 1700 n = rb_next(&entry->offset_index); 1701 if (!n) 1702 return NULL; 1703 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1704 } 1705 return entry; 1706 } 1707 1708 static inline void 1709 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1710 struct btrfs_free_space *info) 1711 { 1712 rb_erase(&info->offset_index, &ctl->free_space_offset); 1713 ctl->free_extents--; 1714 1715 if (!info->bitmap && !btrfs_free_space_trimmed(info)) { 1716 ctl->discardable_extents[BTRFS_STAT_CURR]--; 1717 ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes; 1718 } 1719 } 1720 1721 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1722 struct btrfs_free_space *info) 1723 { 1724 __unlink_free_space(ctl, info); 1725 ctl->free_space -= info->bytes; 1726 } 1727 1728 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1729 struct btrfs_free_space *info) 1730 { 1731 int ret = 0; 1732 1733 ASSERT(info->bytes || info->bitmap); 1734 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1735 &info->offset_index, (info->bitmap != NULL)); 1736 if (ret) 1737 return ret; 1738 1739 if (!info->bitmap && !btrfs_free_space_trimmed(info)) { 1740 ctl->discardable_extents[BTRFS_STAT_CURR]++; 1741 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; 1742 } 1743 1744 ctl->free_space += info->bytes; 1745 ctl->free_extents++; 1746 return ret; 1747 } 1748 1749 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1750 struct btrfs_free_space *info, 1751 u64 offset, u64 bytes) 1752 { 1753 unsigned long start, count, end; 1754 int extent_delta = -1; 1755 1756 start = offset_to_bit(info->offset, ctl->unit, offset); 1757 count = bytes_to_bits(bytes, ctl->unit); 1758 end = start + count; 1759 ASSERT(end <= BITS_PER_BITMAP); 1760 1761 bitmap_clear(info->bitmap, start, count); 1762 1763 
	info->bytes -= bytes;
	if (info->max_extent_size > ctl->unit)
		info->max_extent_size = 0;

	if (start && test_bit(start - 1, info->bitmap))
		extent_delta++;

	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
		extent_delta++;

	info->bitmap_extents += extent_delta;
	if (!btrfs_free_space_trimmed(info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
	}
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count, end;
	int extent_delta = 1;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	end = start + count;
	ASSERT(end <= BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;

	if (start && test_bit(start - 1, info->bitmap))
		extent_delta--;

	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
		extent_delta--;

	info->bitmap_extents += extent_delta;
	if (!btrfs_free_space_trimmed(info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
		ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
	}
}
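
/*
 * A short illustration of the extent_delta accounting used by
 * __bitmap_clear_bits() and bitmap_set_bits() above: setting a run of bits
 * normally creates one new free region inside the bitmap (extent_delta = 1),
 * but each already-set neighbouring bit means the run merges with an existing
 * region, so extent_delta is decremented once per occupied neighbour.
 * Clearing is symmetric: punching a hole out of the middle of a run splits it,
 * and each occupied neighbour increments extent_delta.  For example, with a
 * 4KiB unit, clearing 8KiB out of the middle of a 32KiB run turns one bitmap
 * extent into two, so bitmap_extents grows by one.
 */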

/*
 * If we cannot find a suitable extent, we will use *bytes to record the size
 * of the largest extent we found.
 */
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes, bool for_alloc)
{
	unsigned long found_bits = 0;
	unsigned long max_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;
	unsigned long extent_bits;

	/*
	 * Skip searching the bitmap if we don't have a contiguous section that
	 * is large enough for this allocation.
	 */
	if (for_alloc &&
	    bitmap_info->max_extent_size &&
	    bitmap_info->max_extent_size < *bytes) {
		*bytes = bitmap_info->max_extent_size;
		return -1;
	}

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
		if (for_alloc && bits == 1) {
			found_bits = 1;
			break;
		}
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		extent_bits = next_zero - i;
		if (extent_bits >= bits) {
			found_bits = extent_bits;
			break;
		} else if (extent_bits > max_bits) {
			max_bits = extent_bits;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	*bytes = (u64)(max_bits) * ctl->unit;
	bitmap_info->max_extent_size = *bytes;
	return -1;
}

static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
{
	if (entry->bitmap)
		return entry->max_extent_size;
	return entry->bytes;
}

/* Cache the size of the max extent in bytes */
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
		unsigned long align, u64 *max_extent_size)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	u64 tmp;
	u64 align_off;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		goto out;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		goto out;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes) {
			*max_extent_size = max(get_max_extent_size(entry),
					       *max_extent_size);
			continue;
		}

		/*
		 * Make sure the space returned is big enough to match our
		 * requested alignment.
		 */
		if (*bytes >= align) {
			tmp = entry->offset - ctl->start + align - 1;
			tmp = div64_u64(tmp, align);
			tmp = tmp * align + ctl->start;
			align_off = tmp - entry->offset;
		} else {
			align_off = 0;
			tmp = entry->offset;
		}

		if (entry->bytes < *bytes + align_off) {
			*max_extent_size = max(get_max_extent_size(entry),
					       *max_extent_size);
			continue;
		}

		if (entry->bitmap) {
			u64 size = *bytes;

			ret = search_bitmap(ctl, entry, &tmp, &size, true);
			if (!ret) {
				*offset = tmp;
				*bytes = size;
				return entry;
			} else {
				*max_extent_size =
					max(get_max_extent_size(entry),
					    *max_extent_size);
			}
			continue;
		}

		*offset = tmp;
		*bytes = entry->bytes - align_off;
		return entry;
	}
out:
	return NULL;
}
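
/*
 * A worked example of the alignment round-up in find_free_space() (numbers
 * are illustrative only): with align = 64KiB and an extent entry starting
 * 70KiB into the block group, tmp is rounded up to the next alignment
 * boundary relative to ctl->start, i.e. 128KiB into the block group, and
 * align_off becomes 58KiB.  The entry only satisfies the request if it still
 * has *bytes left after skipping those 58KiB, which is what the
 * "entry->bytes < *bytes + align_off" check enforces.
 */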

static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	info->bitmap_extents = 0;
	INIT_LIST_HEAD(&info->list);
	link_free_space(ctl, info);
	ctl->total_bitmaps++;
	recalculate_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	/*
	 * Normally when this is called, the bitmap is completely empty.
	 * However, if we are blowing up the free space cache for one reason or
	 * another via __btrfs_remove_free_space_cache(), then it may not be
	 * freed and we may leave stats on the table.
	 */
	if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] -=
			bitmap_info->bitmap_extents;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
	}
	unlink_free_space(ctl, bitmap_info);
	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	recalculate_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap.  We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * No entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * If the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok, the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}
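
/*
 * For scale: one bitmap entry covers BITS_PER_BITMAP * ctl->unit bytes, e.g.
 * with 4KiB pages and a 4KiB unit a single PAGE_SIZE bitmap tracks 32768
 * blocks, or 128MiB of the block group.  add_bytes_to_bitmap() below only
 * sets bits up to the end of the bitmap it was handed
 * (bytes_to_set = min(end - offset, bytes)); insert_into_bitmap() loops and
 * spills whatever is left over into the following (or a freshly allocated)
 * bitmap.
 */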
2072 */ 2073 if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) { 2074 if (btrfs_free_space_trimmed(info)) { 2075 ctl->discardable_extents[BTRFS_STAT_CURR] += 2076 info->bitmap_extents; 2077 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; 2078 } 2079 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2080 } 2081 2082 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 2083 2084 bytes_to_set = min(end - offset, bytes); 2085 2086 bitmap_set_bits(ctl, info, offset, bytes_to_set); 2087 2088 /* 2089 * We set some bytes, we have no idea what the max extent size is 2090 * anymore. 2091 */ 2092 info->max_extent_size = 0; 2093 2094 return bytes_to_set; 2095 2096 } 2097 2098 static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 2099 struct btrfs_free_space *info) 2100 { 2101 struct btrfs_block_group *block_group = ctl->private; 2102 struct btrfs_fs_info *fs_info = block_group->fs_info; 2103 bool forced = false; 2104 2105 #ifdef CONFIG_BTRFS_DEBUG 2106 if (btrfs_should_fragment_free_space(block_group)) 2107 forced = true; 2108 #endif 2109 2110 /* This is a way to reclaim large regions from the bitmaps. */ 2111 if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD) 2112 return false; 2113 2114 /* 2115 * If we are below the extents threshold then we can add this as an 2116 * extent, and don't have to deal with the bitmap 2117 */ 2118 if (!forced && ctl->free_extents < ctl->extents_thresh) { 2119 /* 2120 * If this block group has some small extents we don't want to 2121 * use up all of our free slots in the cache with them, we want 2122 * to reserve them for larger extents, however if we have plenty 2123 * of cache left then go ahead and add them, no sense in adding 2124 * the overhead of a bitmap if we don't have to. 2125 */ 2126 if (info->bytes <= fs_info->sectorsize * 8) { 2127 if (ctl->free_extents * 3 <= ctl->extents_thresh) 2128 return false; 2129 } else { 2130 return false; 2131 } 2132 } 2133 2134 /* 2135 * The original block groups from mkfs can be really small, like 8 2136 * megabytes, so don't bother with a bitmap for those entries. However 2137 * some block groups can be smaller than what a bitmap would cover but 2138 * are still large enough that they could overflow the 32k memory limit, 2139 * so still allow those block groups to have a bitmap 2140 * entry. 2141 */ 2142 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length) 2143 return false; 2144 2145 return true; 2146 } 2147 2148 static const struct btrfs_free_space_op free_space_op = { 2149 .use_bitmap = use_bitmap, 2150 }; 2151 2152 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 2153 struct btrfs_free_space *info) 2154 { 2155 struct btrfs_free_space *bitmap_info; 2156 struct btrfs_block_group *block_group = NULL; 2157 int added = 0; 2158 u64 bytes, offset, bytes_added; 2159 enum btrfs_trim_state trim_state; 2160 int ret; 2161 2162 bytes = info->bytes; 2163 offset = info->offset; 2164 trim_state = info->trim_state; 2165 2166 if (!ctl->op->use_bitmap(ctl, info)) 2167 return 0; 2168 2169 if (ctl->op == &free_space_op) 2170 block_group = ctl->private; 2171 again: 2172 /* 2173 * Since we link bitmaps right into the cluster we need to see if we 2174 * have a cluster here, and if so and it has our bitmap we need to add 2175 * the free space to that bitmap.
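 *
 * A bitmap that has been linked into a cluster lives in the cluster's
 * rbtree rather than in ctl->free_space_offset, so the tree_search_offset()
 * lookup further down would not find it; that is why the cluster is
 * checked first.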
2176 */ 2177 if (block_group && !list_empty(&block_group->cluster_list)) { 2178 struct btrfs_free_cluster *cluster; 2179 struct rb_node *node; 2180 struct btrfs_free_space *entry; 2181 2182 cluster = list_entry(block_group->cluster_list.next, 2183 struct btrfs_free_cluster, 2184 block_group_list); 2185 spin_lock(&cluster->lock); 2186 node = rb_first(&cluster->root); 2187 if (!node) { 2188 spin_unlock(&cluster->lock); 2189 goto no_cluster_bitmap; 2190 } 2191 2192 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2193 if (!entry->bitmap) { 2194 spin_unlock(&cluster->lock); 2195 goto no_cluster_bitmap; 2196 } 2197 2198 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2199 bytes_added = add_bytes_to_bitmap(ctl, entry, offset, 2200 bytes, trim_state); 2201 bytes -= bytes_added; 2202 offset += bytes_added; 2203 } 2204 spin_unlock(&cluster->lock); 2205 if (!bytes) { 2206 ret = 1; 2207 goto out; 2208 } 2209 } 2210 2211 no_cluster_bitmap: 2212 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2213 1, 0); 2214 if (!bitmap_info) { 2215 ASSERT(added == 0); 2216 goto new_bitmap; 2217 } 2218 2219 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, 2220 trim_state); 2221 bytes -= bytes_added; 2222 offset += bytes_added; 2223 added = 0; 2224 2225 if (!bytes) { 2226 ret = 1; 2227 goto out; 2228 } else 2229 goto again; 2230 2231 new_bitmap: 2232 if (info && info->bitmap) { 2233 add_new_bitmap(ctl, info, offset); 2234 added = 1; 2235 info = NULL; 2236 goto again; 2237 } else { 2238 spin_unlock(&ctl->tree_lock); 2239 2240 /* no pre-allocated info, allocate a new one */ 2241 if (!info) { 2242 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2243 GFP_NOFS); 2244 if (!info) { 2245 spin_lock(&ctl->tree_lock); 2246 ret = -ENOMEM; 2247 goto out; 2248 } 2249 } 2250 2251 /* allocate the bitmap */ 2252 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, 2253 GFP_NOFS); 2254 info->trim_state = BTRFS_TRIM_STATE_TRIMMED; 2255 spin_lock(&ctl->tree_lock); 2256 if (!info->bitmap) { 2257 ret = -ENOMEM; 2258 goto out; 2259 } 2260 goto again; 2261 } 2262 2263 out: 2264 if (info) { 2265 if (info->bitmap) 2266 kmem_cache_free(btrfs_free_space_bitmap_cachep, 2267 info->bitmap); 2268 kmem_cache_free(btrfs_free_space_cachep, info); 2269 } 2270 2271 return ret; 2272 } 2273 2274 /* 2275 * Free space merging rules: 2276 * 1) Merge trimmed areas together 2277 * 2) Let untrimmed areas coalesce with trimmed areas 2278 * 3) Always pull neighboring regions from bitmaps 2279 * 2280 * The above rules are for when we merge free space based on btrfs_trim_state. 2281 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the 2282 * same reason: to promote larger extent regions which makes life easier for 2283 * find_free_extent(). Rule 2 enables coalescing based on the common path 2284 * being returning free space from btrfs_finish_extent_commit(). So when free 2285 * space is trimmed, it will prevent aggregating trimmed new region and 2286 * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents 2287 * and provide find_free_extent() with the largest extents possible hoping for 2288 * the reuse path. 
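 *
 * Concretely: an untrimmed region that is being added will absorb a trimmed
 * neighbour (the merged entry stays untrimmed, so that neighbour may get
 * discarded again, which is the cost rule 2 accepts), while a trimmed region
 * being added is left separate from an untrimmed neighbour so the
 * neighbour's untrimmed state is not lost.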
2289 */ 2290 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2291 struct btrfs_free_space *info, bool update_stat) 2292 { 2293 struct btrfs_free_space *left_info = NULL; 2294 struct btrfs_free_space *right_info; 2295 bool merged = false; 2296 u64 offset = info->offset; 2297 u64 bytes = info->bytes; 2298 const bool is_trimmed = btrfs_free_space_trimmed(info); 2299 2300 /* 2301 * first we want to see if there is free space adjacent to the range we 2302 * are adding, if there is remove that struct and add a new one to 2303 * cover the entire range 2304 */ 2305 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2306 if (right_info && rb_prev(&right_info->offset_index)) 2307 left_info = rb_entry(rb_prev(&right_info->offset_index), 2308 struct btrfs_free_space, offset_index); 2309 else if (!right_info) 2310 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2311 2312 /* See try_merge_free_space() comment. */ 2313 if (right_info && !right_info->bitmap && 2314 (!is_trimmed || btrfs_free_space_trimmed(right_info))) { 2315 if (update_stat) 2316 unlink_free_space(ctl, right_info); 2317 else 2318 __unlink_free_space(ctl, right_info); 2319 info->bytes += right_info->bytes; 2320 kmem_cache_free(btrfs_free_space_cachep, right_info); 2321 merged = true; 2322 } 2323 2324 /* See try_merge_free_space() comment. */ 2325 if (left_info && !left_info->bitmap && 2326 left_info->offset + left_info->bytes == offset && 2327 (!is_trimmed || btrfs_free_space_trimmed(left_info))) { 2328 if (update_stat) 2329 unlink_free_space(ctl, left_info); 2330 else 2331 __unlink_free_space(ctl, left_info); 2332 info->offset = left_info->offset; 2333 info->bytes += left_info->bytes; 2334 kmem_cache_free(btrfs_free_space_cachep, left_info); 2335 merged = true; 2336 } 2337 2338 return merged; 2339 } 2340 2341 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2342 struct btrfs_free_space *info, 2343 bool update_stat) 2344 { 2345 struct btrfs_free_space *bitmap; 2346 unsigned long i; 2347 unsigned long j; 2348 const u64 end = info->offset + info->bytes; 2349 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2350 u64 bytes; 2351 2352 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2353 if (!bitmap) 2354 return false; 2355 2356 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2357 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2358 if (j == i) 2359 return false; 2360 bytes = (j - i) * ctl->unit; 2361 info->bytes += bytes; 2362 2363 /* See try_merge_free_space() comment. */ 2364 if (!btrfs_free_space_trimmed(bitmap)) 2365 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2366 2367 if (update_stat) 2368 bitmap_clear_bits(ctl, bitmap, end, bytes); 2369 else 2370 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2371 2372 if (!bitmap->bytes) 2373 free_bitmap(ctl, bitmap); 2374 2375 return true; 2376 } 2377 2378 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2379 struct btrfs_free_space *info, 2380 bool update_stat) 2381 { 2382 struct btrfs_free_space *bitmap; 2383 u64 bitmap_offset; 2384 unsigned long i; 2385 unsigned long j; 2386 unsigned long prev_j; 2387 u64 bytes; 2388 2389 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2390 /* If we're on a boundary, try the previous logical bitmap. 
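 * (i.e. info->offset is the very first byte the bitmap covering it would
 * track, so any free space immediately in front of it can only be recorded
 * in the previous bitmap).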
*/ 2391 if (bitmap_offset == info->offset) { 2392 if (info->offset == 0) 2393 return false; 2394 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2395 } 2396 2397 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2398 if (!bitmap) 2399 return false; 2400 2401 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2402 j = 0; 2403 prev_j = (unsigned long)-1; 2404 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2405 if (j > i) 2406 break; 2407 prev_j = j; 2408 } 2409 if (prev_j == i) 2410 return false; 2411 2412 if (prev_j == (unsigned long)-1) 2413 bytes = (i + 1) * ctl->unit; 2414 else 2415 bytes = (i - prev_j) * ctl->unit; 2416 2417 info->offset -= bytes; 2418 info->bytes += bytes; 2419 2420 /* See try_merge_free_space() comment. */ 2421 if (!btrfs_free_space_trimmed(bitmap)) 2422 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2423 2424 if (update_stat) 2425 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2426 else 2427 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2428 2429 if (!bitmap->bytes) 2430 free_bitmap(ctl, bitmap); 2431 2432 return true; 2433 } 2434 2435 /* 2436 * We prefer always to allocate from extent entries, both for clustered and 2437 * non-clustered allocation requests. So when attempting to add a new extent 2438 * entry, try to see if there's adjacent free space in bitmap entries, and if 2439 * there is, migrate that space from the bitmaps to the extent. 2440 * Like this we get better chances of satisfying space allocation requests 2441 * because we attempt to satisfy them based on a single cache entry, and never 2442 * on 2 or more entries - even if the entries represent a contiguous free space 2443 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2444 * ends). 2445 */ 2446 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2447 struct btrfs_free_space *info, 2448 bool update_stat) 2449 { 2450 /* 2451 * Only work with disconnected entries, as we can change their offset, 2452 * and must be extent entries. 
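 *
 * "Disconnected" here means the entry is not linked into the rbtree yet
 * (see the RB_EMPTY_NODE() assertion below), so changing info->offset
 * cannot break the tree ordering.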
2453 */ 2454 ASSERT(!info->bitmap); 2455 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2456 2457 if (ctl->total_bitmaps > 0) { 2458 bool stole_end; 2459 bool stole_front = false; 2460 2461 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2462 if (ctl->total_bitmaps > 0) 2463 stole_front = steal_from_bitmap_to_front(ctl, info, 2464 update_stat); 2465 2466 if (stole_end || stole_front) 2467 try_merge_free_space(ctl, info, update_stat); 2468 } 2469 } 2470 2471 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2472 struct btrfs_free_space_ctl *ctl, 2473 u64 offset, u64 bytes, 2474 enum btrfs_trim_state trim_state) 2475 { 2476 struct btrfs_block_group *block_group = ctl->private; 2477 struct btrfs_free_space *info; 2478 int ret = 0; 2479 u64 filter_bytes = bytes; 2480 2481 ASSERT(!btrfs_is_zoned(fs_info)); 2482 2483 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2484 if (!info) 2485 return -ENOMEM; 2486 2487 info->offset = offset; 2488 info->bytes = bytes; 2489 info->trim_state = trim_state; 2490 RB_CLEAR_NODE(&info->offset_index); 2491 2492 spin_lock(&ctl->tree_lock); 2493 2494 if (try_merge_free_space(ctl, info, true)) 2495 goto link; 2496 2497 /* 2498 * There was no extent directly to the left or right of this new 2499 * extent then we know we're going to have to allocate a new extent, so 2500 * before we do that see if we need to drop this into a bitmap 2501 */ 2502 ret = insert_into_bitmap(ctl, info); 2503 if (ret < 0) { 2504 goto out; 2505 } else if (ret) { 2506 ret = 0; 2507 goto out; 2508 } 2509 link: 2510 /* 2511 * Only steal free space from adjacent bitmaps if we're sure we're not 2512 * going to add the new free space to existing bitmap entries - because 2513 * that would mean unnecessary work that would be reverted. Therefore 2514 * attempt to steal space from bitmaps if we're adding an extent entry. 2515 */ 2516 steal_from_bitmap(ctl, info, true); 2517 2518 filter_bytes = max(filter_bytes, info->bytes); 2519 2520 ret = link_free_space(ctl, info); 2521 if (ret) 2522 kmem_cache_free(btrfs_free_space_cachep, info); 2523 out: 2524 btrfs_discard_update_discardable(block_group); 2525 spin_unlock(&ctl->tree_lock); 2526 2527 if (ret) { 2528 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2529 ASSERT(ret != -EEXIST); 2530 } 2531 2532 if (trim_state != BTRFS_TRIM_STATE_TRIMMED) { 2533 btrfs_discard_check_filter(block_group, filter_bytes); 2534 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); 2535 } 2536 2537 return ret; 2538 } 2539 2540 static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, 2541 u64 bytenr, u64 size, bool used) 2542 { 2543 struct btrfs_fs_info *fs_info = block_group->fs_info; 2544 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2545 u64 offset = bytenr - block_group->start; 2546 u64 to_free, to_unusable; 2547 2548 spin_lock(&ctl->tree_lock); 2549 if (!used) 2550 to_free = size; 2551 else if (offset >= block_group->alloc_offset) 2552 to_free = size; 2553 else if (offset + size <= block_group->alloc_offset) 2554 to_free = 0; 2555 else 2556 to_free = offset + size - block_group->alloc_offset; 2557 to_unusable = size - to_free; 2558 2559 ctl->free_space += to_free; 2560 /* 2561 * If the block group is read-only, we should account freed space into 2562 * bytes_readonly. 
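 *
 * (Roughly: on a sequential zone, space before the allocation offset sits
 * behind the zone's write pointer and cannot be rewritten until the zone is
 * reset, so it is tracked as zone_unusable instead of being returned as
 * free space.)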
2563 */ 2564 if (!block_group->ro) 2565 block_group->zone_unusable += to_unusable; 2566 spin_unlock(&ctl->tree_lock); 2567 if (!used) { 2568 spin_lock(&block_group->lock); 2569 block_group->alloc_offset -= size; 2570 spin_unlock(&block_group->lock); 2571 } 2572 2573 /* All the region is now unusable. Mark it as unused and reclaim */ 2574 if (block_group->zone_unusable == block_group->length) { 2575 btrfs_mark_bg_unused(block_group); 2576 } else if (block_group->zone_unusable >= 2577 div_factor_fine(block_group->length, 2578 fs_info->bg_reclaim_threshold)) { 2579 btrfs_mark_bg_to_reclaim(block_group); 2580 } 2581 2582 return 0; 2583 } 2584 2585 int btrfs_add_free_space(struct btrfs_block_group *block_group, 2586 u64 bytenr, u64 size) 2587 { 2588 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2589 2590 if (btrfs_is_zoned(block_group->fs_info)) 2591 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2592 true); 2593 2594 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC)) 2595 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2596 2597 return __btrfs_add_free_space(block_group->fs_info, 2598 block_group->free_space_ctl, 2599 bytenr, size, trim_state); 2600 } 2601 2602 int btrfs_add_free_space_unused(struct btrfs_block_group *block_group, 2603 u64 bytenr, u64 size) 2604 { 2605 if (btrfs_is_zoned(block_group->fs_info)) 2606 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2607 false); 2608 2609 return btrfs_add_free_space(block_group, bytenr, size); 2610 } 2611 2612 /* 2613 * This is a subtle distinction because when adding free space back in general, 2614 * we want it to be added as untrimmed for async. But in the case where we add 2615 * it on loading of a block group, we want to consider it trimmed. 2616 */ 2617 int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group, 2618 u64 bytenr, u64 size) 2619 { 2620 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2621 2622 if (btrfs_is_zoned(block_group->fs_info)) 2623 return __btrfs_add_free_space_zoned(block_group, bytenr, size, 2624 true); 2625 2626 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) || 2627 btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 2628 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2629 2630 return __btrfs_add_free_space(block_group->fs_info, 2631 block_group->free_space_ctl, 2632 bytenr, size, trim_state); 2633 } 2634 2635 int btrfs_remove_free_space(struct btrfs_block_group *block_group, 2636 u64 offset, u64 bytes) 2637 { 2638 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2639 struct btrfs_free_space *info; 2640 int ret; 2641 bool re_search = false; 2642 2643 if (btrfs_is_zoned(block_group->fs_info)) { 2644 /* 2645 * This can happen with conventional zones when replaying log. 2646 * Since the allocation info of tree-log nodes are not recorded 2647 * to the extent-tree, calculate_alloc_pointer() failed to 2648 * advance the allocation pointer after last allocated tree log 2649 * node blocks. 2650 * 2651 * This function is called from 2652 * btrfs_pin_extent_for_log_replay() when replaying the log. 2653 * Advance the pointer not to overwrite the tree-log nodes. 
2654 */ 2655 if (block_group->alloc_offset < offset + bytes) 2656 block_group->alloc_offset = offset + bytes; 2657 return 0; 2658 } 2659 2660 spin_lock(&ctl->tree_lock); 2661 2662 again: 2663 ret = 0; 2664 if (!bytes) 2665 goto out_lock; 2666 2667 info = tree_search_offset(ctl, offset, 0, 0); 2668 if (!info) { 2669 /* 2670 * oops didn't find an extent that matched the space we wanted 2671 * to remove, look for a bitmap instead 2672 */ 2673 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2674 1, 0); 2675 if (!info) { 2676 /* 2677 * If we found a partial bit of our free space in a 2678 * bitmap but then couldn't find the other part this may 2679 * be a problem, so WARN about it. 2680 */ 2681 WARN_ON(re_search); 2682 goto out_lock; 2683 } 2684 } 2685 2686 re_search = false; 2687 if (!info->bitmap) { 2688 unlink_free_space(ctl, info); 2689 if (offset == info->offset) { 2690 u64 to_free = min(bytes, info->bytes); 2691 2692 info->bytes -= to_free; 2693 info->offset += to_free; 2694 if (info->bytes) { 2695 ret = link_free_space(ctl, info); 2696 WARN_ON(ret); 2697 } else { 2698 kmem_cache_free(btrfs_free_space_cachep, info); 2699 } 2700 2701 offset += to_free; 2702 bytes -= to_free; 2703 goto again; 2704 } else { 2705 u64 old_end = info->bytes + info->offset; 2706 2707 info->bytes = offset - info->offset; 2708 ret = link_free_space(ctl, info); 2709 WARN_ON(ret); 2710 if (ret) 2711 goto out_lock; 2712 2713 /* Not enough bytes in this entry to satisfy us */ 2714 if (old_end < offset + bytes) { 2715 bytes -= old_end - offset; 2716 offset = old_end; 2717 goto again; 2718 } else if (old_end == offset + bytes) { 2719 /* all done */ 2720 goto out_lock; 2721 } 2722 spin_unlock(&ctl->tree_lock); 2723 2724 ret = __btrfs_add_free_space(block_group->fs_info, ctl, 2725 offset + bytes, 2726 old_end - (offset + bytes), 2727 info->trim_state); 2728 WARN_ON(ret); 2729 goto out; 2730 } 2731 } 2732 2733 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2734 if (ret == -EAGAIN) { 2735 re_search = true; 2736 goto again; 2737 } 2738 out_lock: 2739 btrfs_discard_update_discardable(block_group); 2740 spin_unlock(&ctl->tree_lock); 2741 out: 2742 return ret; 2743 } 2744 2745 void btrfs_dump_free_space(struct btrfs_block_group *block_group, 2746 u64 bytes) 2747 { 2748 struct btrfs_fs_info *fs_info = block_group->fs_info; 2749 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2750 struct btrfs_free_space *info; 2751 struct rb_node *n; 2752 int count = 0; 2753 2754 /* 2755 * Zoned btrfs does not use free space tree and cluster. Just print 2756 * out the free space after the allocation offset. 2757 */ 2758 if (btrfs_is_zoned(fs_info)) { 2759 btrfs_info(fs_info, "free space %llu", 2760 block_group->length - block_group->alloc_offset); 2761 return; 2762 } 2763 2764 spin_lock(&ctl->tree_lock); 2765 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2766 info = rb_entry(n, struct btrfs_free_space, offset_index); 2767 if (info->bytes >= bytes && !block_group->ro) 2768 count++; 2769 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2770 info->offset, info->bytes, 2771 (info->bitmap) ? "yes" : "no"); 2772 } 2773 spin_unlock(&ctl->tree_lock); 2774 btrfs_info(fs_info, "block group has cluster?: %s", 2775 list_empty(&block_group->cluster_list) ? 
"no" : "yes"); 2776 btrfs_info(fs_info, 2777 "%d blocks of free space at or bigger than bytes is", count); 2778 } 2779 2780 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group, 2781 struct btrfs_free_space_ctl *ctl) 2782 { 2783 struct btrfs_fs_info *fs_info = block_group->fs_info; 2784 2785 spin_lock_init(&ctl->tree_lock); 2786 ctl->unit = fs_info->sectorsize; 2787 ctl->start = block_group->start; 2788 ctl->private = block_group; 2789 ctl->op = &free_space_op; 2790 INIT_LIST_HEAD(&ctl->trimming_ranges); 2791 mutex_init(&ctl->cache_writeout_mutex); 2792 2793 /* 2794 * we only want to have 32k of ram per block group for keeping 2795 * track of free space, and if we pass 1/2 of that we want to 2796 * start converting things over to using bitmaps 2797 */ 2798 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2799 } 2800 2801 /* 2802 * for a given cluster, put all of its extents back into the free 2803 * space cache. If the block group passed doesn't match the block group 2804 * pointed to by the cluster, someone else raced in and freed the 2805 * cluster already. In that case, we just return without changing anything 2806 */ 2807 static void __btrfs_return_cluster_to_free_space( 2808 struct btrfs_block_group *block_group, 2809 struct btrfs_free_cluster *cluster) 2810 { 2811 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2812 struct btrfs_free_space *entry; 2813 struct rb_node *node; 2814 2815 spin_lock(&cluster->lock); 2816 if (cluster->block_group != block_group) { 2817 spin_unlock(&cluster->lock); 2818 return; 2819 } 2820 2821 cluster->block_group = NULL; 2822 cluster->window_start = 0; 2823 list_del_init(&cluster->block_group_list); 2824 2825 node = rb_first(&cluster->root); 2826 while (node) { 2827 bool bitmap; 2828 2829 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2830 node = rb_next(&entry->offset_index); 2831 rb_erase(&entry->offset_index, &cluster->root); 2832 RB_CLEAR_NODE(&entry->offset_index); 2833 2834 bitmap = (entry->bitmap != NULL); 2835 if (!bitmap) { 2836 /* Merging treats extents as if they were new */ 2837 if (!btrfs_free_space_trimmed(entry)) { 2838 ctl->discardable_extents[BTRFS_STAT_CURR]--; 2839 ctl->discardable_bytes[BTRFS_STAT_CURR] -= 2840 entry->bytes; 2841 } 2842 2843 try_merge_free_space(ctl, entry, false); 2844 steal_from_bitmap(ctl, entry, false); 2845 2846 /* As we insert directly, update these statistics */ 2847 if (!btrfs_free_space_trimmed(entry)) { 2848 ctl->discardable_extents[BTRFS_STAT_CURR]++; 2849 ctl->discardable_bytes[BTRFS_STAT_CURR] += 2850 entry->bytes; 2851 } 2852 } 2853 tree_insert_offset(&ctl->free_space_offset, 2854 entry->offset, &entry->offset_index, bitmap); 2855 } 2856 cluster->root = RB_ROOT; 2857 spin_unlock(&cluster->lock); 2858 btrfs_put_block_group(block_group); 2859 } 2860 2861 static void __btrfs_remove_free_space_cache_locked( 2862 struct btrfs_free_space_ctl *ctl) 2863 { 2864 struct btrfs_free_space *info; 2865 struct rb_node *node; 2866 2867 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2868 info = rb_entry(node, struct btrfs_free_space, offset_index); 2869 if (!info->bitmap) { 2870 unlink_free_space(ctl, info); 2871 kmem_cache_free(btrfs_free_space_cachep, info); 2872 } else { 2873 free_bitmap(ctl, info); 2874 } 2875 2876 cond_resched_lock(&ctl->tree_lock); 2877 } 2878 } 2879 2880 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2881 { 2882 spin_lock(&ctl->tree_lock); 2883 __btrfs_remove_free_space_cache_locked(ctl); 2884 
if (ctl->private) 2885 btrfs_discard_update_discardable(ctl->private); 2886 spin_unlock(&ctl->tree_lock); 2887 } 2888 2889 void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) 2890 { 2891 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2892 struct btrfs_free_cluster *cluster; 2893 struct list_head *head; 2894 2895 spin_lock(&ctl->tree_lock); 2896 while ((head = block_group->cluster_list.next) != 2897 &block_group->cluster_list) { 2898 cluster = list_entry(head, struct btrfs_free_cluster, 2899 block_group_list); 2900 2901 WARN_ON(cluster->block_group != block_group); 2902 __btrfs_return_cluster_to_free_space(block_group, cluster); 2903 2904 cond_resched_lock(&ctl->tree_lock); 2905 } 2906 __btrfs_remove_free_space_cache_locked(ctl); 2907 btrfs_discard_update_discardable(block_group); 2908 spin_unlock(&ctl->tree_lock); 2909 2910 } 2911 2912 /** 2913 * btrfs_is_free_space_trimmed - see if everything is trimmed 2914 * @block_group: block_group of interest 2915 * 2916 * Walk @block_group's free space rb_tree to determine if everything is trimmed. 2917 */ 2918 bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) 2919 { 2920 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2921 struct btrfs_free_space *info; 2922 struct rb_node *node; 2923 bool ret = true; 2924 2925 spin_lock(&ctl->tree_lock); 2926 node = rb_first(&ctl->free_space_offset); 2927 2928 while (node) { 2929 info = rb_entry(node, struct btrfs_free_space, offset_index); 2930 2931 if (!btrfs_free_space_trimmed(info)) { 2932 ret = false; 2933 break; 2934 } 2935 2936 node = rb_next(node); 2937 } 2938 2939 spin_unlock(&ctl->tree_lock); 2940 return ret; 2941 } 2942 2943 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, 2944 u64 offset, u64 bytes, u64 empty_size, 2945 u64 *max_extent_size) 2946 { 2947 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2948 struct btrfs_discard_ctl *discard_ctl = 2949 &block_group->fs_info->discard_ctl; 2950 struct btrfs_free_space *entry = NULL; 2951 u64 bytes_search = bytes + empty_size; 2952 u64 ret = 0; 2953 u64 align_gap = 0; 2954 u64 align_gap_len = 0; 2955 enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2956 2957 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 2958 2959 spin_lock(&ctl->tree_lock); 2960 entry = find_free_space(ctl, &offset, &bytes_search, 2961 block_group->full_stripe_len, max_extent_size); 2962 if (!entry) 2963 goto out; 2964 2965 ret = offset; 2966 if (entry->bitmap) { 2967 bitmap_clear_bits(ctl, entry, offset, bytes); 2968 2969 if (!btrfs_free_space_trimmed(entry)) 2970 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2971 2972 if (!entry->bytes) 2973 free_bitmap(ctl, entry); 2974 } else { 2975 unlink_free_space(ctl, entry); 2976 align_gap_len = offset - entry->offset; 2977 align_gap = entry->offset; 2978 align_gap_trim_state = entry->trim_state; 2979 2980 if (!btrfs_free_space_trimmed(entry)) 2981 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2982 2983 entry->offset = offset + bytes; 2984 WARN_ON(entry->bytes < bytes + align_gap_len); 2985 2986 entry->bytes -= bytes + align_gap_len; 2987 if (!entry->bytes) 2988 kmem_cache_free(btrfs_free_space_cachep, entry); 2989 else 2990 link_free_space(ctl, entry); 2991 } 2992 out: 2993 btrfs_discard_update_discardable(block_group); 2994 spin_unlock(&ctl->tree_lock); 2995 2996 if (align_gap_len) 2997 __btrfs_add_free_space(block_group->fs_info, ctl, 2998 align_gap, align_gap_len, 2999 
align_gap_trim_state); 3000 return ret; 3001 } 3002 3003 /* 3004 * given a cluster, put all of its extents back into the free space 3005 * cache. If a block group is passed, this function will only free 3006 * a cluster that belongs to the passed block group. 3007 * 3008 * Otherwise, it'll get a reference on the block group pointed to by the 3009 * cluster and remove the cluster from it. 3010 */ 3011 void btrfs_return_cluster_to_free_space( 3012 struct btrfs_block_group *block_group, 3013 struct btrfs_free_cluster *cluster) 3014 { 3015 struct btrfs_free_space_ctl *ctl; 3016 3017 /* first, get a safe pointer to the block group */ 3018 spin_lock(&cluster->lock); 3019 if (!block_group) { 3020 block_group = cluster->block_group; 3021 if (!block_group) { 3022 spin_unlock(&cluster->lock); 3023 return; 3024 } 3025 } else if (cluster->block_group != block_group) { 3026 /* someone else has already freed it don't redo their work */ 3027 spin_unlock(&cluster->lock); 3028 return; 3029 } 3030 btrfs_get_block_group(block_group); 3031 spin_unlock(&cluster->lock); 3032 3033 ctl = block_group->free_space_ctl; 3034 3035 /* now return any extents the cluster had on it */ 3036 spin_lock(&ctl->tree_lock); 3037 __btrfs_return_cluster_to_free_space(block_group, cluster); 3038 spin_unlock(&ctl->tree_lock); 3039 3040 btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group); 3041 3042 /* finally drop our ref */ 3043 btrfs_put_block_group(block_group); 3044 } 3045 3046 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group, 3047 struct btrfs_free_cluster *cluster, 3048 struct btrfs_free_space *entry, 3049 u64 bytes, u64 min_start, 3050 u64 *max_extent_size) 3051 { 3052 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3053 int err; 3054 u64 search_start = cluster->window_start; 3055 u64 search_bytes = bytes; 3056 u64 ret = 0; 3057 3058 search_start = min_start; 3059 search_bytes = bytes; 3060 3061 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 3062 if (err) { 3063 *max_extent_size = max(get_max_extent_size(entry), 3064 *max_extent_size); 3065 return 0; 3066 } 3067 3068 ret = search_start; 3069 __bitmap_clear_bits(ctl, entry, ret, bytes); 3070 3071 return ret; 3072 } 3073 3074 /* 3075 * given a cluster, try to allocate 'bytes' from it, returns 0 3076 * if it couldn't find anything suitably large, or a logical disk offset 3077 * if things worked out 3078 */ 3079 u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, 3080 struct btrfs_free_cluster *cluster, u64 bytes, 3081 u64 min_start, u64 *max_extent_size) 3082 { 3083 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3084 struct btrfs_discard_ctl *discard_ctl = 3085 &block_group->fs_info->discard_ctl; 3086 struct btrfs_free_space *entry = NULL; 3087 struct rb_node *node; 3088 u64 ret = 0; 3089 3090 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 3091 3092 spin_lock(&cluster->lock); 3093 if (bytes > cluster->max_size) 3094 goto out; 3095 3096 if (cluster->block_group != block_group) 3097 goto out; 3098 3099 node = rb_first(&cluster->root); 3100 if (!node) 3101 goto out; 3102 3103 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3104 while (1) { 3105 if (entry->bytes < bytes) 3106 *max_extent_size = max(get_max_extent_size(entry), 3107 *max_extent_size); 3108 3109 if (entry->bytes < bytes || 3110 (!entry->bitmap && entry->offset < min_start)) { 3111 node = rb_next(&entry->offset_index); 3112 if (!node) 3113 break; 3114 entry = rb_entry(node, struct 
btrfs_free_space, 3115 offset_index); 3116 continue; 3117 } 3118 3119 if (entry->bitmap) { 3120 ret = btrfs_alloc_from_bitmap(block_group, 3121 cluster, entry, bytes, 3122 cluster->window_start, 3123 max_extent_size); 3124 if (ret == 0) { 3125 node = rb_next(&entry->offset_index); 3126 if (!node) 3127 break; 3128 entry = rb_entry(node, struct btrfs_free_space, 3129 offset_index); 3130 continue; 3131 } 3132 cluster->window_start += bytes; 3133 } else { 3134 ret = entry->offset; 3135 3136 entry->offset += bytes; 3137 entry->bytes -= bytes; 3138 } 3139 3140 break; 3141 } 3142 out: 3143 spin_unlock(&cluster->lock); 3144 3145 if (!ret) 3146 return 0; 3147 3148 spin_lock(&ctl->tree_lock); 3149 3150 if (!btrfs_free_space_trimmed(entry)) 3151 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 3152 3153 ctl->free_space -= bytes; 3154 if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) 3155 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 3156 3157 spin_lock(&cluster->lock); 3158 if (entry->bytes == 0) { 3159 rb_erase(&entry->offset_index, &cluster->root); 3160 ctl->free_extents--; 3161 if (entry->bitmap) { 3162 kmem_cache_free(btrfs_free_space_bitmap_cachep, 3163 entry->bitmap); 3164 ctl->total_bitmaps--; 3165 recalculate_thresholds(ctl); 3166 } else if (!btrfs_free_space_trimmed(entry)) { 3167 ctl->discardable_extents[BTRFS_STAT_CURR]--; 3168 } 3169 kmem_cache_free(btrfs_free_space_cachep, entry); 3170 } 3171 3172 spin_unlock(&cluster->lock); 3173 spin_unlock(&ctl->tree_lock); 3174 3175 return ret; 3176 } 3177 3178 static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group, 3179 struct btrfs_free_space *entry, 3180 struct btrfs_free_cluster *cluster, 3181 u64 offset, u64 bytes, 3182 u64 cont1_bytes, u64 min_bytes) 3183 { 3184 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3185 unsigned long next_zero; 3186 unsigned long i; 3187 unsigned long want_bits; 3188 unsigned long min_bits; 3189 unsigned long found_bits; 3190 unsigned long max_bits = 0; 3191 unsigned long start = 0; 3192 unsigned long total_found = 0; 3193 int ret; 3194 3195 i = offset_to_bit(entry->offset, ctl->unit, 3196 max_t(u64, offset, entry->offset)); 3197 want_bits = bytes_to_bits(bytes, ctl->unit); 3198 min_bits = bytes_to_bits(min_bytes, ctl->unit); 3199 3200 /* 3201 * Don't bother looking for a cluster in this bitmap if it's heavily 3202 * fragmented. 
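 *
 * (entry->max_extent_size is a cached result from a previous scan of this
 * bitmap; if even the largest run seen then was smaller than cont1_bytes,
 * no window in this bitmap can satisfy the cluster.)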
3203 */ 3204 if (entry->max_extent_size && 3205 entry->max_extent_size < cont1_bytes) 3206 return -ENOSPC; 3207 again: 3208 found_bits = 0; 3209 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 3210 next_zero = find_next_zero_bit(entry->bitmap, 3211 BITS_PER_BITMAP, i); 3212 if (next_zero - i >= min_bits) { 3213 found_bits = next_zero - i; 3214 if (found_bits > max_bits) 3215 max_bits = found_bits; 3216 break; 3217 } 3218 if (next_zero - i > max_bits) 3219 max_bits = next_zero - i; 3220 i = next_zero; 3221 } 3222 3223 if (!found_bits) { 3224 entry->max_extent_size = (u64)max_bits * ctl->unit; 3225 return -ENOSPC; 3226 } 3227 3228 if (!total_found) { 3229 start = i; 3230 cluster->max_size = 0; 3231 } 3232 3233 total_found += found_bits; 3234 3235 if (cluster->max_size < found_bits * ctl->unit) 3236 cluster->max_size = found_bits * ctl->unit; 3237 3238 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 3239 i = next_zero + 1; 3240 goto again; 3241 } 3242 3243 cluster->window_start = start * ctl->unit + entry->offset; 3244 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3245 ret = tree_insert_offset(&cluster->root, entry->offset, 3246 &entry->offset_index, 1); 3247 ASSERT(!ret); /* -EEXIST; Logic error */ 3248 3249 trace_btrfs_setup_cluster(block_group, cluster, 3250 total_found * ctl->unit, 1); 3251 return 0; 3252 } 3253 3254 /* 3255 * This searches the block group for just extents to fill the cluster with. 3256 * Try to find a cluster with at least bytes total bytes, at least one 3257 * extent of cont1_bytes, and other clusters of at least min_bytes. 3258 */ 3259 static noinline int 3260 setup_cluster_no_bitmap(struct btrfs_block_group *block_group, 3261 struct btrfs_free_cluster *cluster, 3262 struct list_head *bitmaps, u64 offset, u64 bytes, 3263 u64 cont1_bytes, u64 min_bytes) 3264 { 3265 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3266 struct btrfs_free_space *first = NULL; 3267 struct btrfs_free_space *entry = NULL; 3268 struct btrfs_free_space *last; 3269 struct rb_node *node; 3270 u64 window_free; 3271 u64 max_extent; 3272 u64 total_size = 0; 3273 3274 entry = tree_search_offset(ctl, offset, 0, 1); 3275 if (!entry) 3276 return -ENOSPC; 3277 3278 /* 3279 * We don't want bitmaps, so just move along until we find a normal 3280 * extent entry. 
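 *
 * Bitmap entries we walk past are collected on the @bitmaps list so that
 * setup_cluster_bitmap() can fall back to them if no suitable run of extent
 * entries is found here.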
3281 */ 3282 while (entry->bitmap || entry->bytes < min_bytes) { 3283 if (entry->bitmap && list_empty(&entry->list)) 3284 list_add_tail(&entry->list, bitmaps); 3285 node = rb_next(&entry->offset_index); 3286 if (!node) 3287 return -ENOSPC; 3288 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3289 } 3290 3291 window_free = entry->bytes; 3292 max_extent = entry->bytes; 3293 first = entry; 3294 last = entry; 3295 3296 for (node = rb_next(&entry->offset_index); node; 3297 node = rb_next(&entry->offset_index)) { 3298 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3299 3300 if (entry->bitmap) { 3301 if (list_empty(&entry->list)) 3302 list_add_tail(&entry->list, bitmaps); 3303 continue; 3304 } 3305 3306 if (entry->bytes < min_bytes) 3307 continue; 3308 3309 last = entry; 3310 window_free += entry->bytes; 3311 if (entry->bytes > max_extent) 3312 max_extent = entry->bytes; 3313 } 3314 3315 if (window_free < bytes || max_extent < cont1_bytes) 3316 return -ENOSPC; 3317 3318 cluster->window_start = first->offset; 3319 3320 node = &first->offset_index; 3321 3322 /* 3323 * now we've found our entries, pull them out of the free space 3324 * cache and put them into the cluster rbtree 3325 */ 3326 do { 3327 int ret; 3328 3329 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3330 node = rb_next(&entry->offset_index); 3331 if (entry->bitmap || entry->bytes < min_bytes) 3332 continue; 3333 3334 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3335 ret = tree_insert_offset(&cluster->root, entry->offset, 3336 &entry->offset_index, 0); 3337 total_size += entry->bytes; 3338 ASSERT(!ret); /* -EEXIST; Logic error */ 3339 } while (node && entry != last); 3340 3341 cluster->max_size = max_extent; 3342 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 3343 return 0; 3344 } 3345 3346 /* 3347 * This specifically looks for bitmaps that may work in the cluster, we assume 3348 * that we have already failed to find extents that will work. 3349 */ 3350 static noinline int 3351 setup_cluster_bitmap(struct btrfs_block_group *block_group, 3352 struct btrfs_free_cluster *cluster, 3353 struct list_head *bitmaps, u64 offset, u64 bytes, 3354 u64 cont1_bytes, u64 min_bytes) 3355 { 3356 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3357 struct btrfs_free_space *entry = NULL; 3358 int ret = -ENOSPC; 3359 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 3360 3361 if (ctl->total_bitmaps == 0) 3362 return -ENOSPC; 3363 3364 /* 3365 * The bitmap that covers offset won't be in the list unless offset 3366 * is just its start offset. 3367 */ 3368 if (!list_empty(bitmaps)) 3369 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 3370 3371 if (!entry || entry->offset != bitmap_offset) { 3372 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 3373 if (entry && list_empty(&entry->list)) 3374 list_add(&entry->list, bitmaps); 3375 } 3376 3377 list_for_each_entry(entry, bitmaps, list) { 3378 if (entry->bytes < bytes) 3379 continue; 3380 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3381 bytes, cont1_bytes, min_bytes); 3382 if (!ret) 3383 return 0; 3384 } 3385 3386 /* 3387 * The bitmaps list has all the bitmaps that record free space 3388 * starting after offset, so no more search is required. 3389 */ 3390 return -ENOSPC; 3391 } 3392 3393 /* 3394 * here we try to find a cluster of blocks in a block group. The goal 3395 * is to find at least bytes+empty_size. 3396 * We might not find them all in one contiguous area. 
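 * A usable cluster window needs at least bytes + empty_size of free space in
 * total, including one contiguous chunk of cont1_bytes, and is only built
 * from pieces that are at least min_bytes each.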
3397 * 3398 * returns zero and sets up cluster if things worked out, otherwise 3399 * it returns -enospc 3400 */ 3401 int btrfs_find_space_cluster(struct btrfs_block_group *block_group, 3402 struct btrfs_free_cluster *cluster, 3403 u64 offset, u64 bytes, u64 empty_size) 3404 { 3405 struct btrfs_fs_info *fs_info = block_group->fs_info; 3406 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3407 struct btrfs_free_space *entry, *tmp; 3408 LIST_HEAD(bitmaps); 3409 u64 min_bytes; 3410 u64 cont1_bytes; 3411 int ret; 3412 3413 /* 3414 * Choose the minimum extent size we'll require for this 3415 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3416 * For metadata, allow allocates with smaller extents. For 3417 * data, keep it dense. 3418 */ 3419 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3420 cont1_bytes = min_bytes = bytes + empty_size; 3421 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3422 cont1_bytes = bytes; 3423 min_bytes = fs_info->sectorsize; 3424 } else { 3425 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3426 min_bytes = fs_info->sectorsize; 3427 } 3428 3429 spin_lock(&ctl->tree_lock); 3430 3431 /* 3432 * If we know we don't have enough space to make a cluster don't even 3433 * bother doing all the work to try and find one. 3434 */ 3435 if (ctl->free_space < bytes) { 3436 spin_unlock(&ctl->tree_lock); 3437 return -ENOSPC; 3438 } 3439 3440 spin_lock(&cluster->lock); 3441 3442 /* someone already found a cluster, hooray */ 3443 if (cluster->block_group) { 3444 ret = 0; 3445 goto out; 3446 } 3447 3448 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3449 min_bytes); 3450 3451 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3452 bytes + empty_size, 3453 cont1_bytes, min_bytes); 3454 if (ret) 3455 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3456 offset, bytes + empty_size, 3457 cont1_bytes, min_bytes); 3458 3459 /* Clear our temporary list */ 3460 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3461 list_del_init(&entry->list); 3462 3463 if (!ret) { 3464 btrfs_get_block_group(block_group); 3465 list_add_tail(&cluster->block_group_list, 3466 &block_group->cluster_list); 3467 cluster->block_group = block_group; 3468 } else { 3469 trace_btrfs_failed_cluster_setup(block_group); 3470 } 3471 out: 3472 spin_unlock(&cluster->lock); 3473 spin_unlock(&ctl->tree_lock); 3474 3475 return ret; 3476 } 3477 3478 /* 3479 * simple code to zero out a cluster 3480 */ 3481 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3482 { 3483 spin_lock_init(&cluster->lock); 3484 spin_lock_init(&cluster->refill_lock); 3485 cluster->root = RB_ROOT; 3486 cluster->max_size = 0; 3487 cluster->fragmented = false; 3488 INIT_LIST_HEAD(&cluster->block_group_list); 3489 cluster->block_group = NULL; 3490 } 3491 3492 static int do_trimming(struct btrfs_block_group *block_group, 3493 u64 *total_trimmed, u64 start, u64 bytes, 3494 u64 reserved_start, u64 reserved_bytes, 3495 enum btrfs_trim_state reserved_trim_state, 3496 struct btrfs_trim_range *trim_entry) 3497 { 3498 struct btrfs_space_info *space_info = block_group->space_info; 3499 struct btrfs_fs_info *fs_info = block_group->fs_info; 3500 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3501 int ret; 3502 int update = 0; 3503 const u64 end = start + bytes; 3504 const u64 reserved_end = reserved_start + reserved_bytes; 3505 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3506 u64 trimmed = 0; 3507 3508 spin_lock(&space_info->lock); 
3509 spin_lock(&block_group->lock); 3510 if (!block_group->ro) { 3511 block_group->reserved += reserved_bytes; 3512 space_info->bytes_reserved += reserved_bytes; 3513 update = 1; 3514 } 3515 spin_unlock(&block_group->lock); 3516 spin_unlock(&space_info->lock); 3517 3518 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3519 if (!ret) { 3520 *total_trimmed += trimmed; 3521 trim_state = BTRFS_TRIM_STATE_TRIMMED; 3522 } 3523 3524 mutex_lock(&ctl->cache_writeout_mutex); 3525 if (reserved_start < start) 3526 __btrfs_add_free_space(fs_info, ctl, reserved_start, 3527 start - reserved_start, 3528 reserved_trim_state); 3529 if (start + bytes < reserved_start + reserved_bytes) 3530 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end, 3531 reserved_trim_state); 3532 __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state); 3533 list_del(&trim_entry->list); 3534 mutex_unlock(&ctl->cache_writeout_mutex); 3535 3536 if (update) { 3537 spin_lock(&space_info->lock); 3538 spin_lock(&block_group->lock); 3539 if (block_group->ro) 3540 space_info->bytes_readonly += reserved_bytes; 3541 block_group->reserved -= reserved_bytes; 3542 space_info->bytes_reserved -= reserved_bytes; 3543 spin_unlock(&block_group->lock); 3544 spin_unlock(&space_info->lock); 3545 } 3546 3547 return ret; 3548 } 3549 3550 /* 3551 * If @async is set, then we will trim 1 region and return. 3552 */ 3553 static int trim_no_bitmap(struct btrfs_block_group *block_group, 3554 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3555 bool async) 3556 { 3557 struct btrfs_discard_ctl *discard_ctl = 3558 &block_group->fs_info->discard_ctl; 3559 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3560 struct btrfs_free_space *entry; 3561 struct rb_node *node; 3562 int ret = 0; 3563 u64 extent_start; 3564 u64 extent_bytes; 3565 enum btrfs_trim_state extent_trim_state; 3566 u64 bytes; 3567 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3568 3569 while (start < end) { 3570 struct btrfs_trim_range trim_entry; 3571 3572 mutex_lock(&ctl->cache_writeout_mutex); 3573 spin_lock(&ctl->tree_lock); 3574 3575 if (ctl->free_space < minlen) 3576 goto out_unlock; 3577 3578 entry = tree_search_offset(ctl, start, 0, 1); 3579 if (!entry) 3580 goto out_unlock; 3581 3582 /* Skip bitmaps and if async, already trimmed entries */ 3583 while (entry->bitmap || 3584 (async && btrfs_free_space_trimmed(entry))) { 3585 node = rb_next(&entry->offset_index); 3586 if (!node) 3587 goto out_unlock; 3588 entry = rb_entry(node, struct btrfs_free_space, 3589 offset_index); 3590 } 3591 3592 if (entry->offset >= end) 3593 goto out_unlock; 3594 3595 extent_start = entry->offset; 3596 extent_bytes = entry->bytes; 3597 extent_trim_state = entry->trim_state; 3598 if (async) { 3599 start = entry->offset; 3600 bytes = entry->bytes; 3601 if (bytes < minlen) { 3602 spin_unlock(&ctl->tree_lock); 3603 mutex_unlock(&ctl->cache_writeout_mutex); 3604 goto next; 3605 } 3606 unlink_free_space(ctl, entry); 3607 /* 3608 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3609 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim 3610 * X when we come back around. So trim it now. 
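 *
 * In other words: an extent of max_discard_size + X with X below the filter
 * is discarded in one go, while one with X at or above the filter is split,
 * with the X-sized tail staying in the tree for a later pass.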
3611 */ 3612 if (max_discard_size && 3613 bytes >= (max_discard_size + 3614 BTRFS_ASYNC_DISCARD_MIN_FILTER)) { 3615 bytes = max_discard_size; 3616 extent_bytes = max_discard_size; 3617 entry->offset += max_discard_size; 3618 entry->bytes -= max_discard_size; 3619 link_free_space(ctl, entry); 3620 } else { 3621 kmem_cache_free(btrfs_free_space_cachep, entry); 3622 } 3623 } else { 3624 start = max(start, extent_start); 3625 bytes = min(extent_start + extent_bytes, end) - start; 3626 if (bytes < minlen) { 3627 spin_unlock(&ctl->tree_lock); 3628 mutex_unlock(&ctl->cache_writeout_mutex); 3629 goto next; 3630 } 3631 3632 unlink_free_space(ctl, entry); 3633 kmem_cache_free(btrfs_free_space_cachep, entry); 3634 } 3635 3636 spin_unlock(&ctl->tree_lock); 3637 trim_entry.start = extent_start; 3638 trim_entry.bytes = extent_bytes; 3639 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3640 mutex_unlock(&ctl->cache_writeout_mutex); 3641 3642 ret = do_trimming(block_group, total_trimmed, start, bytes, 3643 extent_start, extent_bytes, extent_trim_state, 3644 &trim_entry); 3645 if (ret) { 3646 block_group->discard_cursor = start + bytes; 3647 break; 3648 } 3649 next: 3650 start += bytes; 3651 block_group->discard_cursor = start; 3652 if (async && *total_trimmed) 3653 break; 3654 3655 if (fatal_signal_pending(current)) { 3656 ret = -ERESTARTSYS; 3657 break; 3658 } 3659 3660 cond_resched(); 3661 } 3662 3663 return ret; 3664 3665 out_unlock: 3666 block_group->discard_cursor = btrfs_block_group_end(block_group); 3667 spin_unlock(&ctl->tree_lock); 3668 mutex_unlock(&ctl->cache_writeout_mutex); 3669 3670 return ret; 3671 } 3672 3673 /* 3674 * If we break out of trimming a bitmap prematurely, we should reset the 3675 * trimming bit. In a rather contrived case, it's possible to race here so 3676 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED. 3677 * 3678 * start = start of bitmap 3679 * end = near end of bitmap 3680 * 3681 * Thread 1: Thread 2: 3682 * trim_bitmaps(start) 3683 * trim_bitmaps(end) 3684 * end_trimming_bitmap() 3685 * reset_trimming_bitmap() 3686 */ 3687 static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) 3688 { 3689 struct btrfs_free_space *entry; 3690 3691 spin_lock(&ctl->tree_lock); 3692 entry = tree_search_offset(ctl, offset, 1, 0); 3693 if (entry) { 3694 if (btrfs_free_space_trimmed(entry)) { 3695 ctl->discardable_extents[BTRFS_STAT_CURR] += 3696 entry->bitmap_extents; 3697 ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes; 3698 } 3699 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3700 } 3701 3702 spin_unlock(&ctl->tree_lock); 3703 } 3704 3705 static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl, 3706 struct btrfs_free_space *entry) 3707 { 3708 if (btrfs_free_space_trimming_bitmap(entry)) { 3709 entry->trim_state = BTRFS_TRIM_STATE_TRIMMED; 3710 ctl->discardable_extents[BTRFS_STAT_CURR] -= 3711 entry->bitmap_extents; 3712 ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes; 3713 } 3714 } 3715 3716 /* 3717 * If @async is set, then we will trim 1 region and return.
3718 */ 3719 static int trim_bitmaps(struct btrfs_block_group *block_group, 3720 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3721 u64 maxlen, bool async) 3722 { 3723 struct btrfs_discard_ctl *discard_ctl = 3724 &block_group->fs_info->discard_ctl; 3725 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3726 struct btrfs_free_space *entry; 3727 int ret = 0; 3728 int ret2; 3729 u64 bytes; 3730 u64 offset = offset_to_bitmap(ctl, start); 3731 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3732 3733 while (offset < end) { 3734 bool next_bitmap = false; 3735 struct btrfs_trim_range trim_entry; 3736 3737 mutex_lock(&ctl->cache_writeout_mutex); 3738 spin_lock(&ctl->tree_lock); 3739 3740 if (ctl->free_space < minlen) { 3741 block_group->discard_cursor = 3742 btrfs_block_group_end(block_group); 3743 spin_unlock(&ctl->tree_lock); 3744 mutex_unlock(&ctl->cache_writeout_mutex); 3745 break; 3746 } 3747 3748 entry = tree_search_offset(ctl, offset, 1, 0); 3749 /* 3750 * Bitmaps are marked trimmed lossily now to prevent constant 3751 * discarding of the same bitmap (the reason why we are bound 3752 * by the filters). So, retrim the block group bitmaps when we 3753 * are preparing to punt to the unused_bgs list. This uses 3754 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED 3755 * which is the only discard index which sets minlen to 0. 3756 */ 3757 if (!entry || (async && minlen && start == offset && 3758 btrfs_free_space_trimmed(entry))) { 3759 spin_unlock(&ctl->tree_lock); 3760 mutex_unlock(&ctl->cache_writeout_mutex); 3761 next_bitmap = true; 3762 goto next; 3763 } 3764 3765 /* 3766 * Async discard bitmap trimming begins by setting the start 3767 * to be key.objectid and offset_to_bitmap() aligns it to the 3768 * start of the bitmap. This lets us know we are fully 3769 * scanning the bitmap rather than only some portion of it. 3770 */ 3771 if (start == offset) 3772 entry->trim_state = BTRFS_TRIM_STATE_TRIMMING; 3773 3774 bytes = minlen; 3775 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3776 if (ret2 || start >= end) { 3777 /* 3778 * We lossily consider a bitmap trimmed if we only skip 3779 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER. 3780 */ 3781 if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER) 3782 end_trimming_bitmap(ctl, entry); 3783 else 3784 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3785 spin_unlock(&ctl->tree_lock); 3786 mutex_unlock(&ctl->cache_writeout_mutex); 3787 next_bitmap = true; 3788 goto next; 3789 } 3790 3791 /* 3792 * We already trimmed a region, but are using the locking above 3793 * to reset the trim_state. 3794 */ 3795 if (async && *total_trimmed) { 3796 spin_unlock(&ctl->tree_lock); 3797 mutex_unlock(&ctl->cache_writeout_mutex); 3798 goto out; 3799 } 3800 3801 bytes = min(bytes, end - start); 3802 if (bytes < minlen || (async && maxlen && bytes > maxlen)) { 3803 spin_unlock(&ctl->tree_lock); 3804 mutex_unlock(&ctl->cache_writeout_mutex); 3805 goto next; 3806 } 3807 3808 /* 3809 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3810 * If X < @minlen, we won't trim X when we come back around. 3811 * So trim it now. We differ here from trimming extents as we 3812 * don't keep individual state per bit.
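 *
 * For async discard this means the cap below is only applied when the part
 * left over after it would still be bigger than @minlen; otherwise the whole
 * range is discarded in this pass.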
3813 */ 3814 if (async && 3815 max_discard_size && 3816 bytes > (max_discard_size + minlen)) 3817 bytes = max_discard_size; 3818 3819 bitmap_clear_bits(ctl, entry, start, bytes); 3820 if (entry->bytes == 0) 3821 free_bitmap(ctl, entry); 3822 3823 spin_unlock(&ctl->tree_lock); 3824 trim_entry.start = start; 3825 trim_entry.bytes = bytes; 3826 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3827 mutex_unlock(&ctl->cache_writeout_mutex); 3828 3829 ret = do_trimming(block_group, total_trimmed, start, bytes, 3830 start, bytes, 0, &trim_entry); 3831 if (ret) { 3832 reset_trimming_bitmap(ctl, offset); 3833 block_group->discard_cursor = 3834 btrfs_block_group_end(block_group); 3835 break; 3836 } 3837 next: 3838 if (next_bitmap) { 3839 offset += BITS_PER_BITMAP * ctl->unit; 3840 start = offset; 3841 } else { 3842 start += bytes; 3843 } 3844 block_group->discard_cursor = start; 3845 3846 if (fatal_signal_pending(current)) { 3847 if (start != offset) 3848 reset_trimming_bitmap(ctl, offset); 3849 ret = -ERESTARTSYS; 3850 break; 3851 } 3852 3853 cond_resched(); 3854 } 3855 3856 if (offset >= end) 3857 block_group->discard_cursor = end; 3858 3859 out: 3860 return ret; 3861 } 3862 3863 int btrfs_trim_block_group(struct btrfs_block_group *block_group, 3864 u64 *trimmed, u64 start, u64 end, u64 minlen) 3865 { 3866 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3867 int ret; 3868 u64 rem = 0; 3869 3870 ASSERT(!btrfs_is_zoned(block_group->fs_info)); 3871 3872 *trimmed = 0; 3873 3874 spin_lock(&block_group->lock); 3875 if (block_group->removed) { 3876 spin_unlock(&block_group->lock); 3877 return 0; 3878 } 3879 btrfs_freeze_block_group(block_group); 3880 spin_unlock(&block_group->lock); 3881 3882 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false); 3883 if (ret) 3884 goto out; 3885 3886 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false); 3887 div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem); 3888 /* If we ended in the middle of a bitmap, reset the trimming flag */ 3889 if (rem) 3890 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end)); 3891 out: 3892 btrfs_unfreeze_block_group(block_group); 3893 return ret; 3894 } 3895 3896 int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group, 3897 u64 *trimmed, u64 start, u64 end, u64 minlen, 3898 bool async) 3899 { 3900 int ret; 3901 3902 *trimmed = 0; 3903 3904 spin_lock(&block_group->lock); 3905 if (block_group->removed) { 3906 spin_unlock(&block_group->lock); 3907 return 0; 3908 } 3909 btrfs_freeze_block_group(block_group); 3910 spin_unlock(&block_group->lock); 3911 3912 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async); 3913 btrfs_unfreeze_block_group(block_group); 3914 3915 return ret; 3916 } 3917 3918 int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group, 3919 u64 *trimmed, u64 start, u64 end, u64 minlen, 3920 u64 maxlen, bool async) 3921 { 3922 int ret; 3923 3924 *trimmed = 0; 3925 3926 spin_lock(&block_group->lock); 3927 if (block_group->removed) { 3928 spin_unlock(&block_group->lock); 3929 return 0; 3930 } 3931 btrfs_freeze_block_group(block_group); 3932 spin_unlock(&block_group->lock); 3933 3934 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen, 3935 async); 3936 3937 btrfs_unfreeze_block_group(block_group); 3938 3939 return ret; 3940 } 3941 3942 bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info) 3943 { 3944 return btrfs_super_cache_generation(fs_info->super_copy); 3945 } 3946 3947 static int 
static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
				       struct btrfs_trans_handle *trans)
{
	struct btrfs_block_group *block_group;
	struct rb_node *node;
	int ret = 0;

	btrfs_info(fs_info, "cleaning free space cache v1");

	node = rb_first(&fs_info->block_group_cache_tree);
	while (node) {
		block_group = rb_entry(node, struct btrfs_block_group, cache_node);
		ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
		if (ret)
			goto out;
		node = rb_next(node);
	}
out:
	return ret;
}

int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active)
{
	struct btrfs_trans_handle *trans;
	int ret;

	/*
	 * update_super_roots will appropriately set or unset
	 * super_copy->cache_generation based on SPACE_CACHE and
	 * BTRFS_FS_CLEANUP_SPACE_CACHE_V1.  For this reason, we need a
	 * transaction commit whether we are enabling space cache v1 and don't
	 * have any other work to do, or are disabling it and removing free
	 * space inodes.
	 */
	trans = btrfs_start_transaction(fs_info->tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	if (!active) {
		set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
		ret = cleanup_free_space_cache_v1(fs_info, trans);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out;
		}
	}

	ret = btrfs_commit_transaction(trans);
out:
	clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);

	return ret;
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Use this if you need to make a bitmap or extent entry specifically.  It
 * doesn't do any of the merging that add_free_space does, and it acts a lot
 * like how the free space cache loading works, so you can end up with really
 * weird configurations.
 */
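/*
 * Illustrative usage (parameter values are examples only, as in the free
 * space self-tests): test_add_free_space_entry(cache, 0, SZ_4M, 0) links a
 * 4M extent entry at offset 0, while passing true for @bitmap forces the
 * range into a bitmap entry instead.
 */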
int test_add_free_space_entry(struct btrfs_block_group *cache,
			      u64 offset, u64 bytes, bool bitmap)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info = NULL, *bitmap_info;
	void *map = NULL;
	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
	u64 bytes_added;
	int ret;

again:
	if (!info) {
		info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
		if (!info)
			return -ENOMEM;
	}

	if (!bitmap) {
		spin_lock(&ctl->tree_lock);
		info->offset = offset;
		info->bytes = bytes;
		info->max_extent_size = 0;
		ret = link_free_space(ctl, info);
		spin_unlock(&ctl->tree_lock);
		if (ret)
			kmem_cache_free(btrfs_free_space_cachep, info);
		return ret;
	}

	if (!map) {
		map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
		if (!map) {
			kmem_cache_free(btrfs_free_space_cachep, info);
			return -ENOMEM;
		}
	}

	spin_lock(&ctl->tree_lock);
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		info->bitmap = map;
		map = NULL;
		add_new_bitmap(ctl, info, offset);
		bitmap_info = info;
		info = NULL;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
					  trim_state);

	bytes -= bytes_added;
	offset += bytes_added;
	spin_unlock(&ctl->tree_lock);

	if (bytes)
		goto again;

	if (info)
		kmem_cache_free(btrfs_free_space_cachep, info);
	if (map)
		kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
	return 0;
}

/*
 * Checks to see if the given range is in the free space cache.  This is really
 * just used to check the absence of space, so if there is free space in the
 * range at all we will return 1.
 */
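/*
 * Illustrative usage (example values only): after the free space covering
 * [0, SZ_1M) has been removed with btrfs_remove_free_space(),
 * test_check_exists(cache, 0, SZ_1M) should return 0, while any remaining
 * overlap with free space in the range returns 1.
 */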
int test_check_exists(struct btrfs_block_group *cache,
		      u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info;
	int ret = 0;

	spin_lock(&ctl->tree_lock);
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info)
			goto out;
	}

have_info:
	if (info->bitmap) {
		u64 bit_off, bit_bytes;
		struct rb_node *n;
		struct btrfs_free_space *tmp;

		bit_off = offset;
		bit_bytes = ctl->unit;
		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
		if (!ret) {
			if (bit_off == offset) {
				ret = 1;
				goto out;
			} else if (bit_off > offset &&
				   offset + bytes > bit_off) {
				ret = 1;
				goto out;
			}
		}

		n = rb_prev(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (tmp->offset + tmp->bytes < offset)
				break;
			if (offset + bytes < tmp->offset) {
				n = rb_prev(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		n = rb_next(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (offset + bytes < tmp->offset)
				break;
			if (tmp->offset + tmp->bytes < offset) {
				n = rb_next(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		ret = 0;
		goto out;
	}

	if (info->offset == offset) {
		ret = 1;
		goto out;
	}

	if (offset > info->offset && offset < info->offset + info->bytes)
		ret = 1;
out:
	spin_unlock(&ctl->tree_lock);
	return ret;
}
#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */