1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/slab.h> 22 #include <linux/math64.h> 23 #include <linux/ratelimit.h> 24 #include "ctree.h" 25 #include "free-space-cache.h" 26 #include "transaction.h" 27 #include "disk-io.h" 28 #include "extent_io.h" 29 #include "inode-map.h" 30 #include "volumes.h" 31 32 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 33 #define MAX_CACHE_BYTES_PER_GIG SZ_32K 34 35 struct btrfs_trim_range { 36 u64 start; 37 u64 bytes; 38 struct list_head list; 39 }; 40 41 static int link_free_space(struct btrfs_free_space_ctl *ctl, 42 struct btrfs_free_space *info); 43 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 44 struct btrfs_free_space *info); 45 46 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 47 struct btrfs_path *path, 48 u64 offset) 49 { 50 struct btrfs_key key; 51 struct btrfs_key location; 52 struct btrfs_disk_key disk_key; 53 struct btrfs_free_space_header *header; 54 struct extent_buffer *leaf; 55 struct inode *inode = NULL; 56 int ret; 57 58 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 59 key.offset = offset; 60 key.type = 0; 61 62 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 63 if (ret < 0) 64 return ERR_PTR(ret); 65 if (ret > 0) { 66 btrfs_release_path(path); 67 return ERR_PTR(-ENOENT); 68 } 69 70 leaf = path->nodes[0]; 71 header = btrfs_item_ptr(leaf, path->slots[0], 72 struct btrfs_free_space_header); 73 btrfs_free_space_key(leaf, header, &disk_key); 74 btrfs_disk_key_to_cpu(&location, &disk_key); 75 btrfs_release_path(path); 76 77 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 78 if (!inode) 79 return ERR_PTR(-ENOENT); 80 if (IS_ERR(inode)) 81 return inode; 82 if (is_bad_inode(inode)) { 83 iput(inode); 84 return ERR_PTR(-ENOENT); 85 } 86 87 mapping_set_gfp_mask(inode->i_mapping, 88 mapping_gfp_constraint(inode->i_mapping, 89 ~(__GFP_FS | __GFP_HIGHMEM))); 90 91 return inode; 92 } 93 94 struct inode *lookup_free_space_inode(struct btrfs_root *root, 95 struct btrfs_block_group_cache 96 *block_group, struct btrfs_path *path) 97 { 98 struct inode *inode = NULL; 99 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 100 101 spin_lock(&block_group->lock); 102 if (block_group->inode) 103 inode = igrab(block_group->inode); 104 spin_unlock(&block_group->lock); 105 if (inode) 106 return inode; 107 108 inode = __lookup_free_space_inode(root, path, 109 block_group->key.objectid); 110 if (IS_ERR(inode)) 111 return inode; 112 113 spin_lock(&block_group->lock); 114 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 115 btrfs_info(root->fs_info, 116 "Old style space inode found, converting."); 117 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 118 BTRFS_INODE_NODATACOW; 119 block_group->disk_cache_state = BTRFS_DC_CLEAR; 120 } 121 122 if 
(!block_group->iref) { 123 block_group->inode = igrab(inode); 124 block_group->iref = 1; 125 } 126 spin_unlock(&block_group->lock); 127 128 return inode; 129 } 130 131 static int __create_free_space_inode(struct btrfs_root *root, 132 struct btrfs_trans_handle *trans, 133 struct btrfs_path *path, 134 u64 ino, u64 offset) 135 { 136 struct btrfs_key key; 137 struct btrfs_disk_key disk_key; 138 struct btrfs_free_space_header *header; 139 struct btrfs_inode_item *inode_item; 140 struct extent_buffer *leaf; 141 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 142 int ret; 143 144 ret = btrfs_insert_empty_inode(trans, root, path, ino); 145 if (ret) 146 return ret; 147 148 /* We inline crc's for the free disk space cache */ 149 if (ino != BTRFS_FREE_INO_OBJECTID) 150 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 151 152 leaf = path->nodes[0]; 153 inode_item = btrfs_item_ptr(leaf, path->slots[0], 154 struct btrfs_inode_item); 155 btrfs_item_key(leaf, &disk_key, path->slots[0]); 156 memset_extent_buffer(leaf, 0, (unsigned long)inode_item, 157 sizeof(*inode_item)); 158 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 159 btrfs_set_inode_size(leaf, inode_item, 0); 160 btrfs_set_inode_nbytes(leaf, inode_item, 0); 161 btrfs_set_inode_uid(leaf, inode_item, 0); 162 btrfs_set_inode_gid(leaf, inode_item, 0); 163 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 164 btrfs_set_inode_flags(leaf, inode_item, flags); 165 btrfs_set_inode_nlink(leaf, inode_item, 1); 166 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 167 btrfs_set_inode_block_group(leaf, inode_item, offset); 168 btrfs_mark_buffer_dirty(leaf); 169 btrfs_release_path(path); 170 171 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 172 key.offset = offset; 173 key.type = 0; 174 ret = btrfs_insert_empty_item(trans, root, path, &key, 175 sizeof(struct btrfs_free_space_header)); 176 if (ret < 0) { 177 btrfs_release_path(path); 178 return ret; 179 } 180 181 leaf = path->nodes[0]; 182 header = btrfs_item_ptr(leaf, path->slots[0], 183 struct btrfs_free_space_header); 184 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); 185 btrfs_set_free_space_key(leaf, header, &disk_key); 186 btrfs_mark_buffer_dirty(leaf); 187 btrfs_release_path(path); 188 189 return 0; 190 } 191 192 int create_free_space_inode(struct btrfs_root *root, 193 struct btrfs_trans_handle *trans, 194 struct btrfs_block_group_cache *block_group, 195 struct btrfs_path *path) 196 { 197 int ret; 198 u64 ino; 199 200 ret = btrfs_find_free_objectid(root, &ino); 201 if (ret < 0) 202 return ret; 203 204 return __create_free_space_inode(root, trans, path, ino, 205 block_group->key.objectid); 206 } 207 208 int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, 209 struct btrfs_block_rsv *rsv) 210 { 211 u64 needed_bytes; 212 int ret; 213 214 /* 1 for slack space, 1 for updating the inode */ 215 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + 216 btrfs_calc_trans_metadata_size(root, 1); 217 218 spin_lock(&rsv->lock); 219 if (rsv->reserved < needed_bytes) 220 ret = -ENOSPC; 221 else 222 ret = 0; 223 spin_unlock(&rsv->lock); 224 return ret; 225 } 226 227 int btrfs_truncate_free_space_cache(struct btrfs_root *root, 228 struct btrfs_trans_handle *trans, 229 struct btrfs_block_group_cache *block_group, 230 struct inode *inode) 231 { 232 int ret = 0; 233 struct btrfs_path *path = btrfs_alloc_path(); 234 bool locked = false; 235 236 if (!path) { 237 ret = -ENOMEM; 238 goto fail; 239 } 240 241 if (block_group) { 242 locked = true; 243 
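	/*
	 * If a cache write for this block group is still in flight, wait for
	 * it below before truncating so the truncate does not race with the
	 * cache file's pages being written back.
	 */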
mutex_lock(&trans->transaction->cache_write_mutex); 244 if (!list_empty(&block_group->io_list)) { 245 list_del_init(&block_group->io_list); 246 247 btrfs_wait_cache_io(root, trans, block_group, 248 &block_group->io_ctl, path, 249 block_group->key.objectid); 250 btrfs_put_block_group(block_group); 251 } 252 253 /* 254 * now that we've truncated the cache away, its no longer 255 * setup or written 256 */ 257 spin_lock(&block_group->lock); 258 block_group->disk_cache_state = BTRFS_DC_CLEAR; 259 spin_unlock(&block_group->lock); 260 } 261 btrfs_free_path(path); 262 263 btrfs_i_size_write(inode, 0); 264 truncate_pagecache(inode, 0); 265 266 /* 267 * We don't need an orphan item because truncating the free space cache 268 * will never be split across transactions. 269 * We don't need to check for -EAGAIN because we're a free space 270 * cache inode 271 */ 272 ret = btrfs_truncate_inode_items(trans, root, inode, 273 0, BTRFS_EXTENT_DATA_KEY); 274 if (ret) 275 goto fail; 276 277 ret = btrfs_update_inode(trans, root, inode); 278 279 fail: 280 if (locked) 281 mutex_unlock(&trans->transaction->cache_write_mutex); 282 if (ret) 283 btrfs_abort_transaction(trans, ret); 284 285 return ret; 286 } 287 288 static int readahead_cache(struct inode *inode) 289 { 290 struct file_ra_state *ra; 291 unsigned long last_index; 292 293 ra = kzalloc(sizeof(*ra), GFP_NOFS); 294 if (!ra) 295 return -ENOMEM; 296 297 file_ra_state_init(ra, inode->i_mapping); 298 last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; 299 300 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 301 302 kfree(ra); 303 304 return 0; 305 } 306 307 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 308 struct btrfs_root *root, int write) 309 { 310 int num_pages; 311 int check_crcs = 0; 312 313 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); 314 315 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) 316 check_crcs = 1; 317 318 /* Make sure we can fit our crcs into the first page */ 319 if (write && check_crcs && 320 (num_pages * sizeof(u32)) >= PAGE_SIZE) 321 return -ENOSPC; 322 323 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 324 325 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 326 if (!io_ctl->pages) 327 return -ENOMEM; 328 329 io_ctl->num_pages = num_pages; 330 io_ctl->root = root; 331 io_ctl->check_crcs = check_crcs; 332 io_ctl->inode = inode; 333 334 return 0; 335 } 336 337 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 338 { 339 kfree(io_ctl->pages); 340 io_ctl->pages = NULL; 341 } 342 343 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 344 { 345 if (io_ctl->cur) { 346 io_ctl->cur = NULL; 347 io_ctl->orig = NULL; 348 } 349 } 350 351 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 352 { 353 ASSERT(io_ctl->index < io_ctl->num_pages); 354 io_ctl->page = io_ctl->pages[io_ctl->index++]; 355 io_ctl->cur = page_address(io_ctl->page); 356 io_ctl->orig = io_ctl->cur; 357 io_ctl->size = PAGE_SIZE; 358 if (clear) 359 memset(io_ctl->cur, 0, PAGE_SIZE); 360 } 361 362 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 363 { 364 int i; 365 366 io_ctl_unmap_page(io_ctl); 367 368 for (i = 0; i < io_ctl->num_pages; i++) { 369 if (io_ctl->pages[i]) { 370 ClearPageChecked(io_ctl->pages[i]); 371 unlock_page(io_ctl->pages[i]); 372 put_page(io_ctl->pages[i]); 373 } 374 } 375 } 376 377 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, 378 int uptodate) 379 { 380 struct page *page; 381 gfp_t mask = 
btrfs_alloc_write_mask(inode->i_mapping); 382 int i; 383 384 for (i = 0; i < io_ctl->num_pages; i++) { 385 page = find_or_create_page(inode->i_mapping, i, mask); 386 if (!page) { 387 io_ctl_drop_pages(io_ctl); 388 return -ENOMEM; 389 } 390 io_ctl->pages[i] = page; 391 if (uptodate && !PageUptodate(page)) { 392 btrfs_readpage(NULL, page); 393 lock_page(page); 394 if (!PageUptodate(page)) { 395 btrfs_err(BTRFS_I(inode)->root->fs_info, 396 "error reading free space cache"); 397 io_ctl_drop_pages(io_ctl); 398 return -EIO; 399 } 400 } 401 } 402 403 for (i = 0; i < io_ctl->num_pages; i++) { 404 clear_page_dirty_for_io(io_ctl->pages[i]); 405 set_page_extent_mapped(io_ctl->pages[i]); 406 } 407 408 return 0; 409 } 410 411 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 412 { 413 __le64 *val; 414 415 io_ctl_map_page(io_ctl, 1); 416 417 /* 418 * Skip the csum areas. If we don't check crcs then we just have a 419 * 64bit chunk at the front of the first page. 420 */ 421 if (io_ctl->check_crcs) { 422 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 423 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 424 } else { 425 io_ctl->cur += sizeof(u64); 426 io_ctl->size -= sizeof(u64) * 2; 427 } 428 429 val = io_ctl->cur; 430 *val = cpu_to_le64(generation); 431 io_ctl->cur += sizeof(u64); 432 } 433 434 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 435 { 436 __le64 *gen; 437 438 /* 439 * Skip the crc area. If we don't check crcs then we just have a 64bit 440 * chunk at the front of the first page. 441 */ 442 if (io_ctl->check_crcs) { 443 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 444 io_ctl->size -= sizeof(u64) + 445 (sizeof(u32) * io_ctl->num_pages); 446 } else { 447 io_ctl->cur += sizeof(u64); 448 io_ctl->size -= sizeof(u64) * 2; 449 } 450 451 gen = io_ctl->cur; 452 if (le64_to_cpu(*gen) != generation) { 453 btrfs_err_rl(io_ctl->root->fs_info, 454 "space cache generation (%llu) does not match inode (%llu)", 455 *gen, generation); 456 io_ctl_unmap_page(io_ctl); 457 return -EIO; 458 } 459 io_ctl->cur += sizeof(u64); 460 return 0; 461 } 462 463 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 464 { 465 u32 *tmp; 466 u32 crc = ~(u32)0; 467 unsigned offset = 0; 468 469 if (!io_ctl->check_crcs) { 470 io_ctl_unmap_page(io_ctl); 471 return; 472 } 473 474 if (index == 0) 475 offset = sizeof(u32) * io_ctl->num_pages; 476 477 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 478 PAGE_SIZE - offset); 479 btrfs_csum_final(crc, (char *)&crc); 480 io_ctl_unmap_page(io_ctl); 481 tmp = page_address(io_ctl->pages[0]); 482 tmp += index; 483 *tmp = crc; 484 } 485 486 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 487 { 488 u32 *tmp, val; 489 u32 crc = ~(u32)0; 490 unsigned offset = 0; 491 492 if (!io_ctl->check_crcs) { 493 io_ctl_map_page(io_ctl, 0); 494 return 0; 495 } 496 497 if (index == 0) 498 offset = sizeof(u32) * io_ctl->num_pages; 499 500 tmp = page_address(io_ctl->pages[0]); 501 tmp += index; 502 val = *tmp; 503 504 io_ctl_map_page(io_ctl, 0); 505 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 506 PAGE_SIZE - offset); 507 btrfs_csum_final(crc, (char *)&crc); 508 if (val != crc) { 509 btrfs_err_rl(io_ctl->root->fs_info, 510 "csum mismatch on free space cache"); 511 io_ctl_unmap_page(io_ctl); 512 return -EIO; 513 } 514 515 return 0; 516 } 517 518 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 519 void *bitmap) 520 { 521 struct btrfs_free_space_entry *entry; 522 523 
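	/*
	 * A rough layout sketch (assuming 4 KiB pages and the packed 17-byte
	 * btrfs_free_space_entry): when crcs are inlined, page 0 starts with
	 * one u32 crc per page followed by the u64 generation, so a two-page
	 * cache file leaves 4096 - 2 * 4 - 8 = 4080 bytes (about 240 extent
	 * entries) on the first page, while each later page holds either
	 * another ~240 entries or one full bitmap.
	 */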
if (!io_ctl->cur) 524 return -ENOSPC; 525 526 entry = io_ctl->cur; 527 entry->offset = cpu_to_le64(offset); 528 entry->bytes = cpu_to_le64(bytes); 529 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 530 BTRFS_FREE_SPACE_EXTENT; 531 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 532 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 533 534 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 535 return 0; 536 537 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 538 539 /* No more pages to map */ 540 if (io_ctl->index >= io_ctl->num_pages) 541 return 0; 542 543 /* map the next page */ 544 io_ctl_map_page(io_ctl, 1); 545 return 0; 546 } 547 548 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 549 { 550 if (!io_ctl->cur) 551 return -ENOSPC; 552 553 /* 554 * If we aren't at the start of the current page, unmap this one and 555 * map the next one if there is any left. 556 */ 557 if (io_ctl->cur != io_ctl->orig) { 558 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 559 if (io_ctl->index >= io_ctl->num_pages) 560 return -ENOSPC; 561 io_ctl_map_page(io_ctl, 0); 562 } 563 564 memcpy(io_ctl->cur, bitmap, PAGE_SIZE); 565 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 566 if (io_ctl->index < io_ctl->num_pages) 567 io_ctl_map_page(io_ctl, 0); 568 return 0; 569 } 570 571 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 572 { 573 /* 574 * If we're not on the boundary we know we've modified the page and we 575 * need to crc the page. 576 */ 577 if (io_ctl->cur != io_ctl->orig) 578 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 579 else 580 io_ctl_unmap_page(io_ctl); 581 582 while (io_ctl->index < io_ctl->num_pages) { 583 io_ctl_map_page(io_ctl, 1); 584 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 585 } 586 } 587 588 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 589 struct btrfs_free_space *entry, u8 *type) 590 { 591 struct btrfs_free_space_entry *e; 592 int ret; 593 594 if (!io_ctl->cur) { 595 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 596 if (ret) 597 return ret; 598 } 599 600 e = io_ctl->cur; 601 entry->offset = le64_to_cpu(e->offset); 602 entry->bytes = le64_to_cpu(e->bytes); 603 *type = e->type; 604 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 605 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 606 607 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 608 return 0; 609 610 io_ctl_unmap_page(io_ctl); 611 612 return 0; 613 } 614 615 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 616 struct btrfs_free_space *entry) 617 { 618 int ret; 619 620 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 621 if (ret) 622 return ret; 623 624 memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE); 625 io_ctl_unmap_page(io_ctl); 626 627 return 0; 628 } 629 630 /* 631 * Since we attach pinned extents after the fact we can have contiguous sections 632 * of free space that are split up in entries. This poses a problem with the 633 * tree logging stuff since it could have allocated across what appears to be 2 634 * entries since we would have merged the entries when adding the pinned extents 635 * back to the free space cache. So run through the space cache that we just 636 * loaded and merge contiguous entries. This will make the log replay stuff not 637 * blow up and it will make for nicer allocator behavior. 
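 *
 * For example, the on-disk cache may hold [0, 4K) as a regular entry and
 * [4K, 8K) as a pinned-extent entry that was written separately; in memory
 * these had been merged into a single [0, 8K) entry, so the log may have
 * allocated a range such as [2K, 6K) that straddles the on-disk boundary.
 * Merging the loaded entries back together keeps that allocation inside a
 * single cache entry.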
638 */ 639 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 640 { 641 struct btrfs_free_space *e, *prev = NULL; 642 struct rb_node *n; 643 644 again: 645 spin_lock(&ctl->tree_lock); 646 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 647 e = rb_entry(n, struct btrfs_free_space, offset_index); 648 if (!prev) 649 goto next; 650 if (e->bitmap || prev->bitmap) 651 goto next; 652 if (prev->offset + prev->bytes == e->offset) { 653 unlink_free_space(ctl, prev); 654 unlink_free_space(ctl, e); 655 prev->bytes += e->bytes; 656 kmem_cache_free(btrfs_free_space_cachep, e); 657 link_free_space(ctl, prev); 658 prev = NULL; 659 spin_unlock(&ctl->tree_lock); 660 goto again; 661 } 662 next: 663 prev = e; 664 } 665 spin_unlock(&ctl->tree_lock); 666 } 667 668 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 669 struct btrfs_free_space_ctl *ctl, 670 struct btrfs_path *path, u64 offset) 671 { 672 struct btrfs_free_space_header *header; 673 struct extent_buffer *leaf; 674 struct btrfs_io_ctl io_ctl; 675 struct btrfs_key key; 676 struct btrfs_free_space *e, *n; 677 LIST_HEAD(bitmaps); 678 u64 num_entries; 679 u64 num_bitmaps; 680 u64 generation; 681 u8 type; 682 int ret = 0; 683 684 /* Nothing in the space cache, goodbye */ 685 if (!i_size_read(inode)) 686 return 0; 687 688 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 689 key.offset = offset; 690 key.type = 0; 691 692 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 693 if (ret < 0) 694 return 0; 695 else if (ret > 0) { 696 btrfs_release_path(path); 697 return 0; 698 } 699 700 ret = -1; 701 702 leaf = path->nodes[0]; 703 header = btrfs_item_ptr(leaf, path->slots[0], 704 struct btrfs_free_space_header); 705 num_entries = btrfs_free_space_entries(leaf, header); 706 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 707 generation = btrfs_free_space_generation(leaf, header); 708 btrfs_release_path(path); 709 710 if (!BTRFS_I(inode)->generation) { 711 btrfs_info(root->fs_info, 712 "The free space cache file (%llu) is invalid. 
skip it\n", 713 offset); 714 return 0; 715 } 716 717 if (BTRFS_I(inode)->generation != generation) { 718 btrfs_err(root->fs_info, 719 "free space inode generation (%llu) did not match free space cache generation (%llu)", 720 BTRFS_I(inode)->generation, generation); 721 return 0; 722 } 723 724 if (!num_entries) 725 return 0; 726 727 ret = io_ctl_init(&io_ctl, inode, root, 0); 728 if (ret) 729 return ret; 730 731 ret = readahead_cache(inode); 732 if (ret) 733 goto out; 734 735 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 736 if (ret) 737 goto out; 738 739 ret = io_ctl_check_crc(&io_ctl, 0); 740 if (ret) 741 goto free_cache; 742 743 ret = io_ctl_check_generation(&io_ctl, generation); 744 if (ret) 745 goto free_cache; 746 747 while (num_entries) { 748 e = kmem_cache_zalloc(btrfs_free_space_cachep, 749 GFP_NOFS); 750 if (!e) 751 goto free_cache; 752 753 ret = io_ctl_read_entry(&io_ctl, e, &type); 754 if (ret) { 755 kmem_cache_free(btrfs_free_space_cachep, e); 756 goto free_cache; 757 } 758 759 if (!e->bytes) { 760 kmem_cache_free(btrfs_free_space_cachep, e); 761 goto free_cache; 762 } 763 764 if (type == BTRFS_FREE_SPACE_EXTENT) { 765 spin_lock(&ctl->tree_lock); 766 ret = link_free_space(ctl, e); 767 spin_unlock(&ctl->tree_lock); 768 if (ret) { 769 btrfs_err(root->fs_info, 770 "Duplicate entries in free space cache, dumping"); 771 kmem_cache_free(btrfs_free_space_cachep, e); 772 goto free_cache; 773 } 774 } else { 775 ASSERT(num_bitmaps); 776 num_bitmaps--; 777 e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 778 if (!e->bitmap) { 779 kmem_cache_free( 780 btrfs_free_space_cachep, e); 781 goto free_cache; 782 } 783 spin_lock(&ctl->tree_lock); 784 ret = link_free_space(ctl, e); 785 ctl->total_bitmaps++; 786 ctl->op->recalc_thresholds(ctl); 787 spin_unlock(&ctl->tree_lock); 788 if (ret) { 789 btrfs_err(root->fs_info, 790 "Duplicate entries in free space cache, dumping"); 791 kmem_cache_free(btrfs_free_space_cachep, e); 792 goto free_cache; 793 } 794 list_add_tail(&e->list, &bitmaps); 795 } 796 797 num_entries--; 798 } 799 800 io_ctl_unmap_page(&io_ctl); 801 802 /* 803 * We add the bitmaps at the end of the entries in order that 804 * the bitmap entries are added to the cache. 805 */ 806 list_for_each_entry_safe(e, n, &bitmaps, list) { 807 list_del_init(&e->list); 808 ret = io_ctl_read_bitmap(&io_ctl, e); 809 if (ret) 810 goto free_cache; 811 } 812 813 io_ctl_drop_pages(&io_ctl); 814 merge_space_tree(ctl); 815 ret = 1; 816 out: 817 io_ctl_free(&io_ctl); 818 return ret; 819 free_cache: 820 io_ctl_drop_pages(&io_ctl); 821 __btrfs_remove_free_space_cache(ctl); 822 goto out; 823 } 824 825 int load_free_space_cache(struct btrfs_fs_info *fs_info, 826 struct btrfs_block_group_cache *block_group) 827 { 828 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 829 struct btrfs_root *root = fs_info->tree_root; 830 struct inode *inode; 831 struct btrfs_path *path; 832 int ret = 0; 833 bool matched; 834 u64 used = btrfs_block_group_used(&block_group->item); 835 836 /* 837 * If this block group has been marked to be cleared for one reason or 838 * another then we can't trust the on disk cache, so just return. 
839 */ 840 spin_lock(&block_group->lock); 841 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 842 spin_unlock(&block_group->lock); 843 return 0; 844 } 845 spin_unlock(&block_group->lock); 846 847 path = btrfs_alloc_path(); 848 if (!path) 849 return 0; 850 path->search_commit_root = 1; 851 path->skip_locking = 1; 852 853 inode = lookup_free_space_inode(root, block_group, path); 854 if (IS_ERR(inode)) { 855 btrfs_free_path(path); 856 return 0; 857 } 858 859 /* We may have converted the inode and made the cache invalid. */ 860 spin_lock(&block_group->lock); 861 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 862 spin_unlock(&block_group->lock); 863 btrfs_free_path(path); 864 goto out; 865 } 866 spin_unlock(&block_group->lock); 867 868 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 869 path, block_group->key.objectid); 870 btrfs_free_path(path); 871 if (ret <= 0) 872 goto out; 873 874 spin_lock(&ctl->tree_lock); 875 matched = (ctl->free_space == (block_group->key.offset - used - 876 block_group->bytes_super)); 877 spin_unlock(&ctl->tree_lock); 878 879 if (!matched) { 880 __btrfs_remove_free_space_cache(ctl); 881 btrfs_warn(fs_info, 882 "block group %llu has wrong amount of free space", 883 block_group->key.objectid); 884 ret = -1; 885 } 886 out: 887 if (ret < 0) { 888 /* This cache is bogus, make sure it gets cleared */ 889 spin_lock(&block_group->lock); 890 block_group->disk_cache_state = BTRFS_DC_CLEAR; 891 spin_unlock(&block_group->lock); 892 ret = 0; 893 894 btrfs_warn(fs_info, 895 "failed to load free space cache for block group %llu, rebuilding it now", 896 block_group->key.objectid); 897 } 898 899 iput(inode); 900 return ret; 901 } 902 903 static noinline_for_stack 904 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 905 struct btrfs_free_space_ctl *ctl, 906 struct btrfs_block_group_cache *block_group, 907 int *entries, int *bitmaps, 908 struct list_head *bitmap_list) 909 { 910 int ret; 911 struct btrfs_free_cluster *cluster = NULL; 912 struct btrfs_free_cluster *cluster_locked = NULL; 913 struct rb_node *node = rb_first(&ctl->free_space_offset); 914 struct btrfs_trim_range *trim_entry; 915 916 /* Get the cluster for this block_group if it exists */ 917 if (block_group && !list_empty(&block_group->cluster_list)) { 918 cluster = list_entry(block_group->cluster_list.next, 919 struct btrfs_free_cluster, 920 block_group_list); 921 } 922 923 if (!node && cluster) { 924 cluster_locked = cluster; 925 spin_lock(&cluster_locked->lock); 926 node = rb_first(&cluster->root); 927 cluster = NULL; 928 } 929 930 /* Write out the extent entries */ 931 while (node) { 932 struct btrfs_free_space *e; 933 934 e = rb_entry(node, struct btrfs_free_space, offset_index); 935 *entries += 1; 936 937 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 938 e->bitmap); 939 if (ret) 940 goto fail; 941 942 if (e->bitmap) { 943 list_add_tail(&e->list, bitmap_list); 944 *bitmaps += 1; 945 } 946 node = rb_next(node); 947 if (!node && cluster) { 948 node = rb_first(&cluster->root); 949 cluster_locked = cluster; 950 spin_lock(&cluster_locked->lock); 951 cluster = NULL; 952 } 953 } 954 if (cluster_locked) { 955 spin_unlock(&cluster_locked->lock); 956 cluster_locked = NULL; 957 } 958 959 /* 960 * Make sure we don't miss any range that was removed from our rbtree 961 * because trimming is running. Otherwise after a umount+mount (or crash 962 * after committing the transaction) we would leak free space and get 963 * an inconsistent free space cache report from fsck. 
964 */ 965 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 966 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 967 trim_entry->bytes, NULL); 968 if (ret) 969 goto fail; 970 *entries += 1; 971 } 972 973 return 0; 974 fail: 975 if (cluster_locked) 976 spin_unlock(&cluster_locked->lock); 977 return -ENOSPC; 978 } 979 980 static noinline_for_stack int 981 update_cache_item(struct btrfs_trans_handle *trans, 982 struct btrfs_root *root, 983 struct inode *inode, 984 struct btrfs_path *path, u64 offset, 985 int entries, int bitmaps) 986 { 987 struct btrfs_key key; 988 struct btrfs_free_space_header *header; 989 struct extent_buffer *leaf; 990 int ret; 991 992 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 993 key.offset = offset; 994 key.type = 0; 995 996 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 997 if (ret < 0) { 998 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 999 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1000 GFP_NOFS); 1001 goto fail; 1002 } 1003 leaf = path->nodes[0]; 1004 if (ret > 0) { 1005 struct btrfs_key found_key; 1006 ASSERT(path->slots[0]); 1007 path->slots[0]--; 1008 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1009 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1010 found_key.offset != offset) { 1011 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1012 inode->i_size - 1, 1013 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1014 NULL, GFP_NOFS); 1015 btrfs_release_path(path); 1016 goto fail; 1017 } 1018 } 1019 1020 BTRFS_I(inode)->generation = trans->transid; 1021 header = btrfs_item_ptr(leaf, path->slots[0], 1022 struct btrfs_free_space_header); 1023 btrfs_set_free_space_entries(leaf, header, entries); 1024 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1025 btrfs_set_free_space_generation(leaf, header, trans->transid); 1026 btrfs_mark_buffer_dirty(leaf); 1027 btrfs_release_path(path); 1028 1029 return 0; 1030 1031 fail: 1032 return -1; 1033 } 1034 1035 static noinline_for_stack int 1036 write_pinned_extent_entries(struct btrfs_root *root, 1037 struct btrfs_block_group_cache *block_group, 1038 struct btrfs_io_ctl *io_ctl, 1039 int *entries) 1040 { 1041 u64 start, extent_start, extent_end, len; 1042 struct extent_io_tree *unpin = NULL; 1043 int ret; 1044 1045 if (!block_group) 1046 return 0; 1047 1048 /* 1049 * We want to add any pinned extents to our free space cache 1050 * so we don't leak the space 1051 * 1052 * We shouldn't have switched the pinned extents yet so this is the 1053 * right one 1054 */ 1055 unpin = root->fs_info->pinned_extents; 1056 1057 start = block_group->key.objectid; 1058 1059 while (start < block_group->key.objectid + block_group->key.offset) { 1060 ret = find_first_extent_bit(unpin, start, 1061 &extent_start, &extent_end, 1062 EXTENT_DIRTY, NULL); 1063 if (ret) 1064 return 0; 1065 1066 /* This pinned extent is out of our range */ 1067 if (extent_start >= block_group->key.objectid + 1068 block_group->key.offset) 1069 return 0; 1070 1071 extent_start = max(extent_start, start); 1072 extent_end = min(block_group->key.objectid + 1073 block_group->key.offset, extent_end + 1); 1074 len = extent_end - extent_start; 1075 1076 *entries += 1; 1077 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1078 if (ret) 1079 return -ENOSPC; 1080 1081 start = extent_end; 1082 } 1083 1084 return 0; 1085 } 1086 1087 static noinline_for_stack int 1088 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1089 { 1090 struct btrfs_free_space *entry, *next; 1091 int ret; 1092 1093 /* Write 
out the bitmaps */ 1094 list_for_each_entry_safe(entry, next, bitmap_list, list) { 1095 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1096 if (ret) 1097 return -ENOSPC; 1098 list_del_init(&entry->list); 1099 } 1100 1101 return 0; 1102 } 1103 1104 static int flush_dirty_cache(struct inode *inode) 1105 { 1106 int ret; 1107 1108 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1109 if (ret) 1110 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1111 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1112 GFP_NOFS); 1113 1114 return ret; 1115 } 1116 1117 static void noinline_for_stack 1118 cleanup_bitmap_list(struct list_head *bitmap_list) 1119 { 1120 struct btrfs_free_space *entry, *next; 1121 1122 list_for_each_entry_safe(entry, next, bitmap_list, list) 1123 list_del_init(&entry->list); 1124 } 1125 1126 static void noinline_for_stack 1127 cleanup_write_cache_enospc(struct inode *inode, 1128 struct btrfs_io_ctl *io_ctl, 1129 struct extent_state **cached_state, 1130 struct list_head *bitmap_list) 1131 { 1132 io_ctl_drop_pages(io_ctl); 1133 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1134 i_size_read(inode) - 1, cached_state, 1135 GFP_NOFS); 1136 } 1137 1138 int btrfs_wait_cache_io(struct btrfs_root *root, 1139 struct btrfs_trans_handle *trans, 1140 struct btrfs_block_group_cache *block_group, 1141 struct btrfs_io_ctl *io_ctl, 1142 struct btrfs_path *path, u64 offset) 1143 { 1144 int ret; 1145 struct inode *inode = io_ctl->inode; 1146 1147 if (!inode) 1148 return 0; 1149 1150 if (block_group) 1151 root = root->fs_info->tree_root; 1152 1153 /* Flush the dirty pages in the cache file. */ 1154 ret = flush_dirty_cache(inode); 1155 if (ret) 1156 goto out; 1157 1158 /* Update the cache item to tell everyone this cache file is valid. */ 1159 ret = update_cache_item(trans, root, inode, path, offset, 1160 io_ctl->entries, io_ctl->bitmaps); 1161 out: 1162 io_ctl_free(io_ctl); 1163 if (ret) { 1164 invalidate_inode_pages2(inode->i_mapping); 1165 BTRFS_I(inode)->generation = 0; 1166 if (block_group) { 1167 #ifdef DEBUG 1168 btrfs_err(root->fs_info, 1169 "failed to write free space cache for block group %llu", 1170 block_group->key.objectid); 1171 #endif 1172 } 1173 } 1174 btrfs_update_inode(trans, root, inode); 1175 1176 if (block_group) { 1177 /* the dirty list is protected by the dirty_bgs_lock */ 1178 spin_lock(&trans->transaction->dirty_bgs_lock); 1179 1180 /* the disk_cache_state is protected by the block group lock */ 1181 spin_lock(&block_group->lock); 1182 1183 /* 1184 * only mark this as written if we didn't get put back on 1185 * the dirty list while waiting for IO. 
Otherwise our 1186 * cache state won't be right, and we won't get written again 1187 */ 1188 if (!ret && list_empty(&block_group->dirty_list)) 1189 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1190 else if (ret) 1191 block_group->disk_cache_state = BTRFS_DC_ERROR; 1192 1193 spin_unlock(&block_group->lock); 1194 spin_unlock(&trans->transaction->dirty_bgs_lock); 1195 io_ctl->inode = NULL; 1196 iput(inode); 1197 } 1198 1199 return ret; 1200 1201 } 1202 1203 /** 1204 * __btrfs_write_out_cache - write out cached info to an inode 1205 * @root - the root the inode belongs to 1206 * @ctl - the free space cache we are going to write out 1207 * @block_group - the block_group for this cache if it belongs to a block_group 1208 * @trans - the trans handle 1209 * @path - the path to use 1210 * @offset - the offset for the key we'll insert 1211 * 1212 * This function writes out a free space cache struct to disk for quick recovery 1213 * on mount. This will return 0 if it was successful in writing the cache out, 1214 * or an errno if it was not. 1215 */ 1216 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1217 struct btrfs_free_space_ctl *ctl, 1218 struct btrfs_block_group_cache *block_group, 1219 struct btrfs_io_ctl *io_ctl, 1220 struct btrfs_trans_handle *trans, 1221 struct btrfs_path *path, u64 offset) 1222 { 1223 struct extent_state *cached_state = NULL; 1224 LIST_HEAD(bitmap_list); 1225 int entries = 0; 1226 int bitmaps = 0; 1227 int ret; 1228 int must_iput = 0; 1229 1230 if (!i_size_read(inode)) 1231 return -EIO; 1232 1233 WARN_ON(io_ctl->pages); 1234 ret = io_ctl_init(io_ctl, inode, root, 1); 1235 if (ret) 1236 return ret; 1237 1238 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { 1239 down_write(&block_group->data_rwsem); 1240 spin_lock(&block_group->lock); 1241 if (block_group->delalloc_bytes) { 1242 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 1243 spin_unlock(&block_group->lock); 1244 up_write(&block_group->data_rwsem); 1245 BTRFS_I(inode)->generation = 0; 1246 ret = 0; 1247 must_iput = 1; 1248 goto out; 1249 } 1250 spin_unlock(&block_group->lock); 1251 } 1252 1253 /* Lock all pages first so we can lock the extent safely. */ 1254 ret = io_ctl_prepare_pages(io_ctl, inode, 0); 1255 if (ret) 1256 goto out; 1257 1258 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1259 &cached_state); 1260 1261 io_ctl_set_generation(io_ctl, trans->transid); 1262 1263 mutex_lock(&ctl->cache_writeout_mutex); 1264 /* Write out the extent entries in the free space cache */ 1265 spin_lock(&ctl->tree_lock); 1266 ret = write_cache_extent_entries(io_ctl, ctl, 1267 block_group, &entries, &bitmaps, 1268 &bitmap_list); 1269 if (ret) 1270 goto out_nospc_locked; 1271 1272 /* 1273 * Some spaces that are freed in the current transaction are pinned, 1274 * they will be added into free space cache after the transaction is 1275 * committed, we shouldn't lose them. 1276 * 1277 * If this changes while we are working we'll get added back to 1278 * the dirty list and redo it. No locking needed 1279 */ 1280 ret = write_pinned_extent_entries(root, block_group, io_ctl, &entries); 1281 if (ret) 1282 goto out_nospc_locked; 1283 1284 /* 1285 * At last, we write out all the bitmaps and keep cache_writeout_mutex 1286 * locked while doing it because a concurrent trim can be manipulating 1287 * or freeing the bitmap. 
	 */
	ret = write_bitmap_entries(io_ctl, &bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);
	if (ret)
		goto out_nospc;

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(io_ctl);

	/* Everything is written out, now we dirty the pages in the file. */
	ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages,
				0, i_size_read(inode), &cached_state);
	if (ret)
		goto out_nospc;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);
	/*
	 * Release the pages and unlock the extent, we will flush
	 * them out later
	 */
	io_ctl_drop_pages(io_ctl);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	/*
	 * at this point the pages are under IO and we're happy. The caller
	 * is responsible for waiting on them and updating the cache and the
	 * inode.
	 */
	io_ctl->entries = entries;
	io_ctl->bitmaps = bitmaps;

	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
	if (ret)
		goto out;

	return 0;

out:
	io_ctl->inode = NULL;
	io_ctl_free(io_ctl);
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	if (must_iput)
		iput(inode);
	return ret;

out_nospc_locked:
	cleanup_bitmap_list(&bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

out_nospc:
	cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);

	goto out;
}

int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	root = root->fs_info->tree_root;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, block_group,
				      &block_group->io_ctl, trans,
				      path, block_group->key.objectid);
	if (ret) {
#ifdef DEBUG
		btrfs_err(root->fs_info,
			  "failed to write free space cache for block group %llu",
			  block_group->key.objectid);
#endif
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);

		block_group->io_ctl.inode = NULL;
		iput(inode);
	}

	/*
	 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
	 * to wait for IO and put the inode
	 */

	return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	ASSERT(offset >= bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return
(unsigned long)(div_u64(bytes, unit)); 1413 } 1414 1415 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1416 u64 offset) 1417 { 1418 u64 bitmap_start; 1419 u64 bytes_per_bitmap; 1420 1421 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1422 bitmap_start = offset - ctl->start; 1423 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1424 bitmap_start *= bytes_per_bitmap; 1425 bitmap_start += ctl->start; 1426 1427 return bitmap_start; 1428 } 1429 1430 static int tree_insert_offset(struct rb_root *root, u64 offset, 1431 struct rb_node *node, int bitmap) 1432 { 1433 struct rb_node **p = &root->rb_node; 1434 struct rb_node *parent = NULL; 1435 struct btrfs_free_space *info; 1436 1437 while (*p) { 1438 parent = *p; 1439 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1440 1441 if (offset < info->offset) { 1442 p = &(*p)->rb_left; 1443 } else if (offset > info->offset) { 1444 p = &(*p)->rb_right; 1445 } else { 1446 /* 1447 * we could have a bitmap entry and an extent entry 1448 * share the same offset. If this is the case, we want 1449 * the extent entry to always be found first if we do a 1450 * linear search through the tree, since we want to have 1451 * the quickest allocation time, and allocating from an 1452 * extent is faster than allocating from a bitmap. So 1453 * if we're inserting a bitmap and we find an entry at 1454 * this offset, we want to go right, or after this entry 1455 * logically. If we are inserting an extent and we've 1456 * found a bitmap, we want to go left, or before 1457 * logically. 1458 */ 1459 if (bitmap) { 1460 if (info->bitmap) { 1461 WARN_ON_ONCE(1); 1462 return -EEXIST; 1463 } 1464 p = &(*p)->rb_right; 1465 } else { 1466 if (!info->bitmap) { 1467 WARN_ON_ONCE(1); 1468 return -EEXIST; 1469 } 1470 p = &(*p)->rb_left; 1471 } 1472 } 1473 } 1474 1475 rb_link_node(node, parent, p); 1476 rb_insert_color(node, root); 1477 1478 return 0; 1479 } 1480 1481 /* 1482 * searches the tree for the given offset. 1483 * 1484 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1485 * want a section that has at least bytes size and comes at or after the given 1486 * offset. 1487 */ 1488 static struct btrfs_free_space * 1489 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1490 u64 offset, int bitmap_only, int fuzzy) 1491 { 1492 struct rb_node *n = ctl->free_space_offset.rb_node; 1493 struct btrfs_free_space *entry, *prev = NULL; 1494 1495 /* find entry that is closest to the 'offset' */ 1496 while (1) { 1497 if (!n) { 1498 entry = NULL; 1499 break; 1500 } 1501 1502 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1503 prev = entry; 1504 1505 if (offset < entry->offset) 1506 n = n->rb_left; 1507 else if (offset > entry->offset) 1508 n = n->rb_right; 1509 else 1510 break; 1511 } 1512 1513 if (bitmap_only) { 1514 if (!entry) 1515 return NULL; 1516 if (entry->bitmap) 1517 return entry; 1518 1519 /* 1520 * bitmap entry and extent entry may share same offset, 1521 * in that case, bitmap entry comes after extent entry. 
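		 *
		 * (tree_insert_offset() establishes this ordering: at an equal
		 * offset the extent entry is linked to the left of the bitmap
		 * entry, so the rb_next() below lands on the bitmap entry, if
		 * any, for this offset.)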
1522 */ 1523 n = rb_next(n); 1524 if (!n) 1525 return NULL; 1526 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1527 if (entry->offset != offset) 1528 return NULL; 1529 1530 WARN_ON(!entry->bitmap); 1531 return entry; 1532 } else if (entry) { 1533 if (entry->bitmap) { 1534 /* 1535 * if previous extent entry covers the offset, 1536 * we should return it instead of the bitmap entry 1537 */ 1538 n = rb_prev(&entry->offset_index); 1539 if (n) { 1540 prev = rb_entry(n, struct btrfs_free_space, 1541 offset_index); 1542 if (!prev->bitmap && 1543 prev->offset + prev->bytes > offset) 1544 entry = prev; 1545 } 1546 } 1547 return entry; 1548 } 1549 1550 if (!prev) 1551 return NULL; 1552 1553 /* find last entry before the 'offset' */ 1554 entry = prev; 1555 if (entry->offset > offset) { 1556 n = rb_prev(&entry->offset_index); 1557 if (n) { 1558 entry = rb_entry(n, struct btrfs_free_space, 1559 offset_index); 1560 ASSERT(entry->offset <= offset); 1561 } else { 1562 if (fuzzy) 1563 return entry; 1564 else 1565 return NULL; 1566 } 1567 } 1568 1569 if (entry->bitmap) { 1570 n = rb_prev(&entry->offset_index); 1571 if (n) { 1572 prev = rb_entry(n, struct btrfs_free_space, 1573 offset_index); 1574 if (!prev->bitmap && 1575 prev->offset + prev->bytes > offset) 1576 return prev; 1577 } 1578 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1579 return entry; 1580 } else if (entry->offset + entry->bytes > offset) 1581 return entry; 1582 1583 if (!fuzzy) 1584 return NULL; 1585 1586 while (1) { 1587 if (entry->bitmap) { 1588 if (entry->offset + BITS_PER_BITMAP * 1589 ctl->unit > offset) 1590 break; 1591 } else { 1592 if (entry->offset + entry->bytes > offset) 1593 break; 1594 } 1595 1596 n = rb_next(&entry->offset_index); 1597 if (!n) 1598 return NULL; 1599 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1600 } 1601 return entry; 1602 } 1603 1604 static inline void 1605 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1606 struct btrfs_free_space *info) 1607 { 1608 rb_erase(&info->offset_index, &ctl->free_space_offset); 1609 ctl->free_extents--; 1610 } 1611 1612 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1613 struct btrfs_free_space *info) 1614 { 1615 __unlink_free_space(ctl, info); 1616 ctl->free_space -= info->bytes; 1617 } 1618 1619 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1620 struct btrfs_free_space *info) 1621 { 1622 int ret = 0; 1623 1624 ASSERT(info->bytes || info->bitmap); 1625 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1626 &info->offset_index, (info->bitmap != NULL)); 1627 if (ret) 1628 return ret; 1629 1630 ctl->free_space += info->bytes; 1631 ctl->free_extents++; 1632 return ret; 1633 } 1634 1635 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1636 { 1637 struct btrfs_block_group_cache *block_group = ctl->private; 1638 u64 max_bytes; 1639 u64 bitmap_bytes; 1640 u64 extent_bytes; 1641 u64 size = block_group->key.offset; 1642 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1643 u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1644 1645 max_bitmaps = max_t(u64, max_bitmaps, 1); 1646 1647 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1648 1649 /* 1650 * The goal is to keep the total amount of memory used per 1gb of space 1651 * at or below 32k, so we need to adjust how much memory we allow to be 1652 * used by extent based free space tracking 1653 */ 1654 if (size < SZ_1G) 1655 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1656 else 1657 max_bytes = MAX_CACHE_BYTES_PER_GIG * 
div_u64(size, SZ_1G);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);

	ctl->extents_thresh =
		div_u64(extent_bytes, sizeof(struct btrfs_free_space));
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	ASSERT(start + count <= BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	ASSERT(start + count <= BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}

/*
 * If we can not find suitable extent, we will use bytes to record
 * the size of the max extent.
 */
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes, bool for_alloc)
{
	unsigned long found_bits = 0;
	unsigned long max_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;
	unsigned long extent_bits;

	/*
	 * Skip searching the bitmap if we don't have a contiguous section that
	 * is large enough for this allocation.
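	 *
	 * (max_extent_size is the largest run recorded by a previous failed
	 * search of this bitmap; it is reset to zero whenever new bits are set
	 * in add_bytes_to_bitmap(), so a stale non-zero value should only
	 * over-estimate, never under-estimate, what the bitmap can offer,
	 * which makes the early bail-out below safe.)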
1738 */ 1739 if (for_alloc && 1740 bitmap_info->max_extent_size && 1741 bitmap_info->max_extent_size < *bytes) { 1742 *bytes = bitmap_info->max_extent_size; 1743 return -1; 1744 } 1745 1746 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1747 max_t(u64, *offset, bitmap_info->offset)); 1748 bits = bytes_to_bits(*bytes, ctl->unit); 1749 1750 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1751 if (for_alloc && bits == 1) { 1752 found_bits = 1; 1753 break; 1754 } 1755 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1756 BITS_PER_BITMAP, i); 1757 extent_bits = next_zero - i; 1758 if (extent_bits >= bits) { 1759 found_bits = extent_bits; 1760 break; 1761 } else if (extent_bits > max_bits) { 1762 max_bits = extent_bits; 1763 } 1764 i = next_zero; 1765 } 1766 1767 if (found_bits) { 1768 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1769 *bytes = (u64)(found_bits) * ctl->unit; 1770 return 0; 1771 } 1772 1773 *bytes = (u64)(max_bits) * ctl->unit; 1774 bitmap_info->max_extent_size = *bytes; 1775 return -1; 1776 } 1777 1778 /* Cache the size of the max extent in bytes */ 1779 static struct btrfs_free_space * 1780 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1781 unsigned long align, u64 *max_extent_size) 1782 { 1783 struct btrfs_free_space *entry; 1784 struct rb_node *node; 1785 u64 tmp; 1786 u64 align_off; 1787 int ret; 1788 1789 if (!ctl->free_space_offset.rb_node) 1790 goto out; 1791 1792 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1793 if (!entry) 1794 goto out; 1795 1796 for (node = &entry->offset_index; node; node = rb_next(node)) { 1797 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1798 if (entry->bytes < *bytes) { 1799 if (entry->bytes > *max_extent_size) 1800 *max_extent_size = entry->bytes; 1801 continue; 1802 } 1803 1804 /* make sure the space returned is big enough 1805 * to match our requested alignment 1806 */ 1807 if (*bytes >= align) { 1808 tmp = entry->offset - ctl->start + align - 1; 1809 tmp = div64_u64(tmp, align); 1810 tmp = tmp * align + ctl->start; 1811 align_off = tmp - entry->offset; 1812 } else { 1813 align_off = 0; 1814 tmp = entry->offset; 1815 } 1816 1817 if (entry->bytes < *bytes + align_off) { 1818 if (entry->bytes > *max_extent_size) 1819 *max_extent_size = entry->bytes; 1820 continue; 1821 } 1822 1823 if (entry->bitmap) { 1824 u64 size = *bytes; 1825 1826 ret = search_bitmap(ctl, entry, &tmp, &size, true); 1827 if (!ret) { 1828 *offset = tmp; 1829 *bytes = size; 1830 return entry; 1831 } else if (size > *max_extent_size) { 1832 *max_extent_size = size; 1833 } 1834 continue; 1835 } 1836 1837 *offset = tmp; 1838 *bytes = entry->bytes - align_off; 1839 return entry; 1840 } 1841 out: 1842 return NULL; 1843 } 1844 1845 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1846 struct btrfs_free_space *info, u64 offset) 1847 { 1848 info->offset = offset_to_bitmap(ctl, offset); 1849 info->bytes = 0; 1850 INIT_LIST_HEAD(&info->list); 1851 link_free_space(ctl, info); 1852 ctl->total_bitmaps++; 1853 1854 ctl->op->recalc_thresholds(ctl); 1855 } 1856 1857 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1858 struct btrfs_free_space *bitmap_info) 1859 { 1860 unlink_free_space(ctl, bitmap_info); 1861 kfree(bitmap_info->bitmap); 1862 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1863 ctl->total_bitmaps--; 1864 ctl->op->recalc_thresholds(ctl); 1865 } 1866 1867 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1868 struct 
btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap. We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	/*
	 * We set some bytes, we have no idea what the max extent size is
	 * anymore.
	 */
	info->max_extent_size = 0;

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
					     block_group))
		forced = true;
#endif

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (!forced && ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= block_group->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries. However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory limit,
	 * so still allow those block groups to have a bitmap entry.
	 */
	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
		return false;

	return true;
}

static const struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds = recalculate_thresholds,
	.use_bitmap = use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
2040 */ 2041 if (block_group && !list_empty(&block_group->cluster_list)) { 2042 struct btrfs_free_cluster *cluster; 2043 struct rb_node *node; 2044 struct btrfs_free_space *entry; 2045 2046 cluster = list_entry(block_group->cluster_list.next, 2047 struct btrfs_free_cluster, 2048 block_group_list); 2049 spin_lock(&cluster->lock); 2050 node = rb_first(&cluster->root); 2051 if (!node) { 2052 spin_unlock(&cluster->lock); 2053 goto no_cluster_bitmap; 2054 } 2055 2056 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2057 if (!entry->bitmap) { 2058 spin_unlock(&cluster->lock); 2059 goto no_cluster_bitmap; 2060 } 2061 2062 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2063 bytes_added = add_bytes_to_bitmap(ctl, entry, 2064 offset, bytes); 2065 bytes -= bytes_added; 2066 offset += bytes_added; 2067 } 2068 spin_unlock(&cluster->lock); 2069 if (!bytes) { 2070 ret = 1; 2071 goto out; 2072 } 2073 } 2074 2075 no_cluster_bitmap: 2076 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2077 1, 0); 2078 if (!bitmap_info) { 2079 ASSERT(added == 0); 2080 goto new_bitmap; 2081 } 2082 2083 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2084 bytes -= bytes_added; 2085 offset += bytes_added; 2086 added = 0; 2087 2088 if (!bytes) { 2089 ret = 1; 2090 goto out; 2091 } else 2092 goto again; 2093 2094 new_bitmap: 2095 if (info && info->bitmap) { 2096 add_new_bitmap(ctl, info, offset); 2097 added = 1; 2098 info = NULL; 2099 goto again; 2100 } else { 2101 spin_unlock(&ctl->tree_lock); 2102 2103 /* no pre-allocated info, allocate a new one */ 2104 if (!info) { 2105 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2106 GFP_NOFS); 2107 if (!info) { 2108 spin_lock(&ctl->tree_lock); 2109 ret = -ENOMEM; 2110 goto out; 2111 } 2112 } 2113 2114 /* allocate the bitmap */ 2115 info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS); 2116 spin_lock(&ctl->tree_lock); 2117 if (!info->bitmap) { 2118 ret = -ENOMEM; 2119 goto out; 2120 } 2121 goto again; 2122 } 2123 2124 out: 2125 if (info) { 2126 if (info->bitmap) 2127 kfree(info->bitmap); 2128 kmem_cache_free(btrfs_free_space_cachep, info); 2129 } 2130 2131 return ret; 2132 } 2133 2134 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2135 struct btrfs_free_space *info, bool update_stat) 2136 { 2137 struct btrfs_free_space *left_info; 2138 struct btrfs_free_space *right_info; 2139 bool merged = false; 2140 u64 offset = info->offset; 2141 u64 bytes = info->bytes; 2142 2143 /* 2144 * first we want to see if there is free space adjacent to the range we 2145 * are adding, if there is remove that struct and add a new one to 2146 * cover the entire range 2147 */ 2148 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2149 if (right_info && rb_prev(&right_info->offset_index)) 2150 left_info = rb_entry(rb_prev(&right_info->offset_index), 2151 struct btrfs_free_space, offset_index); 2152 else 2153 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2154 2155 if (right_info && !right_info->bitmap) { 2156 if (update_stat) 2157 unlink_free_space(ctl, right_info); 2158 else 2159 __unlink_free_space(ctl, right_info); 2160 info->bytes += right_info->bytes; 2161 kmem_cache_free(btrfs_free_space_cachep, right_info); 2162 merged = true; 2163 } 2164 2165 if (left_info && !left_info->bitmap && 2166 left_info->offset + left_info->bytes == offset) { 2167 if (update_stat) 2168 unlink_free_space(ctl, left_info); 2169 else 2170 __unlink_free_space(ctl, left_info); 2171 info->offset = left_info->offset; 2172 info->bytes += 
left_info->bytes; 2173 kmem_cache_free(btrfs_free_space_cachep, left_info); 2174 merged = true; 2175 } 2176 2177 return merged; 2178 } 2179 2180 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2181 struct btrfs_free_space *info, 2182 bool update_stat) 2183 { 2184 struct btrfs_free_space *bitmap; 2185 unsigned long i; 2186 unsigned long j; 2187 const u64 end = info->offset + info->bytes; 2188 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2189 u64 bytes; 2190 2191 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2192 if (!bitmap) 2193 return false; 2194 2195 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2196 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2197 if (j == i) 2198 return false; 2199 bytes = (j - i) * ctl->unit; 2200 info->bytes += bytes; 2201 2202 if (update_stat) 2203 bitmap_clear_bits(ctl, bitmap, end, bytes); 2204 else 2205 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2206 2207 if (!bitmap->bytes) 2208 free_bitmap(ctl, bitmap); 2209 2210 return true; 2211 } 2212 2213 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2214 struct btrfs_free_space *info, 2215 bool update_stat) 2216 { 2217 struct btrfs_free_space *bitmap; 2218 u64 bitmap_offset; 2219 unsigned long i; 2220 unsigned long j; 2221 unsigned long prev_j; 2222 u64 bytes; 2223 2224 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2225 /* If we're on a boundary, try the previous logical bitmap. */ 2226 if (bitmap_offset == info->offset) { 2227 if (info->offset == 0) 2228 return false; 2229 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2230 } 2231 2232 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2233 if (!bitmap) 2234 return false; 2235 2236 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2237 j = 0; 2238 prev_j = (unsigned long)-1; 2239 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2240 if (j > i) 2241 break; 2242 prev_j = j; 2243 } 2244 if (prev_j == i) 2245 return false; 2246 2247 if (prev_j == (unsigned long)-1) 2248 bytes = (i + 1) * ctl->unit; 2249 else 2250 bytes = (i - prev_j) * ctl->unit; 2251 2252 info->offset -= bytes; 2253 info->bytes += bytes; 2254 2255 if (update_stat) 2256 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2257 else 2258 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2259 2260 if (!bitmap->bytes) 2261 free_bitmap(ctl, bitmap); 2262 2263 return true; 2264 } 2265 2266 /* 2267 * We prefer always to allocate from extent entries, both for clustered and 2268 * non-clustered allocation requests. So when attempting to add a new extent 2269 * entry, try to see if there's adjacent free space in bitmap entries, and if 2270 * there is, migrate that space from the bitmaps to the extent. 2271 * Like this we get better chances of satisfying space allocation requests 2272 * because we attempt to satisfy them based on a single cache entry, and never 2273 * on 2 or more entries - even if the entries represent a contiguous free space 2274 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2275 * ends). 2276 */ 2277 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2278 struct btrfs_free_space *info, 2279 bool update_stat) 2280 { 2281 /* 2282 * Only work with disconnected entries, as we can change their offset, 2283 * and must be extent entries. 
2284 */ 2285 ASSERT(!info->bitmap); 2286 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2287 2288 if (ctl->total_bitmaps > 0) { 2289 bool stole_end; 2290 bool stole_front = false; 2291 2292 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2293 if (ctl->total_bitmaps > 0) 2294 stole_front = steal_from_bitmap_to_front(ctl, info, 2295 update_stat); 2296 2297 if (stole_end || stole_front) 2298 try_merge_free_space(ctl, info, update_stat); 2299 } 2300 } 2301 2302 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2303 struct btrfs_free_space_ctl *ctl, 2304 u64 offset, u64 bytes) 2305 { 2306 struct btrfs_free_space *info; 2307 int ret = 0; 2308 2309 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2310 if (!info) 2311 return -ENOMEM; 2312 2313 info->offset = offset; 2314 info->bytes = bytes; 2315 RB_CLEAR_NODE(&info->offset_index); 2316 2317 spin_lock(&ctl->tree_lock); 2318 2319 if (try_merge_free_space(ctl, info, true)) 2320 goto link; 2321 2322 /* 2323 * There was no extent directly to the left or right of this new 2324 * extent then we know we're going to have to allocate a new extent, so 2325 * before we do that see if we need to drop this into a bitmap 2326 */ 2327 ret = insert_into_bitmap(ctl, info); 2328 if (ret < 0) { 2329 goto out; 2330 } else if (ret) { 2331 ret = 0; 2332 goto out; 2333 } 2334 link: 2335 /* 2336 * Only steal free space from adjacent bitmaps if we're sure we're not 2337 * going to add the new free space to existing bitmap entries - because 2338 * that would mean unnecessary work that would be reverted. Therefore 2339 * attempt to steal space from bitmaps if we're adding an extent entry. 2340 */ 2341 steal_from_bitmap(ctl, info, true); 2342 2343 ret = link_free_space(ctl, info); 2344 if (ret) 2345 kmem_cache_free(btrfs_free_space_cachep, info); 2346 out: 2347 spin_unlock(&ctl->tree_lock); 2348 2349 if (ret) { 2350 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2351 ASSERT(ret != -EEXIST); 2352 } 2353 2354 return ret; 2355 } 2356 2357 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 2358 u64 offset, u64 bytes) 2359 { 2360 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2361 struct btrfs_free_space *info; 2362 int ret; 2363 bool re_search = false; 2364 2365 spin_lock(&ctl->tree_lock); 2366 2367 again: 2368 ret = 0; 2369 if (!bytes) 2370 goto out_lock; 2371 2372 info = tree_search_offset(ctl, offset, 0, 0); 2373 if (!info) { 2374 /* 2375 * oops didn't find an extent that matched the space we wanted 2376 * to remove, look for a bitmap instead 2377 */ 2378 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2379 1, 0); 2380 if (!info) { 2381 /* 2382 * If we found a partial bit of our free space in a 2383 * bitmap but then couldn't find the other part this may 2384 * be a problem, so WARN about it. 
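 * (re_search is only set after remove_from_bitmap() returned -EAGAIN, i.e. part of the range was already found in a bitmap.)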
2385 */ 2386 WARN_ON(re_search); 2387 goto out_lock; 2388 } 2389 } 2390 2391 re_search = false; 2392 if (!info->bitmap) { 2393 unlink_free_space(ctl, info); 2394 if (offset == info->offset) { 2395 u64 to_free = min(bytes, info->bytes); 2396 2397 info->bytes -= to_free; 2398 info->offset += to_free; 2399 if (info->bytes) { 2400 ret = link_free_space(ctl, info); 2401 WARN_ON(ret); 2402 } else { 2403 kmem_cache_free(btrfs_free_space_cachep, info); 2404 } 2405 2406 offset += to_free; 2407 bytes -= to_free; 2408 goto again; 2409 } else { 2410 u64 old_end = info->bytes + info->offset; 2411 2412 info->bytes = offset - info->offset; 2413 ret = link_free_space(ctl, info); 2414 WARN_ON(ret); 2415 if (ret) 2416 goto out_lock; 2417 2418 /* Not enough bytes in this entry to satisfy us */ 2419 if (old_end < offset + bytes) { 2420 bytes -= old_end - offset; 2421 offset = old_end; 2422 goto again; 2423 } else if (old_end == offset + bytes) { 2424 /* all done */ 2425 goto out_lock; 2426 } 2427 spin_unlock(&ctl->tree_lock); 2428 2429 ret = btrfs_add_free_space(block_group, offset + bytes, 2430 old_end - (offset + bytes)); 2431 WARN_ON(ret); 2432 goto out; 2433 } 2434 } 2435 2436 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2437 if (ret == -EAGAIN) { 2438 re_search = true; 2439 goto again; 2440 } 2441 out_lock: 2442 spin_unlock(&ctl->tree_lock); 2443 out: 2444 return ret; 2445 } 2446 2447 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 2448 u64 bytes) 2449 { 2450 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2451 struct btrfs_free_space *info; 2452 struct rb_node *n; 2453 int count = 0; 2454 2455 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2456 info = rb_entry(n, struct btrfs_free_space, offset_index); 2457 if (info->bytes >= bytes && !block_group->ro) 2458 count++; 2459 btrfs_crit(block_group->fs_info, 2460 "entry offset %llu, bytes %llu, bitmap %s", 2461 info->offset, info->bytes, 2462 (info->bitmap) ? "yes" : "no"); 2463 } 2464 btrfs_info(block_group->fs_info, "block group has cluster?: %s", 2465 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2466 btrfs_info(block_group->fs_info, 2467 "%d blocks of free space at or bigger than bytes is", count); 2468 } 2469 2470 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2471 { 2472 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2473 2474 spin_lock_init(&ctl->tree_lock); 2475 ctl->unit = block_group->sectorsize; 2476 ctl->start = block_group->key.objectid; 2477 ctl->private = block_group; 2478 ctl->op = &free_space_op; 2479 INIT_LIST_HEAD(&ctl->trimming_ranges); 2480 mutex_init(&ctl->cache_writeout_mutex); 2481 2482 /* 2483 * we only want to have 32k of ram per block group for keeping 2484 * track of free space, and if we pass 1/2 of that we want to 2485 * start converting things over to using bitmaps 2486 */ 2487 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2488 } 2489 2490 /* 2491 * for a given cluster, put all of its extents back into the free 2492 * space cache. If the block group passed doesn't match the block group 2493 * pointed to by the cluster, someone else raced in and freed the 2494 * cluster already. 
In that case, we just return without changing anything 2495 */ 2496 static int 2497 __btrfs_return_cluster_to_free_space( 2498 struct btrfs_block_group_cache *block_group, 2499 struct btrfs_free_cluster *cluster) 2500 { 2501 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2502 struct btrfs_free_space *entry; 2503 struct rb_node *node; 2504 2505 spin_lock(&cluster->lock); 2506 if (cluster->block_group != block_group) 2507 goto out; 2508 2509 cluster->block_group = NULL; 2510 cluster->window_start = 0; 2511 list_del_init(&cluster->block_group_list); 2512 2513 node = rb_first(&cluster->root); 2514 while (node) { 2515 bool bitmap; 2516 2517 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2518 node = rb_next(&entry->offset_index); 2519 rb_erase(&entry->offset_index, &cluster->root); 2520 RB_CLEAR_NODE(&entry->offset_index); 2521 2522 bitmap = (entry->bitmap != NULL); 2523 if (!bitmap) { 2524 try_merge_free_space(ctl, entry, false); 2525 steal_from_bitmap(ctl, entry, false); 2526 } 2527 tree_insert_offset(&ctl->free_space_offset, 2528 entry->offset, &entry->offset_index, bitmap); 2529 } 2530 cluster->root = RB_ROOT; 2531 2532 out: 2533 spin_unlock(&cluster->lock); 2534 btrfs_put_block_group(block_group); 2535 return 0; 2536 } 2537 2538 static void __btrfs_remove_free_space_cache_locked( 2539 struct btrfs_free_space_ctl *ctl) 2540 { 2541 struct btrfs_free_space *info; 2542 struct rb_node *node; 2543 2544 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2545 info = rb_entry(node, struct btrfs_free_space, offset_index); 2546 if (!info->bitmap) { 2547 unlink_free_space(ctl, info); 2548 kmem_cache_free(btrfs_free_space_cachep, info); 2549 } else { 2550 free_bitmap(ctl, info); 2551 } 2552 2553 cond_resched_lock(&ctl->tree_lock); 2554 } 2555 } 2556 2557 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2558 { 2559 spin_lock(&ctl->tree_lock); 2560 __btrfs_remove_free_space_cache_locked(ctl); 2561 spin_unlock(&ctl->tree_lock); 2562 } 2563 2564 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2565 { 2566 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2567 struct btrfs_free_cluster *cluster; 2568 struct list_head *head; 2569 2570 spin_lock(&ctl->tree_lock); 2571 while ((head = block_group->cluster_list.next) != 2572 &block_group->cluster_list) { 2573 cluster = list_entry(head, struct btrfs_free_cluster, 2574 block_group_list); 2575 2576 WARN_ON(cluster->block_group != block_group); 2577 __btrfs_return_cluster_to_free_space(block_group, cluster); 2578 2579 cond_resched_lock(&ctl->tree_lock); 2580 } 2581 __btrfs_remove_free_space_cache_locked(ctl); 2582 spin_unlock(&ctl->tree_lock); 2583 2584 } 2585 2586 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2587 u64 offset, u64 bytes, u64 empty_size, 2588 u64 *max_extent_size) 2589 { 2590 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2591 struct btrfs_free_space *entry = NULL; 2592 u64 bytes_search = bytes + empty_size; 2593 u64 ret = 0; 2594 u64 align_gap = 0; 2595 u64 align_gap_len = 0; 2596 2597 spin_lock(&ctl->tree_lock); 2598 entry = find_free_space(ctl, &offset, &bytes_search, 2599 block_group->full_stripe_len, max_extent_size); 2600 if (!entry) 2601 goto out; 2602 2603 ret = offset; 2604 if (entry->bitmap) { 2605 bitmap_clear_bits(ctl, entry, offset, bytes); 2606 if (!entry->bytes) 2607 free_bitmap(ctl, entry); 2608 } else { 2609 unlink_free_space(ctl, entry); 2610 align_gap_len = offset - 
entry->offset; 2611 align_gap = entry->offset; 2612 2613 entry->offset = offset + bytes; 2614 WARN_ON(entry->bytes < bytes + align_gap_len); 2615 2616 entry->bytes -= bytes + align_gap_len; 2617 if (!entry->bytes) 2618 kmem_cache_free(btrfs_free_space_cachep, entry); 2619 else 2620 link_free_space(ctl, entry); 2621 } 2622 out: 2623 spin_unlock(&ctl->tree_lock); 2624 2625 if (align_gap_len) 2626 __btrfs_add_free_space(block_group->fs_info, ctl, 2627 align_gap, align_gap_len); 2628 return ret; 2629 } 2630 2631 /* 2632 * given a cluster, put all of its extents back into the free space 2633 * cache. If a block group is passed, this function will only free 2634 * a cluster that belongs to the passed block group. 2635 * 2636 * Otherwise, it'll get a reference on the block group pointed to by the 2637 * cluster and remove the cluster from it. 2638 */ 2639 int btrfs_return_cluster_to_free_space( 2640 struct btrfs_block_group_cache *block_group, 2641 struct btrfs_free_cluster *cluster) 2642 { 2643 struct btrfs_free_space_ctl *ctl; 2644 int ret; 2645 2646 /* first, get a safe pointer to the block group */ 2647 spin_lock(&cluster->lock); 2648 if (!block_group) { 2649 block_group = cluster->block_group; 2650 if (!block_group) { 2651 spin_unlock(&cluster->lock); 2652 return 0; 2653 } 2654 } else if (cluster->block_group != block_group) { 2655 /* someone else has already freed it don't redo their work */ 2656 spin_unlock(&cluster->lock); 2657 return 0; 2658 } 2659 atomic_inc(&block_group->count); 2660 spin_unlock(&cluster->lock); 2661 2662 ctl = block_group->free_space_ctl; 2663 2664 /* now return any extents the cluster had on it */ 2665 spin_lock(&ctl->tree_lock); 2666 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2667 spin_unlock(&ctl->tree_lock); 2668 2669 /* finally drop our ref */ 2670 btrfs_put_block_group(block_group); 2671 return ret; 2672 } 2673 2674 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2675 struct btrfs_free_cluster *cluster, 2676 struct btrfs_free_space *entry, 2677 u64 bytes, u64 min_start, 2678 u64 *max_extent_size) 2679 { 2680 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2681 int err; 2682 u64 search_start = cluster->window_start; 2683 u64 search_bytes = bytes; 2684 u64 ret = 0; 2685 2686 search_start = min_start; 2687 search_bytes = bytes; 2688 2689 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2690 if (err) { 2691 if (search_bytes > *max_extent_size) 2692 *max_extent_size = search_bytes; 2693 return 0; 2694 } 2695 2696 ret = search_start; 2697 __bitmap_clear_bits(ctl, entry, ret, bytes); 2698 2699 return ret; 2700 } 2701 2702 /* 2703 * given a cluster, try to allocate 'bytes' from it, returns 0 2704 * if it couldn't find anything suitably large, or a logical disk offset 2705 * if things worked out 2706 */ 2707 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2708 struct btrfs_free_cluster *cluster, u64 bytes, 2709 u64 min_start, u64 *max_extent_size) 2710 { 2711 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2712 struct btrfs_free_space *entry = NULL; 2713 struct rb_node *node; 2714 u64 ret = 0; 2715 2716 spin_lock(&cluster->lock); 2717 if (bytes > cluster->max_size) 2718 goto out; 2719 2720 if (cluster->block_group != block_group) 2721 goto out; 2722 2723 node = rb_first(&cluster->root); 2724 if (!node) 2725 goto out; 2726 2727 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2728 while (1) { 2729 if (entry->bytes < bytes && 
entry->bytes > *max_extent_size) 2730 *max_extent_size = entry->bytes; 2731 2732 if (entry->bytes < bytes || 2733 (!entry->bitmap && entry->offset < min_start)) { 2734 node = rb_next(&entry->offset_index); 2735 if (!node) 2736 break; 2737 entry = rb_entry(node, struct btrfs_free_space, 2738 offset_index); 2739 continue; 2740 } 2741 2742 if (entry->bitmap) { 2743 ret = btrfs_alloc_from_bitmap(block_group, 2744 cluster, entry, bytes, 2745 cluster->window_start, 2746 max_extent_size); 2747 if (ret == 0) { 2748 node = rb_next(&entry->offset_index); 2749 if (!node) 2750 break; 2751 entry = rb_entry(node, struct btrfs_free_space, 2752 offset_index); 2753 continue; 2754 } 2755 cluster->window_start += bytes; 2756 } else { 2757 ret = entry->offset; 2758 2759 entry->offset += bytes; 2760 entry->bytes -= bytes; 2761 } 2762 2763 if (entry->bytes == 0) 2764 rb_erase(&entry->offset_index, &cluster->root); 2765 break; 2766 } 2767 out: 2768 spin_unlock(&cluster->lock); 2769 2770 if (!ret) 2771 return 0; 2772 2773 spin_lock(&ctl->tree_lock); 2774 2775 ctl->free_space -= bytes; 2776 if (entry->bytes == 0) { 2777 ctl->free_extents--; 2778 if (entry->bitmap) { 2779 kfree(entry->bitmap); 2780 ctl->total_bitmaps--; 2781 ctl->op->recalc_thresholds(ctl); 2782 } 2783 kmem_cache_free(btrfs_free_space_cachep, entry); 2784 } 2785 2786 spin_unlock(&ctl->tree_lock); 2787 2788 return ret; 2789 } 2790 2791 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2792 struct btrfs_free_space *entry, 2793 struct btrfs_free_cluster *cluster, 2794 u64 offset, u64 bytes, 2795 u64 cont1_bytes, u64 min_bytes) 2796 { 2797 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2798 unsigned long next_zero; 2799 unsigned long i; 2800 unsigned long want_bits; 2801 unsigned long min_bits; 2802 unsigned long found_bits; 2803 unsigned long max_bits = 0; 2804 unsigned long start = 0; 2805 unsigned long total_found = 0; 2806 int ret; 2807 2808 i = offset_to_bit(entry->offset, ctl->unit, 2809 max_t(u64, offset, entry->offset)); 2810 want_bits = bytes_to_bits(bytes, ctl->unit); 2811 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2812 2813 /* 2814 * Don't bother looking for a cluster in this bitmap if it's heavily 2815 * fragmented. 
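 * (max_extent_size is a hint cached by a previous search that came up short; it is cleared whenever new bits are set in this bitmap.)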
2816 */ 2817 if (entry->max_extent_size && 2818 entry->max_extent_size < cont1_bytes) 2819 return -ENOSPC; 2820 again: 2821 found_bits = 0; 2822 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2823 next_zero = find_next_zero_bit(entry->bitmap, 2824 BITS_PER_BITMAP, i); 2825 if (next_zero - i >= min_bits) { 2826 found_bits = next_zero - i; 2827 if (found_bits > max_bits) 2828 max_bits = found_bits; 2829 break; 2830 } 2831 if (next_zero - i > max_bits) 2832 max_bits = next_zero - i; 2833 i = next_zero; 2834 } 2835 2836 if (!found_bits) { 2837 entry->max_extent_size = (u64)max_bits * ctl->unit; 2838 return -ENOSPC; 2839 } 2840 2841 if (!total_found) { 2842 start = i; 2843 cluster->max_size = 0; 2844 } 2845 2846 total_found += found_bits; 2847 2848 if (cluster->max_size < found_bits * ctl->unit) 2849 cluster->max_size = found_bits * ctl->unit; 2850 2851 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2852 i = next_zero + 1; 2853 goto again; 2854 } 2855 2856 cluster->window_start = start * ctl->unit + entry->offset; 2857 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2858 ret = tree_insert_offset(&cluster->root, entry->offset, 2859 &entry->offset_index, 1); 2860 ASSERT(!ret); /* -EEXIST; Logic error */ 2861 2862 trace_btrfs_setup_cluster(block_group, cluster, 2863 total_found * ctl->unit, 1); 2864 return 0; 2865 } 2866 2867 /* 2868 * This searches the block group for just extents to fill the cluster with. 2869 * Try to find a cluster with at least bytes total bytes, at least one 2870 * extent of cont1_bytes, and other clusters of at least min_bytes. 2871 */ 2872 static noinline int 2873 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2874 struct btrfs_free_cluster *cluster, 2875 struct list_head *bitmaps, u64 offset, u64 bytes, 2876 u64 cont1_bytes, u64 min_bytes) 2877 { 2878 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2879 struct btrfs_free_space *first = NULL; 2880 struct btrfs_free_space *entry = NULL; 2881 struct btrfs_free_space *last; 2882 struct rb_node *node; 2883 u64 window_free; 2884 u64 max_extent; 2885 u64 total_size = 0; 2886 2887 entry = tree_search_offset(ctl, offset, 0, 1); 2888 if (!entry) 2889 return -ENOSPC; 2890 2891 /* 2892 * We don't want bitmaps, so just move along until we find a normal 2893 * extent entry. 
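 * Bitmaps we walk past are stashed on the caller's list so that setup_cluster_bitmap() can fall back to them.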
2894 */ 2895 while (entry->bitmap || entry->bytes < min_bytes) { 2896 if (entry->bitmap && list_empty(&entry->list)) 2897 list_add_tail(&entry->list, bitmaps); 2898 node = rb_next(&entry->offset_index); 2899 if (!node) 2900 return -ENOSPC; 2901 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2902 } 2903 2904 window_free = entry->bytes; 2905 max_extent = entry->bytes; 2906 first = entry; 2907 last = entry; 2908 2909 for (node = rb_next(&entry->offset_index); node; 2910 node = rb_next(&entry->offset_index)) { 2911 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2912 2913 if (entry->bitmap) { 2914 if (list_empty(&entry->list)) 2915 list_add_tail(&entry->list, bitmaps); 2916 continue; 2917 } 2918 2919 if (entry->bytes < min_bytes) 2920 continue; 2921 2922 last = entry; 2923 window_free += entry->bytes; 2924 if (entry->bytes > max_extent) 2925 max_extent = entry->bytes; 2926 } 2927 2928 if (window_free < bytes || max_extent < cont1_bytes) 2929 return -ENOSPC; 2930 2931 cluster->window_start = first->offset; 2932 2933 node = &first->offset_index; 2934 2935 /* 2936 * now we've found our entries, pull them out of the free space 2937 * cache and put them into the cluster rbtree 2938 */ 2939 do { 2940 int ret; 2941 2942 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2943 node = rb_next(&entry->offset_index); 2944 if (entry->bitmap || entry->bytes < min_bytes) 2945 continue; 2946 2947 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2948 ret = tree_insert_offset(&cluster->root, entry->offset, 2949 &entry->offset_index, 0); 2950 total_size += entry->bytes; 2951 ASSERT(!ret); /* -EEXIST; Logic error */ 2952 } while (node && entry != last); 2953 2954 cluster->max_size = max_extent; 2955 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2956 return 0; 2957 } 2958 2959 /* 2960 * This specifically looks for bitmaps that may work in the cluster, we assume 2961 * that we have already failed to find extents that will work. 2962 */ 2963 static noinline int 2964 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2965 struct btrfs_free_cluster *cluster, 2966 struct list_head *bitmaps, u64 offset, u64 bytes, 2967 u64 cont1_bytes, u64 min_bytes) 2968 { 2969 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2970 struct btrfs_free_space *entry = NULL; 2971 int ret = -ENOSPC; 2972 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2973 2974 if (ctl->total_bitmaps == 0) 2975 return -ENOSPC; 2976 2977 /* 2978 * The bitmap that covers offset won't be in the list unless offset 2979 * is just its start offset. 2980 */ 2981 if (!list_empty(bitmaps)) 2982 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2983 2984 if (!entry || entry->offset != bitmap_offset) { 2985 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2986 if (entry && list_empty(&entry->list)) 2987 list_add(&entry->list, bitmaps); 2988 } 2989 2990 list_for_each_entry(entry, bitmaps, list) { 2991 if (entry->bytes < bytes) 2992 continue; 2993 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 2994 bytes, cont1_bytes, min_bytes); 2995 if (!ret) 2996 return 0; 2997 } 2998 2999 /* 3000 * The bitmaps list has all the bitmaps that record free space 3001 * starting after offset, so no more search is required. 3002 */ 3003 return -ENOSPC; 3004 } 3005 3006 /* 3007 * here we try to find a cluster of blocks in a block group. The goal 3008 * is to find at least bytes+empty_size. 3009 * We might not find them all in one contiguous area. 
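 * At least one chunk must be contiguous (cont1_bytes below); the rest may be spread across smaller extents of at least min_bytes.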
3010 * 3011 * returns zero and sets up cluster if things worked out, otherwise 3012 * it returns -enospc 3013 */ 3014 int btrfs_find_space_cluster(struct btrfs_root *root, 3015 struct btrfs_block_group_cache *block_group, 3016 struct btrfs_free_cluster *cluster, 3017 u64 offset, u64 bytes, u64 empty_size) 3018 { 3019 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3020 struct btrfs_free_space *entry, *tmp; 3021 LIST_HEAD(bitmaps); 3022 u64 min_bytes; 3023 u64 cont1_bytes; 3024 int ret; 3025 3026 /* 3027 * Choose the minimum extent size we'll require for this 3028 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3029 * For metadata, allow allocates with smaller extents. For 3030 * data, keep it dense. 3031 */ 3032 if (btrfs_test_opt(root->fs_info, SSD_SPREAD)) { 3033 cont1_bytes = min_bytes = bytes + empty_size; 3034 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3035 cont1_bytes = bytes; 3036 min_bytes = block_group->sectorsize; 3037 } else { 3038 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3039 min_bytes = block_group->sectorsize; 3040 } 3041 3042 spin_lock(&ctl->tree_lock); 3043 3044 /* 3045 * If we know we don't have enough space to make a cluster don't even 3046 * bother doing all the work to try and find one. 3047 */ 3048 if (ctl->free_space < bytes) { 3049 spin_unlock(&ctl->tree_lock); 3050 return -ENOSPC; 3051 } 3052 3053 spin_lock(&cluster->lock); 3054 3055 /* someone already found a cluster, hooray */ 3056 if (cluster->block_group) { 3057 ret = 0; 3058 goto out; 3059 } 3060 3061 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3062 min_bytes); 3063 3064 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3065 bytes + empty_size, 3066 cont1_bytes, min_bytes); 3067 if (ret) 3068 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3069 offset, bytes + empty_size, 3070 cont1_bytes, min_bytes); 3071 3072 /* Clear our temporary list */ 3073 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3074 list_del_init(&entry->list); 3075 3076 if (!ret) { 3077 atomic_inc(&block_group->count); 3078 list_add_tail(&cluster->block_group_list, 3079 &block_group->cluster_list); 3080 cluster->block_group = block_group; 3081 } else { 3082 trace_btrfs_failed_cluster_setup(block_group); 3083 } 3084 out: 3085 spin_unlock(&cluster->lock); 3086 spin_unlock(&ctl->tree_lock); 3087 3088 return ret; 3089 } 3090 3091 /* 3092 * simple code to zero out a cluster 3093 */ 3094 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3095 { 3096 spin_lock_init(&cluster->lock); 3097 spin_lock_init(&cluster->refill_lock); 3098 cluster->root = RB_ROOT; 3099 cluster->max_size = 0; 3100 cluster->fragmented = false; 3101 INIT_LIST_HEAD(&cluster->block_group_list); 3102 cluster->block_group = NULL; 3103 } 3104 3105 static int do_trimming(struct btrfs_block_group_cache *block_group, 3106 u64 *total_trimmed, u64 start, u64 bytes, 3107 u64 reserved_start, u64 reserved_bytes, 3108 struct btrfs_trim_range *trim_entry) 3109 { 3110 struct btrfs_space_info *space_info = block_group->space_info; 3111 struct btrfs_fs_info *fs_info = block_group->fs_info; 3112 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3113 int ret; 3114 int update = 0; 3115 u64 trimmed = 0; 3116 3117 spin_lock(&space_info->lock); 3118 spin_lock(&block_group->lock); 3119 if (!block_group->ro) { 3120 block_group->reserved += reserved_bytes; 3121 space_info->bytes_reserved += reserved_bytes; 3122 update = 1; 3123 } 3124 spin_unlock(&block_group->lock); 
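/* Drop space_info->lock as well before issuing the discard, which may block. */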
3125 spin_unlock(&space_info->lock); 3126 3127 ret = btrfs_discard_extent(fs_info->extent_root, 3128 start, bytes, &trimmed); 3129 if (!ret) 3130 *total_trimmed += trimmed; 3131 3132 mutex_lock(&ctl->cache_writeout_mutex); 3133 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3134 list_del(&trim_entry->list); 3135 mutex_unlock(&ctl->cache_writeout_mutex); 3136 3137 if (update) { 3138 spin_lock(&space_info->lock); 3139 spin_lock(&block_group->lock); 3140 if (block_group->ro) 3141 space_info->bytes_readonly += reserved_bytes; 3142 block_group->reserved -= reserved_bytes; 3143 space_info->bytes_reserved -= reserved_bytes; 3144 spin_unlock(&space_info->lock); 3145 spin_unlock(&block_group->lock); 3146 } 3147 3148 return ret; 3149 } 3150 3151 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3152 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3153 { 3154 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3155 struct btrfs_free_space *entry; 3156 struct rb_node *node; 3157 int ret = 0; 3158 u64 extent_start; 3159 u64 extent_bytes; 3160 u64 bytes; 3161 3162 while (start < end) { 3163 struct btrfs_trim_range trim_entry; 3164 3165 mutex_lock(&ctl->cache_writeout_mutex); 3166 spin_lock(&ctl->tree_lock); 3167 3168 if (ctl->free_space < minlen) { 3169 spin_unlock(&ctl->tree_lock); 3170 mutex_unlock(&ctl->cache_writeout_mutex); 3171 break; 3172 } 3173 3174 entry = tree_search_offset(ctl, start, 0, 1); 3175 if (!entry) { 3176 spin_unlock(&ctl->tree_lock); 3177 mutex_unlock(&ctl->cache_writeout_mutex); 3178 break; 3179 } 3180 3181 /* skip bitmaps */ 3182 while (entry->bitmap) { 3183 node = rb_next(&entry->offset_index); 3184 if (!node) { 3185 spin_unlock(&ctl->tree_lock); 3186 mutex_unlock(&ctl->cache_writeout_mutex); 3187 goto out; 3188 } 3189 entry = rb_entry(node, struct btrfs_free_space, 3190 offset_index); 3191 } 3192 3193 if (entry->offset >= end) { 3194 spin_unlock(&ctl->tree_lock); 3195 mutex_unlock(&ctl->cache_writeout_mutex); 3196 break; 3197 } 3198 3199 extent_start = entry->offset; 3200 extent_bytes = entry->bytes; 3201 start = max(start, extent_start); 3202 bytes = min(extent_start + extent_bytes, end) - start; 3203 if (bytes < minlen) { 3204 spin_unlock(&ctl->tree_lock); 3205 mutex_unlock(&ctl->cache_writeout_mutex); 3206 goto next; 3207 } 3208 3209 unlink_free_space(ctl, entry); 3210 kmem_cache_free(btrfs_free_space_cachep, entry); 3211 3212 spin_unlock(&ctl->tree_lock); 3213 trim_entry.start = extent_start; 3214 trim_entry.bytes = extent_bytes; 3215 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3216 mutex_unlock(&ctl->cache_writeout_mutex); 3217 3218 ret = do_trimming(block_group, total_trimmed, start, bytes, 3219 extent_start, extent_bytes, &trim_entry); 3220 if (ret) 3221 break; 3222 next: 3223 start += bytes; 3224 3225 if (fatal_signal_pending(current)) { 3226 ret = -ERESTARTSYS; 3227 break; 3228 } 3229 3230 cond_resched(); 3231 } 3232 out: 3233 return ret; 3234 } 3235 3236 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3237 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3238 { 3239 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3240 struct btrfs_free_space *entry; 3241 int ret = 0; 3242 int ret2; 3243 u64 bytes; 3244 u64 offset = offset_to_bitmap(ctl, start); 3245 3246 while (offset < end) { 3247 bool next_bitmap = false; 3248 struct btrfs_trim_range trim_entry; 3249 3250 mutex_lock(&ctl->cache_writeout_mutex); 3251 spin_lock(&ctl->tree_lock); 3252 3253 if (ctl->free_space < 
minlen) { 3254 spin_unlock(&ctl->tree_lock); 3255 mutex_unlock(&ctl->cache_writeout_mutex); 3256 break; 3257 } 3258 3259 entry = tree_search_offset(ctl, offset, 1, 0); 3260 if (!entry) { 3261 spin_unlock(&ctl->tree_lock); 3262 mutex_unlock(&ctl->cache_writeout_mutex); 3263 next_bitmap = true; 3264 goto next; 3265 } 3266 3267 bytes = minlen; 3268 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3269 if (ret2 || start >= end) { 3270 spin_unlock(&ctl->tree_lock); 3271 mutex_unlock(&ctl->cache_writeout_mutex); 3272 next_bitmap = true; 3273 goto next; 3274 } 3275 3276 bytes = min(bytes, end - start); 3277 if (bytes < minlen) { 3278 spin_unlock(&ctl->tree_lock); 3279 mutex_unlock(&ctl->cache_writeout_mutex); 3280 goto next; 3281 } 3282 3283 bitmap_clear_bits(ctl, entry, start, bytes); 3284 if (entry->bytes == 0) 3285 free_bitmap(ctl, entry); 3286 3287 spin_unlock(&ctl->tree_lock); 3288 trim_entry.start = start; 3289 trim_entry.bytes = bytes; 3290 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3291 mutex_unlock(&ctl->cache_writeout_mutex); 3292 3293 ret = do_trimming(block_group, total_trimmed, start, bytes, 3294 start, bytes, &trim_entry); 3295 if (ret) 3296 break; 3297 next: 3298 if (next_bitmap) { 3299 offset += BITS_PER_BITMAP * ctl->unit; 3300 } else { 3301 start += bytes; 3302 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3303 offset += BITS_PER_BITMAP * ctl->unit; 3304 } 3305 3306 if (fatal_signal_pending(current)) { 3307 ret = -ERESTARTSYS; 3308 break; 3309 } 3310 3311 cond_resched(); 3312 } 3313 3314 return ret; 3315 } 3316 3317 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache) 3318 { 3319 atomic_inc(&cache->trimming); 3320 } 3321 3322 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) 3323 { 3324 struct extent_map_tree *em_tree; 3325 struct extent_map *em; 3326 bool cleanup; 3327 3328 spin_lock(&block_group->lock); 3329 cleanup = (atomic_dec_and_test(&block_group->trimming) && 3330 block_group->removed); 3331 spin_unlock(&block_group->lock); 3332 3333 if (cleanup) { 3334 lock_chunks(block_group->fs_info->chunk_root); 3335 em_tree = &block_group->fs_info->mapping_tree.map_tree; 3336 write_lock(&em_tree->lock); 3337 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3338 1); 3339 BUG_ON(!em); /* logic error, can't happen */ 3340 /* 3341 * remove_extent_mapping() will delete us from the pinned_chunks 3342 * list, which is protected by the chunk mutex. 3343 */ 3344 remove_extent_mapping(em_tree, em); 3345 write_unlock(&em_tree->lock); 3346 unlock_chunks(block_group->fs_info->chunk_root); 3347 3348 /* once for us and once for the tree */ 3349 free_extent_map(em); 3350 free_extent_map(em); 3351 3352 /* 3353 * We've left one free space entry and other tasks trimming 3354 * this block group have left 1 entry each one. Free them. 
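 * (They come from do_trimming(), which re-adds each trimmed range with btrfs_add_free_space().)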
3355 */ 3356 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3357 } 3358 } 3359 3360 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3361 u64 *trimmed, u64 start, u64 end, u64 minlen) 3362 { 3363 int ret; 3364 3365 *trimmed = 0; 3366 3367 spin_lock(&block_group->lock); 3368 if (block_group->removed) { 3369 spin_unlock(&block_group->lock); 3370 return 0; 3371 } 3372 btrfs_get_block_group_trimming(block_group); 3373 spin_unlock(&block_group->lock); 3374 3375 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3376 if (ret) 3377 goto out; 3378 3379 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3380 out: 3381 btrfs_put_block_group_trimming(block_group); 3382 return ret; 3383 } 3384 3385 /* 3386 * Find the left-most item in the cache tree, and then return the 3387 * smallest inode number in the item. 3388 * 3389 * Note: the returned inode number may not be the smallest one in 3390 * the tree, if the left-most item is a bitmap. 3391 */ 3392 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3393 { 3394 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3395 struct btrfs_free_space *entry = NULL; 3396 u64 ino = 0; 3397 3398 spin_lock(&ctl->tree_lock); 3399 3400 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3401 goto out; 3402 3403 entry = rb_entry(rb_first(&ctl->free_space_offset), 3404 struct btrfs_free_space, offset_index); 3405 3406 if (!entry->bitmap) { 3407 ino = entry->offset; 3408 3409 unlink_free_space(ctl, entry); 3410 entry->offset++; 3411 entry->bytes--; 3412 if (!entry->bytes) 3413 kmem_cache_free(btrfs_free_space_cachep, entry); 3414 else 3415 link_free_space(ctl, entry); 3416 } else { 3417 u64 offset = 0; 3418 u64 count = 1; 3419 int ret; 3420 3421 ret = search_bitmap(ctl, entry, &offset, &count, true); 3422 /* Logic error; Should be empty if it can't find anything */ 3423 ASSERT(!ret); 3424 3425 ino = offset; 3426 bitmap_clear_bits(ctl, entry, offset, 1); 3427 if (entry->bytes == 0) 3428 free_bitmap(ctl, entry); 3429 } 3430 out: 3431 spin_unlock(&ctl->tree_lock); 3432 3433 return ino; 3434 } 3435 3436 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3437 struct btrfs_path *path) 3438 { 3439 struct inode *inode = NULL; 3440 3441 spin_lock(&root->ino_cache_lock); 3442 if (root->ino_cache_inode) 3443 inode = igrab(root->ino_cache_inode); 3444 spin_unlock(&root->ino_cache_lock); 3445 if (inode) 3446 return inode; 3447 3448 inode = __lookup_free_space_inode(root, path, 0); 3449 if (IS_ERR(inode)) 3450 return inode; 3451 3452 spin_lock(&root->ino_cache_lock); 3453 if (!btrfs_fs_closing(root->fs_info)) 3454 root->ino_cache_inode = igrab(inode); 3455 spin_unlock(&root->ino_cache_lock); 3456 3457 return inode; 3458 } 3459 3460 int create_free_ino_inode(struct btrfs_root *root, 3461 struct btrfs_trans_handle *trans, 3462 struct btrfs_path *path) 3463 { 3464 return __create_free_space_inode(root, trans, path, 3465 BTRFS_FREE_INO_OBJECTID, 0); 3466 } 3467 3468 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3469 { 3470 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3471 struct btrfs_path *path; 3472 struct inode *inode; 3473 int ret = 0; 3474 u64 root_gen = btrfs_root_generation(&root->root_item); 3475 3476 if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE)) 3477 return 0; 3478 3479 /* 3480 * If we're unmounting then just return, since this does a search on the 3481 * normal root and not the commit root and we could deadlock. 
3482 */ 3483 if (btrfs_fs_closing(fs_info)) 3484 return 0; 3485 3486 path = btrfs_alloc_path(); 3487 if (!path) 3488 return 0; 3489 3490 inode = lookup_free_ino_inode(root, path); 3491 if (IS_ERR(inode)) 3492 goto out; 3493 3494 if (root_gen != BTRFS_I(inode)->generation) 3495 goto out_put; 3496 3497 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3498 3499 if (ret < 0) 3500 btrfs_err(fs_info, 3501 "failed to load free ino cache for root %llu", 3502 root->root_key.objectid); 3503 out_put: 3504 iput(inode); 3505 out: 3506 btrfs_free_path(path); 3507 return ret; 3508 } 3509 3510 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3511 struct btrfs_trans_handle *trans, 3512 struct btrfs_path *path, 3513 struct inode *inode) 3514 { 3515 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3516 int ret; 3517 struct btrfs_io_ctl io_ctl; 3518 bool release_metadata = true; 3519 3520 if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE)) 3521 return 0; 3522 3523 memset(&io_ctl, 0, sizeof(io_ctl)); 3524 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, 3525 trans, path, 0); 3526 if (!ret) { 3527 /* 3528 * At this point writepages() didn't error out, so our metadata 3529 * reservation is released when the writeback finishes, at 3530 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3531 * with or without an error. 3532 */ 3533 release_metadata = false; 3534 ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0); 3535 } 3536 3537 if (ret) { 3538 if (release_metadata) 3539 btrfs_delalloc_release_metadata(inode, inode->i_size); 3540 #ifdef DEBUG 3541 btrfs_err(root->fs_info, 3542 "failed to write free ino cache for root %llu", 3543 root->root_key.objectid); 3544 #endif 3545 } 3546 3547 return ret; 3548 } 3549 3550 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3551 /* 3552 * Use this if you need to make a bitmap or extent entry specifically, it 3553 * doesn't do any of the merging that add_free_space does, this acts a lot like 3554 * how the free space cache loading stuff works, so you can get really weird 3555 * configurations. 
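 * Only the btrfs sanity tests are expected to call this.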
3556 */ 3557 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3558 u64 offset, u64 bytes, bool bitmap) 3559 { 3560 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3561 struct btrfs_free_space *info = NULL, *bitmap_info; 3562 void *map = NULL; 3563 u64 bytes_added; 3564 int ret; 3565 3566 again: 3567 if (!info) { 3568 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3569 if (!info) 3570 return -ENOMEM; 3571 } 3572 3573 if (!bitmap) { 3574 spin_lock(&ctl->tree_lock); 3575 info->offset = offset; 3576 info->bytes = bytes; 3577 info->max_extent_size = 0; 3578 ret = link_free_space(ctl, info); 3579 spin_unlock(&ctl->tree_lock); 3580 if (ret) 3581 kmem_cache_free(btrfs_free_space_cachep, info); 3582 return ret; 3583 } 3584 3585 if (!map) { 3586 map = kzalloc(PAGE_SIZE, GFP_NOFS); 3587 if (!map) { 3588 kmem_cache_free(btrfs_free_space_cachep, info); 3589 return -ENOMEM; 3590 } 3591 } 3592 3593 spin_lock(&ctl->tree_lock); 3594 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3595 1, 0); 3596 if (!bitmap_info) { 3597 info->bitmap = map; 3598 map = NULL; 3599 add_new_bitmap(ctl, info, offset); 3600 bitmap_info = info; 3601 info = NULL; 3602 } 3603 3604 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3605 3606 bytes -= bytes_added; 3607 offset += bytes_added; 3608 spin_unlock(&ctl->tree_lock); 3609 3610 if (bytes) 3611 goto again; 3612 3613 if (info) 3614 kmem_cache_free(btrfs_free_space_cachep, info); 3615 if (map) 3616 kfree(map); 3617 return 0; 3618 } 3619 3620 /* 3621 * Checks to see if the given range is in the free space cache. This is really 3622 * just used to check the absence of space, so if there is free space in the 3623 * range at all we will return 1. 3624 */ 3625 int test_check_exists(struct btrfs_block_group_cache *cache, 3626 u64 offset, u64 bytes) 3627 { 3628 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3629 struct btrfs_free_space *info; 3630 int ret = 0; 3631 3632 spin_lock(&ctl->tree_lock); 3633 info = tree_search_offset(ctl, offset, 0, 0); 3634 if (!info) { 3635 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3636 1, 0); 3637 if (!info) 3638 goto out; 3639 } 3640 3641 have_info: 3642 if (info->bitmap) { 3643 u64 bit_off, bit_bytes; 3644 struct rb_node *n; 3645 struct btrfs_free_space *tmp; 3646 3647 bit_off = offset; 3648 bit_bytes = ctl->unit; 3649 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false); 3650 if (!ret) { 3651 if (bit_off == offset) { 3652 ret = 1; 3653 goto out; 3654 } else if (bit_off > offset && 3655 offset + bytes > bit_off) { 3656 ret = 1; 3657 goto out; 3658 } 3659 } 3660 3661 n = rb_prev(&info->offset_index); 3662 while (n) { 3663 tmp = rb_entry(n, struct btrfs_free_space, 3664 offset_index); 3665 if (tmp->offset + tmp->bytes < offset) 3666 break; 3667 if (offset + bytes < tmp->offset) { 3668 n = rb_prev(&tmp->offset_index); 3669 continue; 3670 } 3671 info = tmp; 3672 goto have_info; 3673 } 3674 3675 n = rb_next(&info->offset_index); 3676 while (n) { 3677 tmp = rb_entry(n, struct btrfs_free_space, 3678 offset_index); 3679 if (offset + bytes < tmp->offset) 3680 break; 3681 if (tmp->offset + tmp->bytes < offset) { 3682 n = rb_next(&tmp->offset_index); 3683 continue; 3684 } 3685 info = tmp; 3686 goto have_info; 3687 } 3688 3689 ret = 0; 3690 goto out; 3691 } 3692 3693 if (info->offset == offset) { 3694 ret = 1; 3695 goto out; 3696 } 3697 3698 if (offset > info->offset && offset < info->offset + info->bytes) 3699 ret = 1; 3700 out: 3701 
spin_unlock(&ctl->tree_lock); 3702 return ret; 3703 } 3704 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3705
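
/*
 * Illustrative sketch only (never compiled): a hypothetical helper showing how
 * the allocation-side API above pairs up.  The function name and the error
 * handling policy are assumptions made for this example and are not part of
 * btrfs; btrfs_find_space_for_alloc() and btrfs_add_free_space() are the real
 * entry points used here.
 */
#if 0
static int example_carve_and_return(struct btrfs_block_group_cache *cache,
				    u64 bytes)
{
	u64 max_extent_size = 0;
	u64 offset;

	/*
	 * Ask the free space cache for 'bytes', searching from the start
	 * of the block group and requesting no extra empty_size.
	 */
	offset = btrfs_find_space_for_alloc(cache, cache->key.objectid, bytes,
					    0, &max_extent_size);
	if (!offset)
		return -ENOSPC;

	/*
	 * A real caller would record the extent here.  If that later fails,
	 * the space must be handed back so the cache stays consistent.
	 */
	return btrfs_add_free_space(cache, offset, bytes);
}
#endif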