/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	inode->i_mapping->flags &= ~__GFP_FS;

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
		printk(KERN_INFO "Old style space inode found, converting.\n");
		BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!btrfs_fs_closing(root->fs_info)) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}
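/*
 * Create the inode that backs the free space cache for @offset: a plain
 * 0600 regular file flagged NOCOMPRESS|PREALLOC, plus the free space
 * header item keyed by (BTRFS_FREE_SPACE_OBJECTID, @offset).
 */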
int __create_free_space_inode(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path, u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
			      BTRFS_INODE_PREALLOC);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(root, trans, path, ino,
					 block_group->key.objectid);
}
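/*
 * Throw away the current contents of the cache inode: reserve space from
 * the orphan block rsv, zero i_size and drop the page cache, then delete
 * the file extents so the cache can be rewritten from scratch.
 */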
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	loff_t oldsize;
	int ret = 0;

	trans->block_rsv = root->orphan_block_rsv;
	ret = btrfs_block_rsv_check(trans, root,
				    root->orphan_block_rsv,
				    0, 5);
	if (ret)
		return ret;

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret) {
		WARN_ON(1);
		return ret;
	}

	ret = btrfs_update_inode(trans, root, inode);
	return ret;
}

static int readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);

	return 0;
}
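/*
 * Read the on-disk cache for @offset back into @ctl.  Returns 1 if a
 * usable cache was loaded; any other return value means the on-disk copy
 * couldn't be used, and anything partially loaded has been torn down.
 */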
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct page *page;
	struct btrfs_key key;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	pgoff_t index = 0;
	int ret = 0;

	INIT_LIST_HEAD(&bitmaps);

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		goto out;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	else if (ret > 0) {
		btrfs_release_path(path);
		ret = 0;
		goto out;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (BTRFS_I(inode)->generation != generation) {
		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
		       " not match free space cache generation (%llu)\n",
		       (unsigned long long)BTRFS_I(inode)->generation,
		       (unsigned long long)generation);
		goto out;
	}

	if (!num_entries)
		goto out;

	ret = readahead_cache(inode);
	if (ret)
		goto out;

	while (1) {
		struct btrfs_free_space_entry *entry;
		struct btrfs_free_space *e;
		void *addr;
		unsigned long offset = 0;
		int need_loop = 0;

		if (!num_entries && !num_bitmaps)
			break;

		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
		if (!page)
			goto free_cache;

		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache\n");
				goto free_cache;
			}
		}
		addr = kmap(page);

		if (index == 0) {
			u64 *gen;

			/*
			 * We put a bogus crc in the front of the first page in
			 * case old kernels try to mount a fs with the new
			 * format to make sure they discard the cache.
			 */
			addr += sizeof(u64);
			offset += sizeof(u64);

			gen = addr;
			if (*gen != BTRFS_I(inode)->generation) {
				printk(KERN_ERR "btrfs: space cache generation"
				       " (%llu) does not match inode (%llu)\n",
				       (unsigned long long)*gen,
				       (unsigned long long)
				       BTRFS_I(inode)->generation);
				kunmap(page);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}
			addr += sizeof(u64);
			offset += sizeof(u64);
		}
		entry = addr;

		while (1) {
			if (!num_entries)
				break;

			need_loop = 1;
			e = kmem_cache_zalloc(btrfs_free_space_cachep,
					      GFP_NOFS);
			if (!e) {
				kunmap(page);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}

			e->offset = le64_to_cpu(entry->offset);
			e->bytes = le64_to_cpu(entry->bytes);
			if (!e->bytes) {
				kunmap(page);
				kmem_cache_free(btrfs_free_space_cachep, e);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}

			if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
				spin_lock(&ctl->tree_lock);
				ret = link_free_space(ctl, e);
				spin_unlock(&ctl->tree_lock);
				if (ret) {
					printk(KERN_ERR "Duplicate entries in "
					       "free space cache, dumping\n");
					kunmap(page);
					unlock_page(page);
					page_cache_release(page);
					goto free_cache;
				}
			} else {
				e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
				if (!e->bitmap) {
					kunmap(page);
					kmem_cache_free(
						btrfs_free_space_cachep, e);
					unlock_page(page);
					page_cache_release(page);
					goto free_cache;
				}
				spin_lock(&ctl->tree_lock);
				ret = link_free_space(ctl, e);
				ctl->total_bitmaps++;
				ctl->op->recalc_thresholds(ctl);
				spin_unlock(&ctl->tree_lock);
				if (ret) {
					printk(KERN_ERR "Duplicate entries in "
					       "free space cache, dumping\n");
					kunmap(page);
					unlock_page(page);
					page_cache_release(page);
					goto free_cache;
				}
				list_add_tail(&e->list, &bitmaps);
			}

			num_entries--;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				break;
			entry++;
		}

		/*
		 * We read entries out of this page, so move on to the
		 * next page.
		 */
		if (need_loop) {
			kunmap(page);
			goto next;
		}

		/*
		 * Bitmap pages follow the entry pages, in the order the
		 * bitmap entries were added to the cache.
		 */
		e = list_entry(bitmaps.next, struct btrfs_free_space, list);
		list_del_init(&e->list);
		memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
		kunmap(page);
		num_bitmaps--;
next:
		unlock_page(page);
		page_cache_release(page);
		index++;
	}

	ret = 1;
out:
	return ret;
free_cache:
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}
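/*
 * Load the cache for @block_group and cross-check the total against the
 * space accounted in the block group item; a stale or mismatched cache is
 * dropped and flagged BTRFS_DC_CLEAR so it gets rebuilt.
 */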
int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_path *path;
	int ret;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has the wrong amount of free "
		       "space\n", block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		printk(KERN_ERR "btrfs: failed to load free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}
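/*
 * Write @ctl out through the cache inode.  The layout mirrors what the
 * loader above expects: the first page starts with a (deliberately bogus)
 * crc and the transaction generation, entry records fill the entry pages,
 * and each bitmap then takes a full page of its own, in list order.
 * Returns 1 on success, 0 if the cache should simply be skipped, and -1 on
 * failure, in which case the inode's pages and generation are invalidated.
 */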
int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct page **pages;
	struct page *page;
	struct extent_state *cached_state = NULL;
	struct btrfs_free_cluster *cluster = NULL;
	struct extent_io_tree *unpin = NULL;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 start, end, len;
	u64 bytes = 0;
	u32 crc = ~(u32)0;
	int index = 0, num_pages = 0;
	int entries = 0;
	int bitmaps = 0;
	int ret = -1;
	bool next_page = false;
	bool out_of_space = false;

	INIT_LIST_HEAD(&bitmap_list);

	node = rb_first(&ctl->free_space_offset);
	if (!node)
		return 0;

	if (!i_size_read(inode))
		return -1;

	num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
		PAGE_CACHE_SHIFT;

	filemap_write_and_wait(inode->i_mapping);
	btrfs_wait_ordered_range(inode, inode->i_size &
				 ~(root->sectorsize - 1), (u64)-1);

	pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
	if (!pages)
		return -1;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list))
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);

	/*
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = root->fs_info->pinned_extents;

	/*
	 * Lock all pages first so we can lock the extent safely.
	 *
	 * NOTE: Because we hold the ref the entire time we're going to write to
	 * the page, find_get_page should never fail, so we don't do a check
	 * after find_get_page at this point.  Just putting this here so people
	 * know and don't freak out.
	 */
	while (index < num_pages) {
		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
		if (!page) {
			int i;

			/* only release the pages we've locked so far */
			for (i = 0; i < index; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			goto out;
		}
		pages[index] = page;
		index++;
	}

	index = 0;
	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state, GFP_NOFS);

	/*
	 * When searching for pinned extents, we need to start at our start
	 * offset.
	 */
	if (block_group)
		start = block_group->key.objectid;

	/* Write out the extent entries */
	do {
		struct btrfs_free_space_entry *entry;
		void *addr, *orig;
		unsigned long offset = 0;

		next_page = false;

		if (index >= num_pages) {
			out_of_space = true;
			break;
		}

		page = pages[index];

		orig = addr = kmap(page);
		if (index == 0) {
			u64 *gen;

			/*
			 * We're going to put in a bogus crc for this page to
			 * make sure that old kernels who aren't aware of this
			 * format will be sure to discard the cache.
			 */
			addr += sizeof(u64);
			offset += sizeof(u64);

			gen = addr;
			*gen = trans->transid;
			addr += sizeof(u64);
			offset += sizeof(u64);
		}
		entry = addr;

		memset(addr, 0, PAGE_CACHE_SIZE - offset);
		while (node && !next_page) {
			struct btrfs_free_space *e;

			e = rb_entry(node, struct btrfs_free_space, offset_index);
			entries++;

			entry->offset = cpu_to_le64(e->offset);
			entry->bytes = cpu_to_le64(e->bytes);
			if (e->bitmap) {
				entry->type = BTRFS_FREE_SPACE_BITMAP;
				list_add_tail(&e->list, &bitmap_list);
				bitmaps++;
			} else {
				entry->type = BTRFS_FREE_SPACE_EXTENT;
			}
			node = rb_next(node);
			if (!node && cluster) {
				node = rb_first(&cluster->root);
				cluster = NULL;
			}
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				next_page = true;
			entry++;
		}

		/*
		 * We want to add any pinned extents to our free space cache
		 * so we don't leak the space
		 */
		while (block_group && !next_page &&
		       (start < block_group->key.objectid +
			block_group->key.offset)) {
			ret = find_first_extent_bit(unpin, start, &start, &end,
						    EXTENT_DIRTY);
			if (ret) {
				ret = 0;
				break;
			}

			/* This pinned extent is out of our range */
			if (start >= block_group->key.objectid +
			    block_group->key.offset)
				break;

			len = block_group->key.objectid +
				block_group->key.offset - start;
			len = min(len, end + 1 - start);

			entries++;
			entry->offset = cpu_to_le64(start);
			entry->bytes = cpu_to_le64(len);
			entry->type = BTRFS_FREE_SPACE_EXTENT;

			start = end + 1;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				next_page = true;
			entry++;
		}

		/* Generate bogus crc value */
		if (index == 0) {
			u32 *tmp;
			crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
					      PAGE_CACHE_SIZE - sizeof(u64));
			btrfs_csum_final(crc, (char *)&crc);
			crc++;
			tmp = orig;
			*tmp = crc;
		}

		kunmap(page);

		bytes += PAGE_CACHE_SIZE;

		index++;
	} while (node || next_page);
	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		void *addr;
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		if (index >= num_pages) {
			out_of_space = true;
			break;
		}
		page = pages[index];

		addr = kmap(page);
		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
		kunmap(page);
		bytes += PAGE_CACHE_SIZE;

		list_del_init(&entry->list);
		index++;
	}

	if (out_of_space) {
		btrfs_drop_pages(pages, num_pages);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
				     i_size_read(inode) - 1, &cached_state,
				     GFP_NOFS);
		ret = 0;
		goto out;
	}

	/* Zero out the rest of the pages just to make sure */
	while (index < num_pages) {
		void *addr;

		page = pages[index];
		addr = kmap(page);
		memset(addr, 0, PAGE_CACHE_SIZE);
		kunmap(page);
		bytes += PAGE_CACHE_SIZE;
		index++;
	}

	ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
				bytes, &cached_state);
	btrfs_drop_pages(pages, num_pages);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	if (ret) {
		ret = 0;
		goto out;
	}

	BTRFS_I(inode)->generation = trans->transid;

	filemap_write_and_wait(inode->i_mapping);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
	if (ret < 0) {
		ret = -1;
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
		goto out;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			ret = -1;
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
					 GFP_NOFS);
			btrfs_release_path(path);
			goto out;
		}
	}
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	ret = 1;

out:
	kfree(pages);
	if (ret != 1) {
		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	return ret;
}
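/*
 * Entry point used at transaction commit: write the cache for @block_group
 * if it has been set up this transaction.  Write failures are swallowed;
 * the group is flagged BTRFS_DC_ERROR so the on-disk cache is ignored.
 */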
int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	root = root->fs_info->tree_root;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
				      path, block_group->key.objectid);
	if (ret < 0) {
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		ret = 0;

		printk(KERN_ERR "btrfs: failed to write free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}

static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * sharing the same offset.  If this is the case, we
			 * want the extent entry to always be found first if
			 * we do a linear search through the tree, since we
			 * want to have the quickest allocation time, and
			 * allocating from an extent is faster than allocating
			 * from a bitmap.  So if we're inserting a bitmap and
			 * we find an entry at this offset, we want to go
			 * right, or after this entry logically.  If we are
			 * inserting an extent and we've found a bitmap, we
			 * want to go left, or before logically.
			 */
			if (bitmap) {
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}
/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static inline void
__unlink_free_space(struct btrfs_free_space_ctl *ctl,
		    struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_extents--;
}

static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	__unlink_free_space(ctl, info);
	ctl->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	ctl->free_space += info->bytes;
	ctl->free_extents++;
	return ret;
}
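/*
 * Recompute ctl->extents_thresh, the number of extent entries we allow
 * before new free space starts going into bitmaps instead.  The target is
 * to keep cache memory at or below MAX_CACHE_BYTES_PER_GIG per GiB of
 * block group, with at most half of that budget spent on extent entries.
 */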
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->key.offset;
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(ctl->total_bitmaps > max_bitmaps);

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	if (size < 1024 * 1024 * 1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	ctl->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}

static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}
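/*
 * Walk the tree from the bitmap that would cover *offset and return the
 * first entry with at least *bytes available, updating *offset and *bytes
 * to describe the space actually found (searching inside bitmap entries
 * as needed).
 */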
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(ctl, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	link_free_space(ctl, info);
	ctl->total_bitmaps++;

	ctl->op->recalc_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	unlink_free_space(ctl, bitmap_info);
	kfree(bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}
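/*
 * Clear *bytes starting at *offset out of @bitmap_info, walking into the
 * following bitmap when the range spans more than one.  Returns -EAGAIN
 * when the remainder is no longer covered by a bitmap, so the caller can
 * retry against the extent entries.
 */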
static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	return bytes_to_set;
}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents; however, if we have
		 * plenty of cache left then go ahead and add them, no sense in
		 * adding the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= block_group->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * some block groups are so tiny they don't even span a single bitmap,
	 * so don't bother to create a bitmap for them
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return false;

	return true;
}

static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};
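/*
 * Try to push @info into bitmaps: first the bitmap owned by the block
 * group's cluster, then existing bitmaps in the tree, allocating new ones
 * as needed.  Returns 0 if the entry should stay an extent, 1 once the
 * whole range has been added to bitmaps, or a negative errno.
 */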
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
	 */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		struct btrfs_free_cluster *cluster;
		struct rb_node *node;
		struct btrfs_free_space *entry;

		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
		spin_lock(&cluster->lock);
		node = rb_first(&cluster->root);
		if (!node) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!entry->bitmap) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		if (entry->offset == offset_to_bitmap(ctl, offset)) {
			bytes_added = add_bytes_to_bitmap(ctl, entry,
							  offset, bytes);
			bytes -= bytes_added;
			offset += bytes_added;
		}
		spin_unlock(&cluster->lock);
		if (!bytes) {
			ret = 1;
			goto out;
		}
	}

no_cluster_bitmap:
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
	bytes -= bytes_added;
	offset += bytes_added;
	added = 0;

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(ctl, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&ctl->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kmem_cache_zalloc(btrfs_free_space_cachep,
						 GFP_NOFS);
			if (!info) {
				spin_lock(&ctl->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&ctl->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kmem_cache_free(btrfs_free_space_cachep, info);
	}

	return ret;
}
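/*
 * Merge @info with adjacent extent entries on either side.  When
 * @update_stat is false the neighbours' bytes are already accounted in
 * ctl->free_space (e.g. entries coming back from a cluster), so only the
 * rbtree is touched.
 */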
static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
				 struct btrfs_free_space *info, bool update_stat)
{
	struct btrfs_free_space *left_info;
	struct btrfs_free_space *right_info;
	bool merged = false;
	u64 offset = info->offset;
	u64 bytes = info->bytes;

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(ctl, offset - 1, 0, 0);

	if (right_info && !right_info->bitmap) {
		if (update_stat)
			unlink_free_space(ctl, right_info);
		else
			__unlink_free_space(ctl, right_info);
		info->bytes += right_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, right_info);
		merged = true;
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		if (update_stat)
			unlink_free_space(ctl, left_info);
		else
			__unlink_free_space(ctl, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, left_info);
		merged = true;
	}

	return merged;
}

int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
			   u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	int ret = 0;

	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&ctl->tree_lock);

	if (try_merge_free_space(ctl, info, true))
		goto link;

	/*
	 * There was no extent directly to the left or right of this new
	 * extent, so we know we're going to have to allocate a new extent;
	 * before we do that, see if we need to drop this into a bitmap
	 */
	ret = insert_into_bitmap(ctl, info);
	if (ret < 0) {
		goto out;
	} else if (ret) {
		ret = 0;
		goto out;
	}
link:
	ret = link_free_space(ctl, info);
	if (ret)
		kmem_cache_free(btrfs_free_space_cachep, info);
out:
	spin_unlock(&ctl->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
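/*
 * Carve [offset, offset + bytes) out of the free space tracked for
 * @block_group, splitting extent entries and clearing bitmap ranges as
 * needed.  Used by tree log replay, where arbitrary sub-ranges can be
 * reallocated.
 */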
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&ctl->tree_lock);

again:
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset +
			      BITS_PER_BITMAP * ctl->unit - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			       " trying to use %llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(ctl, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			ctl->total_bitmaps--;
		}
		kmem_cache_free(btrfs_free_space_cachep, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(ctl, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(ctl, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(ctl, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(ctl, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kmem_cache_free(btrfs_free_space_cachep, info);
		}
		spin_unlock(&ctl->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&ctl->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;

	spin_lock_init(&ctl->tree_lock);
	ctl->unit = block_group->sectorsize;
	ctl->start = block_group->key.objectid;
	ctl->private = block_group;
	ctl->op = &free_space_op;

	/*
	 * we only want to have 32k of ram per block group for keeping
	 * track of free space, and if we pass 1/2 of that we want to
	 * start converting things over to using bitmaps
	 */
	ctl->extents_thresh = ((1024 * 32) / 2) /
				sizeof(struct btrfs_free_space);
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);

	node = rb_first(&cluster->root);
	while (node) {
		bool bitmap;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);

		bitmap = (entry->bitmap != NULL);
		if (!bitmap)
			try_merge_free_space(ctl, entry, false);
		tree_insert_offset(&ctl->free_space_offset,
				   entry->offset, &entry->offset_index, bitmap);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			free_bitmap(ctl, info);
		}
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
}

void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	spin_lock(&ctl->tree_lock);
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&ctl->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}
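/*
 * Allocate @bytes (plus @empty_size of slack to search with) from the
 * block group's free space.  Returns the logical offset of the allocation,
 * or 0 if nothing large enough was found.
 */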
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&ctl->tree_lock);
	entry = find_free_space(ctl, &offset, &bytes_search);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(ctl, entry, offset, bytes);
		if (!entry->bytes)
			free_bitmap(ctl, entry);
	} else {
		unlink_free_space(ctl, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	}

out:
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl;
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it, don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	ctl = block_group->free_space_ctl;

	/* now return any extents the cluster had on it */
	spin_lock(&ctl->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&ctl->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}
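/*
 * Try to satisfy a cluster allocation of @bytes at or after @min_start
 * from a single bitmap entry.  Returns the start offset on success, or 0
 * if the bitmap has no run long enough.
 */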
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   struct btrfs_free_space *entry,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(ctl, entry, &search_start, &search_bytes);
	if (err)
		return 0;

	ret = search_start;
	bitmap_clear_bits(ctl, entry, ret, bytes);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes ||
		    (!entry->bitmap && entry->offset < min_start)) {
			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}

		if (entry->bitmap) {
			ret = btrfs_alloc_from_bitmap(block_group,
						      cluster, entry, bytes,
						      min_start);
			if (ret == 0) {
				node = rb_next(&entry->offset_index);
				if (!node)
					break;
				entry = rb_entry(node, struct btrfs_free_space,
						 offset_index);
				continue;
			}
		} else {
			ret = entry->offset;

			entry->offset += bytes;
			entry->bytes -= bytes;
		}

		if (entry->bytes == 0)
			rb_erase(&entry->offset_index, &cluster->root);
		break;
	}
out:
	spin_unlock(&cluster->lock);

	if (!ret)
		return 0;

	spin_lock(&ctl->tree_lock);

	ctl->free_space -= bytes;
	if (entry->bytes == 0) {
		ctl->free_extents--;
		if (entry->bitmap) {
			kfree(entry->bitmap);
			ctl->total_bitmaps--;
			ctl->op->recalc_thresholds(ctl);
		}
		kmem_cache_free(btrfs_free_space_cachep, entry);
	}

	spin_unlock(&ctl->tree_lock);

	return ret;
}
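/*
 * Try to build the cluster window out of a single bitmap entry: scan for
 * runs of set bits until at least min_bytes worth have been found within
 * a reasonably dense window, then move the entry into the cluster rbtree.
 */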
/*
 * This searches the block group for plain extents only (no bitmaps) to
 * fill the cluster with.
 */
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
			struct btrfs_free_cluster *cluster,
			struct list_head *bitmaps, u64 offset, u64 bytes,
			u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *first = NULL;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_free_space *prev = NULL;
	struct btrfs_free_space *last;
	struct rb_node *node;
	u64 window_start;
	u64 window_free;
	u64 max_extent;
	u64 max_gap = 128 * 1024;

	entry = tree_search_offset(ctl, offset, 0, 1);
	if (!entry)
		return -ENOSPC;

	/*
	 * We don't want bitmaps, so just move along until we find a normal
	 * extent entry.
	 */
	while (entry->bitmap) {
		if (list_empty(&entry->list))
			list_add_tail(&entry->list, bitmaps);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	window_start = entry->offset;
	window_free = entry->bytes;
	max_extent = entry->bytes;
	first = entry;
	last = entry;
	prev = entry;

	while (window_free <= min_bytes) {
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);

		if (entry->bitmap) {
			if (list_empty(&entry->list))
				list_add_tail(&entry->list, bitmaps);
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large, so reset and try again
		 */
		if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
		    entry->offset - window_start > (min_bytes * 2)) {
			first = entry;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = entry;
			window_free += entry->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
		prev = entry;
	}

	cluster->window_start = first->offset;

	node = &first->offset_index;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 */
	do {
		int ret;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (entry->bitmap)
			continue;

		rb_erase(&entry->offset_index, &ctl->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);
	} while (node && entry != last);

	cluster->max_size = max_extent;

	return 0;
}
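/*
 * Worked example (illustrative, not part of the original file): with
 * min_bytes = 256KiB and free extents of 64KiB at offset 0, 96KiB at
 * 128KiB and 128KiB at 256KiB, no gap exceeds max_gap (128KiB), so the
 * window grows to 64 + 96 + 128 = 288KiB > min_bytes.  All three extents
 * are then moved into the cluster and cluster->max_size ends up at 128KiB.
 */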
/*
 * This specifically looks for bitmaps that may work in the cluster; we
 * assume that we have already failed to find extents that will work.
 */
static noinline int
setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
		     struct btrfs_free_cluster *cluster,
		     struct list_head *bitmaps, u64 offset, u64 bytes,
		     u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret = -ENOSPC;

	if (ctl->total_bitmaps == 0)
		return -ENOSPC;

	/*
	 * First check our cached list of bitmaps and see if there is an entry
	 * here that will work.
	 */
	list_for_each_entry(entry, bitmaps, list) {
		if (entry->bytes < min_bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, min_bytes);
		if (!ret)
			return 0;
	}

	/*
	 * If we do have entries on our list and we are here, then we didn't
	 * find anything, so go ahead and get the next entry after the last
	 * entry in this list and start the search from there.
	 */
	if (!list_empty(bitmaps)) {
		entry = list_entry(bitmaps->prev, struct btrfs_free_space,
				   list);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		goto search;
	}

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
	if (!entry)
		return -ENOSPC;

search:
	node = &entry->offset_index;
	do {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (!entry->bitmap)
			continue;
		if (entry->bytes < min_bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, min_bytes);
	} while (ret && node);

	return ret;
}
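/*
 * Illustrative sketch (not part of the original file): the two setup
 * helpers are meant to be chained, with the bitmap list filled in as a
 * side effect of the failed extent-only pass:
 *
 *	LIST_HEAD(bitmaps);
 *
 *	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps,
 *				      offset, bytes, min_bytes);
 *	if (ret)
 *		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
 *					   offset, bytes, min_bytes);
 *
 * btrfs_find_space_cluster() below does exactly this under ctl->tree_lock.
 * For a sense of scale: a data block group asking for bytes = 1MiB with
 * empty_size = 3MiB gets min_bytes = max(1MiB, (1MiB + 3MiB) >> 2) = 1MiB.
 */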
/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct list_head bitmaps;
	struct btrfs_free_space *entry, *tmp;
	u64 min_bytes;
	int ret;

	/* for metadata, allow allocations with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&ctl->tree_lock);

	/*
	 * If we know we don't have enough space to make a cluster, don't
	 * even bother doing all the work to try and find one.
	 */
	if (ctl->free_space < min_bytes) {
		spin_unlock(&ctl->tree_lock);
		return -ENOSPC;
	}

	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}

	INIT_LIST_HEAD(&bitmaps);
	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
				      bytes, min_bytes);
	if (ret)
		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
					   offset, bytes, min_bytes);

	/* Clear our temporary list */
	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
		list_del_init(&entry->list);

	if (!ret) {
		atomic_inc(&block_group->count);
		list_add_tail(&cluster->block_group_list,
			      &block_group->cluster_list);
		cluster->block_group = block_group;
	}
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}

int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
			   u64 *trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 bytes = 0;
	u64 actually_trimmed;
	int ret = 0;

	*trimmed = 0;

	while (start < end) {
		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, start, 0, 1);
		if (!entry)
			entry = tree_search_offset(ctl,
						   offset_to_bitmap(ctl, start),
						   1, 1);

		if (!entry || entry->offset >= end) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		if (entry->bitmap) {
			ret = search_bitmap(ctl, entry, &start, &bytes);
			if (!ret) {
				if (start >= end) {
					spin_unlock(&ctl->tree_lock);
					break;
				}
				bytes = min(bytes, end - start);
				bitmap_clear_bits(ctl, entry, start, bytes);
				if (entry->bytes == 0)
					free_bitmap(ctl, entry);
			} else {
				start = entry->offset + BITS_PER_BITMAP *
					block_group->sectorsize;
				spin_unlock(&ctl->tree_lock);
				ret = 0;
				continue;
			}
		} else {
			start = entry->offset;
			bytes = min(entry->bytes, end - start);
			unlink_free_space(ctl, entry);
			kmem_cache_free(btrfs_free_space_cachep, entry);
		}

		spin_unlock(&ctl->tree_lock);

		if (bytes >= minlen) {
			int update_ret;

			update_ret = btrfs_update_reserved_bytes(block_group,
								 bytes, 1, 1);

			ret = btrfs_error_discard_extent(fs_info->extent_root,
							 start, bytes,
							 &actually_trimmed);

			btrfs_add_free_space(block_group, start, bytes);
			if (!update_ret)
				btrfs_update_reserved_bytes(block_group,
							    bytes, 0, 1);

			if (ret)
				break;
			*trimmed += actually_trimmed;
		}
		start += bytes;
		bytes = 0;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}
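/*
 * Illustrative sketch (not part of the original file): an FITRIM-style
 * caller walks the block groups and trims each one's logical range,
 * accumulating the per-group counts, along the lines of
 *
 *	u64 group_trimmed, trimmed = 0;
 *
 *	ret = btrfs_trim_block_group(block_group, &group_trimmed,
 *				     block_group->key.objectid,
 *				     block_group->key.objectid +
 *				     block_group->key.offset, minlen);
 *	if (!ret)
 *		trimmed += group_trimmed;
 *
 * which mirrors what the btrfs_trim_fs() caller does.
 */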
/*
 * Find the left-most item in the cache tree, and then return the
 * smallest inode number in the item.
 *
 * Note: the returned inode number may not be the smallest one in
 * the tree, if the left-most item is a bitmap.
 */
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
{
	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 ino = 0;

	spin_lock(&ctl->tree_lock);

	if (RB_EMPTY_ROOT(&ctl->free_space_offset))
		goto out;

	entry = rb_entry(rb_first(&ctl->free_space_offset),
			 struct btrfs_free_space, offset_index);

	if (!entry->bitmap) {
		ino = entry->offset;

		unlink_free_space(ctl, entry);
		entry->offset++;
		entry->bytes--;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	} else {
		u64 offset = 0;
		u64 count = 1;
		int ret;

		ret = search_bitmap(ctl, entry, &offset, &count);
		BUG_ON(ret);

		ino = offset;
		bitmap_clear_bits(ctl, entry, offset, 1);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);
	}
out:
	spin_unlock(&ctl->tree_lock);

	return ino;
}

struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&root->cache_lock);
	if (root->cache_inode)
		inode = igrab(root->cache_inode);
	spin_unlock(&root->cache_lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path, 0);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&root->cache_lock);
	if (!btrfs_fs_closing(root->fs_info))
		root->cache_inode = igrab(inode);
	spin_unlock(&root->cache_lock);

	return inode;
}

int create_free_ino_inode(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_path *path)
{
	return __create_free_space_inode(root, trans, path,
					 BTRFS_FREE_INO_OBJECTID, 0);
}
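/*
 * Illustrative sketch (not part of the original file): callers are
 * expected to try the lookup first and only create the free-ino cache
 * inode when it does not exist yet, roughly
 *
 *	inode = lookup_free_ino_inode(root, path);
 *	if (IS_ERR(inode) && PTR_ERR(inode) == -ENOENT)
 *		ret = create_free_ino_inode(root, trans, path);
 */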
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	/*
	 * If we're unmounting then just return, since this does a search on
	 * the normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to load free ino cache for "
		       "root %llu\n", root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct inode *inode;
	int ret;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to write free ino cache "
		       "for root %llu\n", root->root_key.objectid);

	iput(inode);
	return ret;
}
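/*
 * Illustrative sketch (not part of the original file): over a mount cycle
 * the free-ino cache is round-tripped with the two helpers above, assuming
 * the inode_cache mount option is enabled:
 *
 *	ret = load_free_ino_cache(fs_info, root);
 *	...
 *	ret = btrfs_write_out_ino_cache(root, trans, path);
 *
 * Failures are logged but non-fatal; the cache is a best-effort
 * optimization and is simply rebuilt when it cannot be used.
 */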