1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/slab.h> 22 #include <linux/math64.h> 23 #include "ctree.h" 24 #include "free-space-cache.h" 25 #include "transaction.h" 26 #include "disk-io.h" 27 #include "extent_io.h" 28 #include "inode-map.h" 29 30 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 31 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 32 33 static int link_free_space(struct btrfs_free_space_ctl *ctl, 34 struct btrfs_free_space *info); 35 36 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 37 struct btrfs_path *path, 38 u64 offset) 39 { 40 struct btrfs_key key; 41 struct btrfs_key location; 42 struct btrfs_disk_key disk_key; 43 struct btrfs_free_space_header *header; 44 struct extent_buffer *leaf; 45 struct inode *inode = NULL; 46 int ret; 47 48 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 49 key.offset = offset; 50 key.type = 0; 51 52 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 53 if (ret < 0) 54 return ERR_PTR(ret); 55 if (ret > 0) { 56 btrfs_release_path(path); 57 return ERR_PTR(-ENOENT); 58 } 59 60 leaf = path->nodes[0]; 61 header = btrfs_item_ptr(leaf, path->slots[0], 62 struct btrfs_free_space_header); 63 btrfs_free_space_key(leaf, header, &disk_key); 64 btrfs_disk_key_to_cpu(&location, &disk_key); 65 btrfs_release_path(path); 66 67 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 68 if (!inode) 69 return ERR_PTR(-ENOENT); 70 if (IS_ERR(inode)) 71 return inode; 72 if (is_bad_inode(inode)) { 73 iput(inode); 74 return ERR_PTR(-ENOENT); 75 } 76 77 inode->i_mapping->flags &= ~__GFP_FS; 78 79 return inode; 80 } 81 82 struct inode *lookup_free_space_inode(struct btrfs_root *root, 83 struct btrfs_block_group_cache 84 *block_group, struct btrfs_path *path) 85 { 86 struct inode *inode = NULL; 87 88 spin_lock(&block_group->lock); 89 if (block_group->inode) 90 inode = igrab(block_group->inode); 91 spin_unlock(&block_group->lock); 92 if (inode) 93 return inode; 94 95 inode = __lookup_free_space_inode(root, path, 96 block_group->key.objectid); 97 if (IS_ERR(inode)) 98 return inode; 99 100 spin_lock(&block_group->lock); 101 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) { 102 printk(KERN_INFO "Old style space inode found, converting.\n"); 103 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM; 104 block_group->disk_cache_state = BTRFS_DC_CLEAR; 105 } 106 107 if (!btrfs_fs_closing(root->fs_info)) { 108 block_group->inode = igrab(inode); 109 block_group->iref = 1; 110 } 111 spin_unlock(&block_group->lock); 112 113 return inode; 114 } 115 116 int __create_free_space_inode(struct btrfs_root *root, 117 struct btrfs_trans_handle *trans, 118 struct btrfs_path *path, u64 ino, u64 offset) 119 { 120 struct btrfs_key key; 121 struct btrfs_disk_key disk_key; 122 struct btrfs_free_space_header *header; 123 
struct btrfs_inode_item *inode_item; 124 struct extent_buffer *leaf; 125 int ret; 126 127 ret = btrfs_insert_empty_inode(trans, root, path, ino); 128 if (ret) 129 return ret; 130 131 leaf = path->nodes[0]; 132 inode_item = btrfs_item_ptr(leaf, path->slots[0], 133 struct btrfs_inode_item); 134 btrfs_item_key(leaf, &disk_key, path->slots[0]); 135 memset_extent_buffer(leaf, 0, (unsigned long)inode_item, 136 sizeof(*inode_item)); 137 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 138 btrfs_set_inode_size(leaf, inode_item, 0); 139 btrfs_set_inode_nbytes(leaf, inode_item, 0); 140 btrfs_set_inode_uid(leaf, inode_item, 0); 141 btrfs_set_inode_gid(leaf, inode_item, 0); 142 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 143 btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | 144 BTRFS_INODE_PREALLOC); 145 btrfs_set_inode_nlink(leaf, inode_item, 1); 146 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 147 btrfs_set_inode_block_group(leaf, inode_item, offset); 148 btrfs_mark_buffer_dirty(leaf); 149 btrfs_release_path(path); 150 151 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 152 key.offset = offset; 153 key.type = 0; 154 155 ret = btrfs_insert_empty_item(trans, root, path, &key, 156 sizeof(struct btrfs_free_space_header)); 157 if (ret < 0) { 158 btrfs_release_path(path); 159 return ret; 160 } 161 leaf = path->nodes[0]; 162 header = btrfs_item_ptr(leaf, path->slots[0], 163 struct btrfs_free_space_header); 164 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); 165 btrfs_set_free_space_key(leaf, header, &disk_key); 166 btrfs_mark_buffer_dirty(leaf); 167 btrfs_release_path(path); 168 169 return 0; 170 } 171 172 int create_free_space_inode(struct btrfs_root *root, 173 struct btrfs_trans_handle *trans, 174 struct btrfs_block_group_cache *block_group, 175 struct btrfs_path *path) 176 { 177 int ret; 178 u64 ino; 179 180 ret = btrfs_find_free_objectid(root, &ino); 181 if (ret < 0) 182 return ret; 183 184 return __create_free_space_inode(root, trans, path, ino, 185 block_group->key.objectid); 186 } 187 188 int btrfs_truncate_free_space_cache(struct btrfs_root *root, 189 struct btrfs_trans_handle *trans, 190 struct btrfs_path *path, 191 struct inode *inode) 192 { 193 struct btrfs_block_rsv *rsv; 194 loff_t oldsize; 195 int ret = 0; 196 197 rsv = trans->block_rsv; 198 trans->block_rsv = root->orphan_block_rsv; 199 ret = btrfs_block_rsv_check(trans, root, 200 root->orphan_block_rsv, 201 0, 5); 202 if (ret) 203 return ret; 204 205 oldsize = i_size_read(inode); 206 btrfs_i_size_write(inode, 0); 207 truncate_pagecache(inode, oldsize, 0); 208 209 /* 210 * We don't need an orphan item because truncating the free space cache 211 * will never be split across transactions. 
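	 * (The block_rsv swap above lets the truncate draw from the orphan
	 * reservation; the original trans->block_rsv is restored right after
	 * btrfs_truncate_inode_items() returns.)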
212 */ 213 ret = btrfs_truncate_inode_items(trans, root, inode, 214 0, BTRFS_EXTENT_DATA_KEY); 215 216 trans->block_rsv = rsv; 217 if (ret) { 218 WARN_ON(1); 219 return ret; 220 } 221 222 ret = btrfs_update_inode(trans, root, inode); 223 return ret; 224 } 225 226 static int readahead_cache(struct inode *inode) 227 { 228 struct file_ra_state *ra; 229 unsigned long last_index; 230 231 ra = kzalloc(sizeof(*ra), GFP_NOFS); 232 if (!ra) 233 return -ENOMEM; 234 235 file_ra_state_init(ra, inode->i_mapping); 236 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 237 238 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 239 240 kfree(ra); 241 242 return 0; 243 } 244 245 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 246 struct btrfs_free_space_ctl *ctl, 247 struct btrfs_path *path, u64 offset) 248 { 249 struct btrfs_free_space_header *header; 250 struct extent_buffer *leaf; 251 struct page *page; 252 struct btrfs_key key; 253 struct list_head bitmaps; 254 u64 num_entries; 255 u64 num_bitmaps; 256 u64 generation; 257 pgoff_t index = 0; 258 int ret = 0; 259 260 INIT_LIST_HEAD(&bitmaps); 261 262 /* Nothing in the space cache, goodbye */ 263 if (!i_size_read(inode)) 264 goto out; 265 266 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 267 key.offset = offset; 268 key.type = 0; 269 270 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 271 if (ret < 0) 272 goto out; 273 else if (ret > 0) { 274 btrfs_release_path(path); 275 ret = 0; 276 goto out; 277 } 278 279 ret = -1; 280 281 leaf = path->nodes[0]; 282 header = btrfs_item_ptr(leaf, path->slots[0], 283 struct btrfs_free_space_header); 284 num_entries = btrfs_free_space_entries(leaf, header); 285 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 286 generation = btrfs_free_space_generation(leaf, header); 287 btrfs_release_path(path); 288 289 if (BTRFS_I(inode)->generation != generation) { 290 printk(KERN_ERR "btrfs: free space inode generation (%llu) did" 291 " not match free space cache generation (%llu)\n", 292 (unsigned long long)BTRFS_I(inode)->generation, 293 (unsigned long long)generation); 294 goto out; 295 } 296 297 if (!num_entries) 298 goto out; 299 300 ret = readahead_cache(inode); 301 if (ret) 302 goto out; 303 304 while (1) { 305 struct btrfs_free_space_entry *entry; 306 struct btrfs_free_space *e; 307 void *addr; 308 unsigned long offset = 0; 309 int need_loop = 0; 310 311 if (!num_entries && !num_bitmaps) 312 break; 313 314 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 315 if (!page) 316 goto free_cache; 317 318 if (!PageUptodate(page)) { 319 btrfs_readpage(NULL, page); 320 lock_page(page); 321 if (!PageUptodate(page)) { 322 unlock_page(page); 323 page_cache_release(page); 324 printk(KERN_ERR "btrfs: error reading free " 325 "space cache\n"); 326 goto free_cache; 327 } 328 } 329 addr = kmap(page); 330 331 if (index == 0) { 332 u64 *gen; 333 334 /* 335 * We put a bogus crc in the front of the first page in 336 * case old kernels try to mount a fs with the new 337 * format to make sure they discard the cache. 
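			 * The cache is actually validated against the
			 * generation stored in the u64 that immediately
			 * follows this bogus crc.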
338 */ 339 addr += sizeof(u64); 340 offset += sizeof(u64); 341 342 gen = addr; 343 if (*gen != BTRFS_I(inode)->generation) { 344 printk(KERN_ERR "btrfs: space cache generation" 345 " (%llu) does not match inode (%llu)\n", 346 (unsigned long long)*gen, 347 (unsigned long long) 348 BTRFS_I(inode)->generation); 349 kunmap(page); 350 unlock_page(page); 351 page_cache_release(page); 352 goto free_cache; 353 } 354 addr += sizeof(u64); 355 offset += sizeof(u64); 356 } 357 entry = addr; 358 359 while (1) { 360 if (!num_entries) 361 break; 362 363 need_loop = 1; 364 e = kmem_cache_zalloc(btrfs_free_space_cachep, 365 GFP_NOFS); 366 if (!e) { 367 kunmap(page); 368 unlock_page(page); 369 page_cache_release(page); 370 goto free_cache; 371 } 372 373 e->offset = le64_to_cpu(entry->offset); 374 e->bytes = le64_to_cpu(entry->bytes); 375 if (!e->bytes) { 376 kunmap(page); 377 kmem_cache_free(btrfs_free_space_cachep, e); 378 unlock_page(page); 379 page_cache_release(page); 380 goto free_cache; 381 } 382 383 if (entry->type == BTRFS_FREE_SPACE_EXTENT) { 384 spin_lock(&ctl->tree_lock); 385 ret = link_free_space(ctl, e); 386 spin_unlock(&ctl->tree_lock); 387 if (ret) { 388 printk(KERN_ERR "Duplicate entries in " 389 "free space cache, dumping\n"); 390 kunmap(page); 391 unlock_page(page); 392 page_cache_release(page); 393 goto free_cache; 394 } 395 } else { 396 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 397 if (!e->bitmap) { 398 kunmap(page); 399 kmem_cache_free( 400 btrfs_free_space_cachep, e); 401 unlock_page(page); 402 page_cache_release(page); 403 goto free_cache; 404 } 405 spin_lock(&ctl->tree_lock); 406 ret = link_free_space(ctl, e); 407 ctl->total_bitmaps++; 408 ctl->op->recalc_thresholds(ctl); 409 spin_unlock(&ctl->tree_lock); 410 if (ret) { 411 printk(KERN_ERR "Duplicate entries in " 412 "free space cache, dumping\n"); 413 kunmap(page); 414 unlock_page(page); 415 page_cache_release(page); 416 goto free_cache; 417 } 418 list_add_tail(&e->list, &bitmaps); 419 } 420 421 num_entries--; 422 offset += sizeof(struct btrfs_free_space_entry); 423 if (offset + sizeof(struct btrfs_free_space_entry) >= 424 PAGE_CACHE_SIZE) 425 break; 426 entry++; 427 } 428 429 /* 430 * We read an entry out of this page, we need to move on to the 431 * next page. 432 */ 433 if (need_loop) { 434 kunmap(page); 435 goto next; 436 } 437 438 /* 439 * We add the bitmaps at the end of the entries in order that 440 * the bitmap entries are added to the cache. 441 */ 442 e = list_entry(bitmaps.next, struct btrfs_free_space, list); 443 list_del_init(&e->list); 444 memcpy(e->bitmap, addr, PAGE_CACHE_SIZE); 445 kunmap(page); 446 num_bitmaps--; 447 next: 448 unlock_page(page); 449 page_cache_release(page); 450 index++; 451 } 452 453 ret = 1; 454 out: 455 return ret; 456 free_cache: 457 __btrfs_remove_free_space_cache(ctl); 458 goto out; 459 } 460 461 int load_free_space_cache(struct btrfs_fs_info *fs_info, 462 struct btrfs_block_group_cache *block_group) 463 { 464 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 465 struct btrfs_root *root = fs_info->tree_root; 466 struct inode *inode; 467 struct btrfs_path *path; 468 int ret; 469 bool matched; 470 u64 used = btrfs_block_group_used(&block_group->item); 471 472 /* 473 * If we're unmounting then just return, since this does a search on the 474 * normal root and not the commit root and we could deadlock. 
 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has the wrong amount of free "
		       "space\n", block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		printk(KERN_ERR "btrfs: failed to load free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}

int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct page **pages;
	struct page *page;
	struct extent_state *cached_state = NULL;
	struct btrfs_free_cluster *cluster = NULL;
	struct extent_io_tree *unpin = NULL;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 start, end, len;
	u64 bytes = 0;
	u32 crc = ~(u32)0;
	int index = 0, num_pages = 0;
	int entries = 0;
	int bitmaps = 0;
	int ret = -1;
	bool next_page = false;
	bool out_of_space = false;

	INIT_LIST_HEAD(&bitmap_list);

	node = rb_first(&ctl->free_space_offset);
	if (!node)
		return 0;

	if (!i_size_read(inode))
		return -1;

	num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
		PAGE_CACHE_SHIFT;

	filemap_write_and_wait(inode->i_mapping);
	btrfs_wait_ordered_range(inode, inode->i_size &
				 ~(root->sectorsize - 1), (u64)-1);

	pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
	if (!pages)
		return -1;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list))
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);

	/*
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = root->fs_info->pinned_extents;

	/*
	 * Lock all pages first so we can lock the extent safely.
	 *
	 * NOTE: Because we hold the ref the entire time we're going to write to
	 * the page, find_get_page should never fail, so we don't do a check
	 * after find_get_page at this point.
Just putting this here so people 598 * know and don't freak out. 599 */ 600 while (index < num_pages) { 601 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 602 if (!page) { 603 int i; 604 605 for (i = 0; i < num_pages; i++) { 606 unlock_page(pages[i]); 607 page_cache_release(pages[i]); 608 } 609 goto out; 610 } 611 pages[index] = page; 612 index++; 613 } 614 615 index = 0; 616 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 617 0, &cached_state, GFP_NOFS); 618 619 /* 620 * When searching for pinned extents, we need to start at our start 621 * offset. 622 */ 623 if (block_group) 624 start = block_group->key.objectid; 625 626 /* Write out the extent entries */ 627 do { 628 struct btrfs_free_space_entry *entry; 629 void *addr, *orig; 630 unsigned long offset = 0; 631 632 next_page = false; 633 634 if (index >= num_pages) { 635 out_of_space = true; 636 break; 637 } 638 639 page = pages[index]; 640 641 orig = addr = kmap(page); 642 if (index == 0) { 643 u64 *gen; 644 645 /* 646 * We're going to put in a bogus crc for this page to 647 * make sure that old kernels who aren't aware of this 648 * format will be sure to discard the cache. 649 */ 650 addr += sizeof(u64); 651 offset += sizeof(u64); 652 653 gen = addr; 654 *gen = trans->transid; 655 addr += sizeof(u64); 656 offset += sizeof(u64); 657 } 658 entry = addr; 659 660 memset(addr, 0, PAGE_CACHE_SIZE - offset); 661 while (node && !next_page) { 662 struct btrfs_free_space *e; 663 664 e = rb_entry(node, struct btrfs_free_space, offset_index); 665 entries++; 666 667 entry->offset = cpu_to_le64(e->offset); 668 entry->bytes = cpu_to_le64(e->bytes); 669 if (e->bitmap) { 670 entry->type = BTRFS_FREE_SPACE_BITMAP; 671 list_add_tail(&e->list, &bitmap_list); 672 bitmaps++; 673 } else { 674 entry->type = BTRFS_FREE_SPACE_EXTENT; 675 } 676 node = rb_next(node); 677 if (!node && cluster) { 678 node = rb_first(&cluster->root); 679 cluster = NULL; 680 } 681 offset += sizeof(struct btrfs_free_space_entry); 682 if (offset + sizeof(struct btrfs_free_space_entry) >= 683 PAGE_CACHE_SIZE) 684 next_page = true; 685 entry++; 686 } 687 688 /* 689 * We want to add any pinned extents to our free space cache 690 * so we don't leak the space 691 */ 692 while (block_group && !next_page && 693 (start < block_group->key.objectid + 694 block_group->key.offset)) { 695 ret = find_first_extent_bit(unpin, start, &start, &end, 696 EXTENT_DIRTY); 697 if (ret) { 698 ret = 0; 699 break; 700 } 701 702 /* This pinned extent is out of our range */ 703 if (start >= block_group->key.objectid + 704 block_group->key.offset) 705 break; 706 707 len = block_group->key.objectid + 708 block_group->key.offset - start; 709 len = min(len, end + 1 - start); 710 711 entries++; 712 entry->offset = cpu_to_le64(start); 713 entry->bytes = cpu_to_le64(len); 714 entry->type = BTRFS_FREE_SPACE_EXTENT; 715 716 start = end + 1; 717 offset += sizeof(struct btrfs_free_space_entry); 718 if (offset + sizeof(struct btrfs_free_space_entry) >= 719 PAGE_CACHE_SIZE) 720 next_page = true; 721 entry++; 722 } 723 724 /* Generate bogus crc value */ 725 if (index == 0) { 726 u32 *tmp; 727 crc = btrfs_csum_data(root, orig + sizeof(u64), crc, 728 PAGE_CACHE_SIZE - sizeof(u64)); 729 btrfs_csum_final(crc, (char *)&crc); 730 crc++; 731 tmp = orig; 732 *tmp = crc; 733 } 734 735 kunmap(page); 736 737 bytes += PAGE_CACHE_SIZE; 738 739 index++; 740 } while (node || next_page); 741 742 /* Write out the bitmaps */ 743 list_for_each_safe(pos, n, &bitmap_list) { 744 void *addr; 745 struct 
btrfs_free_space *entry = 746 list_entry(pos, struct btrfs_free_space, list); 747 748 if (index >= num_pages) { 749 out_of_space = true; 750 break; 751 } 752 page = pages[index]; 753 754 addr = kmap(page); 755 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); 756 kunmap(page); 757 bytes += PAGE_CACHE_SIZE; 758 759 list_del_init(&entry->list); 760 index++; 761 } 762 763 if (out_of_space) { 764 btrfs_drop_pages(pages, num_pages); 765 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 766 i_size_read(inode) - 1, &cached_state, 767 GFP_NOFS); 768 ret = 0; 769 goto out; 770 } 771 772 /* Zero out the rest of the pages just to make sure */ 773 while (index < num_pages) { 774 void *addr; 775 776 page = pages[index]; 777 addr = kmap(page); 778 memset(addr, 0, PAGE_CACHE_SIZE); 779 kunmap(page); 780 bytes += PAGE_CACHE_SIZE; 781 index++; 782 } 783 784 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, 785 bytes, &cached_state); 786 btrfs_drop_pages(pages, num_pages); 787 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 788 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 789 790 if (ret) { 791 ret = 0; 792 goto out; 793 } 794 795 BTRFS_I(inode)->generation = trans->transid; 796 797 filemap_write_and_wait(inode->i_mapping); 798 799 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 800 key.offset = offset; 801 key.type = 0; 802 803 ret = btrfs_search_slot(trans, root, &key, path, 1, 1); 804 if (ret < 0) { 805 ret = -1; 806 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 807 EXTENT_DIRTY | EXTENT_DELALLOC | 808 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); 809 goto out; 810 } 811 leaf = path->nodes[0]; 812 if (ret > 0) { 813 struct btrfs_key found_key; 814 BUG_ON(!path->slots[0]); 815 path->slots[0]--; 816 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 817 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 818 found_key.offset != offset) { 819 ret = -1; 820 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, 821 EXTENT_DIRTY | EXTENT_DELALLOC | 822 EXTENT_DO_ACCOUNTING, 0, 0, NULL, 823 GFP_NOFS); 824 btrfs_release_path(path); 825 goto out; 826 } 827 } 828 header = btrfs_item_ptr(leaf, path->slots[0], 829 struct btrfs_free_space_header); 830 btrfs_set_free_space_entries(leaf, header, entries); 831 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 832 btrfs_set_free_space_generation(leaf, header, trans->transid); 833 btrfs_mark_buffer_dirty(leaf); 834 btrfs_release_path(path); 835 836 ret = 1; 837 838 out: 839 kfree(pages); 840 if (ret != 1) { 841 invalidate_inode_pages2_range(inode->i_mapping, 0, index); 842 BTRFS_I(inode)->generation = 0; 843 } 844 btrfs_update_inode(trans, root, inode); 845 return ret; 846 } 847 848 int btrfs_write_out_cache(struct btrfs_root *root, 849 struct btrfs_trans_handle *trans, 850 struct btrfs_block_group_cache *block_group, 851 struct btrfs_path *path) 852 { 853 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 854 struct inode *inode; 855 int ret = 0; 856 857 root = root->fs_info->tree_root; 858 859 spin_lock(&block_group->lock); 860 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 861 spin_unlock(&block_group->lock); 862 return 0; 863 } 864 spin_unlock(&block_group->lock); 865 866 inode = lookup_free_space_inode(root, block_group, path); 867 if (IS_ERR(inode)) 868 return 0; 869 870 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, 871 path, block_group->key.objectid); 872 if (ret < 0) { 873 spin_lock(&block_group->lock); 874 block_group->disk_cache_state = BTRFS_DC_ERROR; 875 spin_unlock(&block_group->lock); 
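		/*
		 * Failing to write the cache is not fatal: the block group was
		 * flagged BTRFS_DC_ERROR above so the stale on-disk cache will
		 * not be trusted, and we return 0 so the error does not
		 * propagate to the caller.
		 */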
		ret = 0;

		printk(KERN_ERR "btrfs: failed to write free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}

static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset. If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap. So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically. If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
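		 * (tree_insert_offset() enforces that ordering by sending a
		 * bitmap entry to the right of an extent entry with the same
		 * offset.)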
1006 */ 1007 n = rb_next(n); 1008 if (!n) 1009 return NULL; 1010 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1011 if (entry->offset != offset) 1012 return NULL; 1013 1014 WARN_ON(!entry->bitmap); 1015 return entry; 1016 } else if (entry) { 1017 if (entry->bitmap) { 1018 /* 1019 * if previous extent entry covers the offset, 1020 * we should return it instead of the bitmap entry 1021 */ 1022 n = &entry->offset_index; 1023 while (1) { 1024 n = rb_prev(n); 1025 if (!n) 1026 break; 1027 prev = rb_entry(n, struct btrfs_free_space, 1028 offset_index); 1029 if (!prev->bitmap) { 1030 if (prev->offset + prev->bytes > offset) 1031 entry = prev; 1032 break; 1033 } 1034 } 1035 } 1036 return entry; 1037 } 1038 1039 if (!prev) 1040 return NULL; 1041 1042 /* find last entry before the 'offset' */ 1043 entry = prev; 1044 if (entry->offset > offset) { 1045 n = rb_prev(&entry->offset_index); 1046 if (n) { 1047 entry = rb_entry(n, struct btrfs_free_space, 1048 offset_index); 1049 BUG_ON(entry->offset > offset); 1050 } else { 1051 if (fuzzy) 1052 return entry; 1053 else 1054 return NULL; 1055 } 1056 } 1057 1058 if (entry->bitmap) { 1059 n = &entry->offset_index; 1060 while (1) { 1061 n = rb_prev(n); 1062 if (!n) 1063 break; 1064 prev = rb_entry(n, struct btrfs_free_space, 1065 offset_index); 1066 if (!prev->bitmap) { 1067 if (prev->offset + prev->bytes > offset) 1068 return prev; 1069 break; 1070 } 1071 } 1072 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1073 return entry; 1074 } else if (entry->offset + entry->bytes > offset) 1075 return entry; 1076 1077 if (!fuzzy) 1078 return NULL; 1079 1080 while (1) { 1081 if (entry->bitmap) { 1082 if (entry->offset + BITS_PER_BITMAP * 1083 ctl->unit > offset) 1084 break; 1085 } else { 1086 if (entry->offset + entry->bytes > offset) 1087 break; 1088 } 1089 1090 n = rb_next(&entry->offset_index); 1091 if (!n) 1092 return NULL; 1093 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1094 } 1095 return entry; 1096 } 1097 1098 static inline void 1099 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1100 struct btrfs_free_space *info) 1101 { 1102 rb_erase(&info->offset_index, &ctl->free_space_offset); 1103 ctl->free_extents--; 1104 } 1105 1106 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1107 struct btrfs_free_space *info) 1108 { 1109 __unlink_free_space(ctl, info); 1110 ctl->free_space -= info->bytes; 1111 } 1112 1113 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1114 struct btrfs_free_space *info) 1115 { 1116 int ret = 0; 1117 1118 BUG_ON(!info->bitmap && !info->bytes); 1119 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1120 &info->offset_index, (info->bitmap != NULL)); 1121 if (ret) 1122 return ret; 1123 1124 ctl->free_space += info->bytes; 1125 ctl->free_extents++; 1126 return ret; 1127 } 1128 1129 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1130 { 1131 struct btrfs_block_group_cache *block_group = ctl->private; 1132 u64 max_bytes; 1133 u64 bitmap_bytes; 1134 u64 extent_bytes; 1135 u64 size = block_group->key.offset; 1136 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; 1137 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1138 1139 BUG_ON(ctl->total_bitmaps > max_bitmaps); 1140 1141 /* 1142 * The goal is to keep the total amount of memory used per 1gb of space 1143 * at or below 32k, so we need to adjust how much memory we allow to be 1144 * used by extent based free space tracking 1145 */ 1146 if (size < 1024 * 1024 * 
1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	ctl->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}

static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}

static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if
(entry->bitmap) { 1267 ret = search_bitmap(ctl, entry, offset, bytes); 1268 if (!ret) 1269 return entry; 1270 continue; 1271 } 1272 1273 *offset = entry->offset; 1274 *bytes = entry->bytes; 1275 return entry; 1276 } 1277 1278 return NULL; 1279 } 1280 1281 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1282 struct btrfs_free_space *info, u64 offset) 1283 { 1284 info->offset = offset_to_bitmap(ctl, offset); 1285 info->bytes = 0; 1286 link_free_space(ctl, info); 1287 ctl->total_bitmaps++; 1288 1289 ctl->op->recalc_thresholds(ctl); 1290 } 1291 1292 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1293 struct btrfs_free_space *bitmap_info) 1294 { 1295 unlink_free_space(ctl, bitmap_info); 1296 kfree(bitmap_info->bitmap); 1297 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1298 ctl->total_bitmaps--; 1299 ctl->op->recalc_thresholds(ctl); 1300 } 1301 1302 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1303 struct btrfs_free_space *bitmap_info, 1304 u64 *offset, u64 *bytes) 1305 { 1306 u64 end; 1307 u64 search_start, search_bytes; 1308 int ret; 1309 1310 again: 1311 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; 1312 1313 /* 1314 * XXX - this can go away after a few releases. 1315 * 1316 * since the only user of btrfs_remove_free_space is the tree logging 1317 * stuff, and the only way to test that is under crash conditions, we 1318 * want to have this debug stuff here just in case somethings not 1319 * working. Search the bitmap for the space we are trying to use to 1320 * make sure its actually there. If its not there then we need to stop 1321 * because something has gone wrong. 1322 */ 1323 search_start = *offset; 1324 search_bytes = *bytes; 1325 search_bytes = min(search_bytes, end - search_start + 1); 1326 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); 1327 BUG_ON(ret < 0 || search_start != *offset); 1328 1329 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 1330 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1); 1331 *bytes -= end - *offset + 1; 1332 *offset = end + 1; 1333 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 1334 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes); 1335 *bytes = 0; 1336 } 1337 1338 if (*bytes) { 1339 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1340 if (!bitmap_info->bytes) 1341 free_bitmap(ctl, bitmap_info); 1342 1343 /* 1344 * no entry after this bitmap, but we still have bytes to 1345 * remove, so something has gone wrong. 1346 */ 1347 if (!next) 1348 return -EINVAL; 1349 1350 bitmap_info = rb_entry(next, struct btrfs_free_space, 1351 offset_index); 1352 1353 /* 1354 * if the next entry isn't a bitmap we need to return to let the 1355 * extent stuff do its work. 1356 */ 1357 if (!bitmap_info->bitmap) 1358 return -EAGAIN; 1359 1360 /* 1361 * Ok the next item is a bitmap, but it may not actually hold 1362 * the information for the rest of this free space stuff, so 1363 * look for it, and if we don't find it return so we can try 1364 * everything over again. 
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= block_group->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return false;

	return true;
}

static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds = recalculate_thresholds,
	.use_bitmap = use_bitmap,
};

static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
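	 * (A bitmap that belongs to a cluster lives in cluster->root rather
	 * than in ctl->free_space_offset, so the tree_search_offset() below
	 * would not find it.)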
1460 */ 1461 if (block_group && !list_empty(&block_group->cluster_list)) { 1462 struct btrfs_free_cluster *cluster; 1463 struct rb_node *node; 1464 struct btrfs_free_space *entry; 1465 1466 cluster = list_entry(block_group->cluster_list.next, 1467 struct btrfs_free_cluster, 1468 block_group_list); 1469 spin_lock(&cluster->lock); 1470 node = rb_first(&cluster->root); 1471 if (!node) { 1472 spin_unlock(&cluster->lock); 1473 goto no_cluster_bitmap; 1474 } 1475 1476 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1477 if (!entry->bitmap) { 1478 spin_unlock(&cluster->lock); 1479 goto no_cluster_bitmap; 1480 } 1481 1482 if (entry->offset == offset_to_bitmap(ctl, offset)) { 1483 bytes_added = add_bytes_to_bitmap(ctl, entry, 1484 offset, bytes); 1485 bytes -= bytes_added; 1486 offset += bytes_added; 1487 } 1488 spin_unlock(&cluster->lock); 1489 if (!bytes) { 1490 ret = 1; 1491 goto out; 1492 } 1493 } 1494 1495 no_cluster_bitmap: 1496 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1497 1, 0); 1498 if (!bitmap_info) { 1499 BUG_ON(added); 1500 goto new_bitmap; 1501 } 1502 1503 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 1504 bytes -= bytes_added; 1505 offset += bytes_added; 1506 added = 0; 1507 1508 if (!bytes) { 1509 ret = 1; 1510 goto out; 1511 } else 1512 goto again; 1513 1514 new_bitmap: 1515 if (info && info->bitmap) { 1516 add_new_bitmap(ctl, info, offset); 1517 added = 1; 1518 info = NULL; 1519 goto again; 1520 } else { 1521 spin_unlock(&ctl->tree_lock); 1522 1523 /* no pre-allocated info, allocate a new one */ 1524 if (!info) { 1525 info = kmem_cache_zalloc(btrfs_free_space_cachep, 1526 GFP_NOFS); 1527 if (!info) { 1528 spin_lock(&ctl->tree_lock); 1529 ret = -ENOMEM; 1530 goto out; 1531 } 1532 } 1533 1534 /* allocate the bitmap */ 1535 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 1536 spin_lock(&ctl->tree_lock); 1537 if (!info->bitmap) { 1538 ret = -ENOMEM; 1539 goto out; 1540 } 1541 goto again; 1542 } 1543 1544 out: 1545 if (info) { 1546 if (info->bitmap) 1547 kfree(info->bitmap); 1548 kmem_cache_free(btrfs_free_space_cachep, info); 1549 } 1550 1551 return ret; 1552 } 1553 1554 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 1555 struct btrfs_free_space *info, bool update_stat) 1556 { 1557 struct btrfs_free_space *left_info; 1558 struct btrfs_free_space *right_info; 1559 bool merged = false; 1560 u64 offset = info->offset; 1561 u64 bytes = info->bytes; 1562 1563 /* 1564 * first we want to see if there is free space adjacent to the range we 1565 * are adding, if there is remove that struct and add a new one to 1566 * cover the entire range 1567 */ 1568 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 1569 if (right_info && rb_prev(&right_info->offset_index)) 1570 left_info = rb_entry(rb_prev(&right_info->offset_index), 1571 struct btrfs_free_space, offset_index); 1572 else 1573 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 1574 1575 if (right_info && !right_info->bitmap) { 1576 if (update_stat) 1577 unlink_free_space(ctl, right_info); 1578 else 1579 __unlink_free_space(ctl, right_info); 1580 info->bytes += right_info->bytes; 1581 kmem_cache_free(btrfs_free_space_cachep, right_info); 1582 merged = true; 1583 } 1584 1585 if (left_info && !left_info->bitmap && 1586 left_info->offset + left_info->bytes == offset) { 1587 if (update_stat) 1588 unlink_free_space(ctl, left_info); 1589 else 1590 __unlink_free_space(ctl, left_info); 1591 info->offset = left_info->offset; 1592 info->bytes += 
left_info->bytes; 1593 kmem_cache_free(btrfs_free_space_cachep, left_info); 1594 merged = true; 1595 } 1596 1597 return merged; 1598 } 1599 1600 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, 1601 u64 offset, u64 bytes) 1602 { 1603 struct btrfs_free_space *info; 1604 int ret = 0; 1605 1606 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 1607 if (!info) 1608 return -ENOMEM; 1609 1610 info->offset = offset; 1611 info->bytes = bytes; 1612 1613 spin_lock(&ctl->tree_lock); 1614 1615 if (try_merge_free_space(ctl, info, true)) 1616 goto link; 1617 1618 /* 1619 * There was no extent directly to the left or right of this new 1620 * extent then we know we're going to have to allocate a new extent, so 1621 * before we do that see if we need to drop this into a bitmap 1622 */ 1623 ret = insert_into_bitmap(ctl, info); 1624 if (ret < 0) { 1625 goto out; 1626 } else if (ret) { 1627 ret = 0; 1628 goto out; 1629 } 1630 link: 1631 ret = link_free_space(ctl, info); 1632 if (ret) 1633 kmem_cache_free(btrfs_free_space_cachep, info); 1634 out: 1635 spin_unlock(&ctl->tree_lock); 1636 1637 if (ret) { 1638 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1639 BUG_ON(ret == -EEXIST); 1640 } 1641 1642 return ret; 1643 } 1644 1645 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 1646 u64 offset, u64 bytes) 1647 { 1648 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1649 struct btrfs_free_space *info; 1650 struct btrfs_free_space *next_info = NULL; 1651 int ret = 0; 1652 1653 spin_lock(&ctl->tree_lock); 1654 1655 again: 1656 info = tree_search_offset(ctl, offset, 0, 0); 1657 if (!info) { 1658 /* 1659 * oops didn't find an extent that matched the space we wanted 1660 * to remove, look for a bitmap instead 1661 */ 1662 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1663 1, 0); 1664 if (!info) { 1665 WARN_ON(1); 1666 goto out_lock; 1667 } 1668 } 1669 1670 if (info->bytes < bytes && rb_next(&info->offset_index)) { 1671 u64 end; 1672 next_info = rb_entry(rb_next(&info->offset_index), 1673 struct btrfs_free_space, 1674 offset_index); 1675 1676 if (next_info->bitmap) 1677 end = next_info->offset + 1678 BITS_PER_BITMAP * ctl->unit - 1; 1679 else 1680 end = next_info->offset + next_info->bytes; 1681 1682 if (next_info->bytes < bytes || 1683 next_info->offset > offset || offset > end) { 1684 printk(KERN_CRIT "Found free space at %llu, size %llu," 1685 " trying to use %llu\n", 1686 (unsigned long long)info->offset, 1687 (unsigned long long)info->bytes, 1688 (unsigned long long)bytes); 1689 WARN_ON(1); 1690 ret = -EINVAL; 1691 goto out_lock; 1692 } 1693 1694 info = next_info; 1695 } 1696 1697 if (info->bytes == bytes) { 1698 unlink_free_space(ctl, info); 1699 if (info->bitmap) { 1700 kfree(info->bitmap); 1701 ctl->total_bitmaps--; 1702 } 1703 kmem_cache_free(btrfs_free_space_cachep, info); 1704 goto out_lock; 1705 } 1706 1707 if (!info->bitmap && info->offset == offset) { 1708 unlink_free_space(ctl, info); 1709 info->offset += bytes; 1710 info->bytes -= bytes; 1711 link_free_space(ctl, info); 1712 goto out_lock; 1713 } 1714 1715 if (!info->bitmap && info->offset <= offset && 1716 info->offset + info->bytes >= offset + bytes) { 1717 u64 old_start = info->offset; 1718 /* 1719 * we're freeing space in the middle of the info, 1720 * this can happen during tree log replay 1721 * 1722 * first unlink the old info and then 1723 * insert it again after the hole we're creating 1724 */ 1725 unlink_free_space(ctl, info); 1726 if (offset + bytes < 
info->offset + info->bytes) { 1727 u64 old_end = info->offset + info->bytes; 1728 1729 info->offset = offset + bytes; 1730 info->bytes = old_end - info->offset; 1731 ret = link_free_space(ctl, info); 1732 WARN_ON(ret); 1733 if (ret) 1734 goto out_lock; 1735 } else { 1736 /* the hole we're creating ends at the end 1737 * of the info struct, just free the info 1738 */ 1739 kmem_cache_free(btrfs_free_space_cachep, info); 1740 } 1741 spin_unlock(&ctl->tree_lock); 1742 1743 /* step two, insert a new info struct to cover 1744 * anything before the hole 1745 */ 1746 ret = btrfs_add_free_space(block_group, old_start, 1747 offset - old_start); 1748 WARN_ON(ret); 1749 goto out; 1750 } 1751 1752 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1753 if (ret == -EAGAIN) 1754 goto again; 1755 BUG_ON(ret); 1756 out_lock: 1757 spin_unlock(&ctl->tree_lock); 1758 out: 1759 return ret; 1760 } 1761 1762 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 1763 u64 bytes) 1764 { 1765 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1766 struct btrfs_free_space *info; 1767 struct rb_node *n; 1768 int count = 0; 1769 1770 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 1771 info = rb_entry(n, struct btrfs_free_space, offset_index); 1772 if (info->bytes >= bytes) 1773 count++; 1774 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", 1775 (unsigned long long)info->offset, 1776 (unsigned long long)info->bytes, 1777 (info->bitmap) ? "yes" : "no"); 1778 } 1779 printk(KERN_INFO "block group has cluster?: %s\n", 1780 list_empty(&block_group->cluster_list) ? "no" : "yes"); 1781 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 1782 "\n", count); 1783 } 1784 1785 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 1786 { 1787 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1788 1789 spin_lock_init(&ctl->tree_lock); 1790 ctl->unit = block_group->sectorsize; 1791 ctl->start = block_group->key.objectid; 1792 ctl->private = block_group; 1793 ctl->op = &free_space_op; 1794 1795 /* 1796 * we only want to have 32k of ram per block group for keeping 1797 * track of free space, and if we pass 1/2 of that we want to 1798 * start converting things over to using bitmaps 1799 */ 1800 ctl->extents_thresh = ((1024 * 32) / 2) / 1801 sizeof(struct btrfs_free_space); 1802 } 1803 1804 /* 1805 * for a given cluster, put all of its extents back into the free 1806 * space cache. If the block group passed doesn't match the block group 1807 * pointed to by the cluster, someone else raced in and freed the 1808 * cluster already. 
In that case, we just return without changing anything 1809 */ 1810 static int 1811 __btrfs_return_cluster_to_free_space( 1812 struct btrfs_block_group_cache *block_group, 1813 struct btrfs_free_cluster *cluster) 1814 { 1815 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1816 struct btrfs_free_space *entry; 1817 struct rb_node *node; 1818 1819 spin_lock(&cluster->lock); 1820 if (cluster->block_group != block_group) 1821 goto out; 1822 1823 cluster->block_group = NULL; 1824 cluster->window_start = 0; 1825 list_del_init(&cluster->block_group_list); 1826 1827 node = rb_first(&cluster->root); 1828 while (node) { 1829 bool bitmap; 1830 1831 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1832 node = rb_next(&entry->offset_index); 1833 rb_erase(&entry->offset_index, &cluster->root); 1834 1835 bitmap = (entry->bitmap != NULL); 1836 if (!bitmap) 1837 try_merge_free_space(ctl, entry, false); 1838 tree_insert_offset(&ctl->free_space_offset, 1839 entry->offset, &entry->offset_index, bitmap); 1840 } 1841 cluster->root = RB_ROOT; 1842 1843 out: 1844 spin_unlock(&cluster->lock); 1845 btrfs_put_block_group(block_group); 1846 return 0; 1847 } 1848 1849 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) 1850 { 1851 struct btrfs_free_space *info; 1852 struct rb_node *node; 1853 1854 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 1855 info = rb_entry(node, struct btrfs_free_space, offset_index); 1856 if (!info->bitmap) { 1857 unlink_free_space(ctl, info); 1858 kmem_cache_free(btrfs_free_space_cachep, info); 1859 } else { 1860 free_bitmap(ctl, info); 1861 } 1862 if (need_resched()) { 1863 spin_unlock(&ctl->tree_lock); 1864 cond_resched(); 1865 spin_lock(&ctl->tree_lock); 1866 } 1867 } 1868 } 1869 1870 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 1871 { 1872 spin_lock(&ctl->tree_lock); 1873 __btrfs_remove_free_space_cache_locked(ctl); 1874 spin_unlock(&ctl->tree_lock); 1875 } 1876 1877 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 1878 { 1879 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1880 struct btrfs_free_cluster *cluster; 1881 struct list_head *head; 1882 1883 spin_lock(&ctl->tree_lock); 1884 while ((head = block_group->cluster_list.next) != 1885 &block_group->cluster_list) { 1886 cluster = list_entry(head, struct btrfs_free_cluster, 1887 block_group_list); 1888 1889 WARN_ON(cluster->block_group != block_group); 1890 __btrfs_return_cluster_to_free_space(block_group, cluster); 1891 if (need_resched()) { 1892 spin_unlock(&ctl->tree_lock); 1893 cond_resched(); 1894 spin_lock(&ctl->tree_lock); 1895 } 1896 } 1897 __btrfs_remove_free_space_cache_locked(ctl); 1898 spin_unlock(&ctl->tree_lock); 1899 1900 } 1901 1902 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 1903 u64 offset, u64 bytes, u64 empty_size) 1904 { 1905 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1906 struct btrfs_free_space *entry = NULL; 1907 u64 bytes_search = bytes + empty_size; 1908 u64 ret = 0; 1909 1910 spin_lock(&ctl->tree_lock); 1911 entry = find_free_space(ctl, &offset, &bytes_search); 1912 if (!entry) 1913 goto out; 1914 1915 ret = offset; 1916 if (entry->bitmap) { 1917 bitmap_clear_bits(ctl, entry, offset, bytes); 1918 if (!entry->bytes) 1919 free_bitmap(ctl, entry); 1920 } else { 1921 unlink_free_space(ctl, entry); 1922 entry->offset += bytes; 1923 entry->bytes -= bytes; 1924 if (!entry->bytes) 1925 kmem_cache_free(btrfs_free_space_cachep, 
entry); 1926 else 1927 link_free_space(ctl, entry); 1928 } 1929 1930 out: 1931 spin_unlock(&ctl->tree_lock); 1932 1933 return ret; 1934 } 1935 1936 /* 1937 * given a cluster, put all of its extents back into the free space 1938 * cache. If a block group is passed, this function will only free 1939 * a cluster that belongs to the passed block group. 1940 * 1941 * Otherwise, it'll get a reference on the block group pointed to by the 1942 * cluster and remove the cluster from it. 1943 */ 1944 int btrfs_return_cluster_to_free_space( 1945 struct btrfs_block_group_cache *block_group, 1946 struct btrfs_free_cluster *cluster) 1947 { 1948 struct btrfs_free_space_ctl *ctl; 1949 int ret; 1950 1951 /* first, get a safe pointer to the block group */ 1952 spin_lock(&cluster->lock); 1953 if (!block_group) { 1954 block_group = cluster->block_group; 1955 if (!block_group) { 1956 spin_unlock(&cluster->lock); 1957 return 0; 1958 } 1959 } else if (cluster->block_group != block_group) { 1960 /* someone else has already freed it don't redo their work */ 1961 spin_unlock(&cluster->lock); 1962 return 0; 1963 } 1964 atomic_inc(&block_group->count); 1965 spin_unlock(&cluster->lock); 1966 1967 ctl = block_group->free_space_ctl; 1968 1969 /* now return any extents the cluster had on it */ 1970 spin_lock(&ctl->tree_lock); 1971 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 1972 spin_unlock(&ctl->tree_lock); 1973 1974 /* finally drop our ref */ 1975 btrfs_put_block_group(block_group); 1976 return ret; 1977 } 1978 1979 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 1980 struct btrfs_free_cluster *cluster, 1981 struct btrfs_free_space *entry, 1982 u64 bytes, u64 min_start) 1983 { 1984 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1985 int err; 1986 u64 search_start = cluster->window_start; 1987 u64 search_bytes = bytes; 1988 u64 ret = 0; 1989 1990 search_start = min_start; 1991 search_bytes = bytes; 1992 1993 err = search_bitmap(ctl, entry, &search_start, &search_bytes); 1994 if (err) 1995 return 0; 1996 1997 ret = search_start; 1998 __bitmap_clear_bits(ctl, entry, ret, bytes); 1999 2000 return ret; 2001 } 2002 2003 /* 2004 * given a cluster, try to allocate 'bytes' from it, returns 0 2005 * if it couldn't find anything suitably large, or a logical disk offset 2006 * if things worked out 2007 */ 2008 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2009 struct btrfs_free_cluster *cluster, u64 bytes, 2010 u64 min_start) 2011 { 2012 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2013 struct btrfs_free_space *entry = NULL; 2014 struct rb_node *node; 2015 u64 ret = 0; 2016 2017 spin_lock(&cluster->lock); 2018 if (bytes > cluster->max_size) 2019 goto out; 2020 2021 if (cluster->block_group != block_group) 2022 goto out; 2023 2024 node = rb_first(&cluster->root); 2025 if (!node) 2026 goto out; 2027 2028 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2029 while(1) { 2030 if (entry->bytes < bytes || 2031 (!entry->bitmap && entry->offset < min_start)) { 2032 node = rb_next(&entry->offset_index); 2033 if (!node) 2034 break; 2035 entry = rb_entry(node, struct btrfs_free_space, 2036 offset_index); 2037 continue; 2038 } 2039 2040 if (entry->bitmap) { 2041 ret = btrfs_alloc_from_bitmap(block_group, 2042 cluster, entry, bytes, 2043 min_start); 2044 if (ret == 0) { 2045 node = rb_next(&entry->offset_index); 2046 if (!node) 2047 break; 2048 entry = rb_entry(node, struct btrfs_free_space, 2049 offset_index); 2050 
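				/*
				 * The bitmap had no contiguous run big enough
				 * for this allocation, so move on to the next
				 * entry in the cluster.
				 */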
continue; 2051 } 2052 } else { 2053 ret = entry->offset; 2054 2055 entry->offset += bytes; 2056 entry->bytes -= bytes; 2057 } 2058 2059 if (entry->bytes == 0) 2060 rb_erase(&entry->offset_index, &cluster->root); 2061 break; 2062 } 2063 out: 2064 spin_unlock(&cluster->lock); 2065 2066 if (!ret) 2067 return 0; 2068 2069 spin_lock(&ctl->tree_lock); 2070 2071 ctl->free_space -= bytes; 2072 if (entry->bytes == 0) { 2073 ctl->free_extents--; 2074 if (entry->bitmap) { 2075 kfree(entry->bitmap); 2076 ctl->total_bitmaps--; 2077 ctl->op->recalc_thresholds(ctl); 2078 } 2079 kmem_cache_free(btrfs_free_space_cachep, entry); 2080 } 2081 2082 spin_unlock(&ctl->tree_lock); 2083 2084 return ret; 2085 } 2086 2087 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2088 struct btrfs_free_space *entry, 2089 struct btrfs_free_cluster *cluster, 2090 u64 offset, u64 bytes, u64 min_bytes) 2091 { 2092 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2093 unsigned long next_zero; 2094 unsigned long i; 2095 unsigned long search_bits; 2096 unsigned long total_bits; 2097 unsigned long found_bits; 2098 unsigned long start = 0; 2099 unsigned long total_found = 0; 2100 int ret; 2101 bool found = false; 2102 2103 i = offset_to_bit(entry->offset, block_group->sectorsize, 2104 max_t(u64, offset, entry->offset)); 2105 search_bits = bytes_to_bits(bytes, block_group->sectorsize); 2106 total_bits = bytes_to_bits(min_bytes, block_group->sectorsize); 2107 2108 again: 2109 found_bits = 0; 2110 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i); 2111 i < BITS_PER_BITMAP; 2112 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { 2113 next_zero = find_next_zero_bit(entry->bitmap, 2114 BITS_PER_BITMAP, i); 2115 if (next_zero - i >= search_bits) { 2116 found_bits = next_zero - i; 2117 break; 2118 } 2119 i = next_zero; 2120 } 2121 2122 if (!found_bits) 2123 return -ENOSPC; 2124 2125 if (!found) { 2126 start = i; 2127 found = true; 2128 } 2129 2130 total_found += found_bits; 2131 2132 if (cluster->max_size < found_bits * block_group->sectorsize) 2133 cluster->max_size = found_bits * block_group->sectorsize; 2134 2135 if (total_found < total_bits) { 2136 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero); 2137 if (i - start > total_bits * 2) { 2138 total_found = 0; 2139 cluster->max_size = 0; 2140 found = false; 2141 } 2142 goto again; 2143 } 2144 2145 cluster->window_start = start * block_group->sectorsize + 2146 entry->offset; 2147 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2148 ret = tree_insert_offset(&cluster->root, entry->offset, 2149 &entry->offset_index, 1); 2150 BUG_ON(ret); 2151 2152 return 0; 2153 } 2154 2155 /* 2156 * This searches the block group for just extents to fill the cluster with. 
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	int ret;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -ENOSPC;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	rb_erase(&entry->offset_index, &ctl->free_space_offset);
	ret = tree_insert_offset(&cluster->root, entry->offset,
				 &entry->offset_index, 1);
	BUG_ON(ret);

	return 0;
}

/*
 * This searches the block group for just extents to fill the cluster with.
 */
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
			struct btrfs_free_cluster *cluster,
			struct list_head *bitmaps, u64 offset, u64 bytes,
			u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *first = NULL;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_free_space *prev = NULL;
	struct btrfs_free_space *last;
	struct rb_node *node;
	u64 window_start;
	u64 window_free;
	u64 max_extent;
	u64 max_gap = 128 * 1024;

	entry = tree_search_offset(ctl, offset, 0, 1);
	if (!entry)
		return -ENOSPC;

	/*
	 * We don't want bitmaps, so just move along until we find a normal
	 * extent entry.
	 */
	while (entry->bitmap) {
		if (list_empty(&entry->list))
			list_add_tail(&entry->list, bitmaps);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	window_start = entry->offset;
	window_free = entry->bytes;
	max_extent = entry->bytes;
	first = entry;
	last = entry;
	prev = entry;

	while (window_free <= min_bytes) {
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);

		if (entry->bitmap) {
			if (list_empty(&entry->list))
				list_add_tail(&entry->list, bitmaps);
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
		    entry->offset - window_start > (min_bytes * 2)) {
			first = entry;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = entry;
			window_free += entry->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
		prev = entry;
	}

	cluster->window_start = first->offset;

	node = &first->offset_index;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 */
	do {
		int ret;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (entry->bitmap)
			continue;

		rb_erase(&entry->offset_index, &ctl->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);
	} while (node && entry != last);

	cluster->max_size = max_extent;

	return 0;
}

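/*
 * Any bitmap entries that setup_cluster_no_bitmap() walks past are queued
 * on the caller's 'bitmaps' list, so the bitmap pass below can try them
 * first without re-walking the whole free space tree.
 */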
/*
 * This specifically looks for bitmaps that may work in the cluster, we assume
 * that we have already failed to find extents that will work.
 */
static noinline int
setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
		     struct btrfs_free_cluster *cluster,
		     struct list_head *bitmaps, u64 offset, u64 bytes,
		     u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret = -ENOSPC;

	if (ctl->total_bitmaps == 0)
		return -ENOSPC;

	/*
	 * First check our cached list of bitmaps and see if there is an entry
	 * here that will work.
	 */
	list_for_each_entry(entry, bitmaps, list) {
		if (entry->bytes < min_bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, min_bytes);
		if (!ret)
			return 0;
	}

	/*
	 * If we do have entries on our list and we are here, then we didn't
	 * find anything, so go ahead and get the next entry after the last
	 * entry in this list and start the search from there.
	 */
	if (!list_empty(bitmaps)) {
		entry = list_entry(bitmaps->prev, struct btrfs_free_space,
				   list);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		goto search;
	}

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
	if (!entry)
		return -ENOSPC;

search:
	node = &entry->offset_index;
	do {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (!entry->bitmap)
			continue;
		if (entry->bytes < min_bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, min_bytes);
	} while (ret && node);

	return ret;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct list_head bitmaps;
	struct btrfs_free_space *entry, *tmp;
	u64 min_bytes;
	int ret;

	/* for metadata, allow allocations with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);
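	/*
	 * Illustrative numbers (not taken from the code above): with
	 * bytes == 128K and empty_size == 3968K (4M total), min_bytes comes
	 * out to 4M for ssd_spread, 2M for metadata while flushing delayed
	 * refs, 256K for metadata otherwise, and 1M for data block groups.
	 */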

	spin_lock(&ctl->tree_lock);

	/*
	 * If we know we don't have enough space to make a cluster don't even
	 * bother doing all the work to try and find one.
	 */
	if (ctl->free_space < min_bytes) {
		spin_unlock(&ctl->tree_lock);
		return -ENOSPC;
	}

	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}

	INIT_LIST_HEAD(&bitmaps);
	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
				      bytes, min_bytes);
	if (ret)
		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
					   offset, bytes, min_bytes);

	/* Clear our temporary list */
	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
		list_del_init(&entry->list);

	if (!ret) {
		atomic_inc(&block_group->count);
		list_add_tail(&cluster->block_group_list,
			      &block_group->cluster_list);
		cluster->block_group = block_group;
	}
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}

/*
 * Walk the free space entries between 'start' and 'end', discard every
 * chunk of at least 'minlen' bytes and accumulate the number of bytes
 * actually discarded in '*trimmed'.
 */
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
			   u64 *trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 bytes = 0;
	u64 actually_trimmed;
	int ret = 0;

	*trimmed = 0;

	while (start < end) {
		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, start, 0, 1);
		if (!entry)
			entry = tree_search_offset(ctl,
						   offset_to_bitmap(ctl, start),
						   1, 1);

		if (!entry || entry->offset >= end) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		if (entry->bitmap) {
			ret = search_bitmap(ctl, entry, &start, &bytes);
			if (!ret) {
				if (start >= end) {
					spin_unlock(&ctl->tree_lock);
					break;
				}
				bytes = min(bytes, end - start);
				bitmap_clear_bits(ctl, entry, start, bytes);
				if (entry->bytes == 0)
					free_bitmap(ctl, entry);
			} else {
				start = entry->offset + BITS_PER_BITMAP *
					block_group->sectorsize;
				spin_unlock(&ctl->tree_lock);
				ret = 0;
				continue;
			}
		} else {
			start = entry->offset;
			bytes = min(entry->bytes, end - start);
			unlink_free_space(ctl, entry);
			kmem_cache_free(btrfs_free_space_cachep, entry);
		}

		spin_unlock(&ctl->tree_lock);

		if (bytes >= minlen) {
			int update_ret;
			update_ret = btrfs_update_reserved_bytes(block_group,
								 bytes, 1, 1);

			ret = btrfs_error_discard_extent(fs_info->extent_root,
							 start,
							 bytes,
							 &actually_trimmed);

			btrfs_add_free_space(block_group, start, bytes);
			if (!update_ret)
				btrfs_update_reserved_bytes(block_group,
							    bytes, 0, 1);

			if (ret)
				break;
			*trimmed += actually_trimmed;
		}
		start += bytes;
		bytes = 0;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}

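/*
 * Everything below deals with the free inode number cache
 * (root->free_ino_ctl), which reuses the free space cache machinery to
 * track unused inode numbers rather than block group free space.
 */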

/*
 * Find the left-most item in the cache tree, and then return the
 * smallest inode number in the item.
 *
 * Note: the returned inode number may not be the smallest one in
 * the tree, if the left-most item is a bitmap.
 */
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
{
	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 ino = 0;

	spin_lock(&ctl->tree_lock);

	if (RB_EMPTY_ROOT(&ctl->free_space_offset))
		goto out;

	entry = rb_entry(rb_first(&ctl->free_space_offset),
			 struct btrfs_free_space, offset_index);

	if (!entry->bitmap) {
		ino = entry->offset;

		unlink_free_space(ctl, entry);
		entry->offset++;
		entry->bytes--;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	} else {
		u64 offset = 0;
		u64 count = 1;
		int ret;

		ret = search_bitmap(ctl, entry, &offset, &count);
		BUG_ON(ret);

		ino = offset;
		bitmap_clear_bits(ctl, entry, offset, 1);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);
	}
out:
	spin_unlock(&ctl->tree_lock);

	return ino;
}

struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&root->cache_lock);
	if (root->cache_inode)
		inode = igrab(root->cache_inode);
	spin_unlock(&root->cache_lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path, 0);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&root->cache_lock);
	if (!btrfs_fs_closing(root->fs_info))
		root->cache_inode = igrab(inode);
	spin_unlock(&root->cache_lock);

	return inode;
}

int create_free_ino_inode(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_path *path)
{
	return __create_free_space_inode(root, trans, path,
					 BTRFS_FREE_INO_OBJECTID, 0);
}

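/*
 * Populate root->free_ino_ctl from the inode number cache stored on disk.
 * This is skipped when the INODE_MAP_CACHE mount option is not set, when
 * the filesystem is shutting down, or when the cached generation no longer
 * matches the root's generation.
 */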
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	/*
	 * If we're unmounting, then just return, since this does a search on
	 * the normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to load free ino cache for "
		       "root %llu\n", root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Write the in-memory free inode number cache back to its cache inode so it
 * can be reloaded on the next mount.
 */
int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct inode *inode;
	int ret;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to write free ino cache "
		       "for root %llu\n", root->root_key.objectid);

	iput(inode);
	return ret;
}