/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	inode->i_mapping->flags &= ~__GFP_FS;

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		printk(KERN_INFO "Old style space inode found, converting.\n");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}
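/*
 * Create the inode that backs a free space cache: a regular 0600 file
 * flagged NOCOMPRESS|PREALLOC (plus NODATASUM|NODATACOW for block group
 * caches, whose crcs live inline in the cache pages themselves),
 * followed by the free space header item keyed by @offset.
 */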
int __create_free_space_inode(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path, u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crc's for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(root, trans, path, ino,
					 block_group->key.objectid);
}
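/*
 * Empty a free space cache inode so it can be rewritten from scratch.
 * The truncate and the inode update are charged to the global block
 * reserve; if the reserve can't cover both, fail with -ENOSPC before
 * touching anything.
 */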
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	struct btrfs_block_rsv *rsv;
	u64 needed_bytes;
	loff_t oldsize;
	int ret = 0;

	rsv = trans->block_rsv;
	trans->block_rsv = &root->fs_info->global_block_rsv;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
		btrfs_calc_trans_metadata_size(root, 1);

	spin_lock(&trans->block_rsv->lock);
	if (trans->block_rsv->reserved < needed_bytes) {
		spin_unlock(&trans->block_rsv->lock);
		trans->block_rsv = rsv;
		return -ENOSPC;
	}
	spin_unlock(&trans->block_rsv->lock);

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);

	if (ret) {
		trans->block_rsv = rsv;
		WARN_ON(1);
		return ret;
	}

	ret = btrfs_update_inode(trans, root, inode);
	trans->block_rsv = rsv;

	return ret;
}

static int readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);

	return 0;
}

struct io_ctl {
	void *cur, *orig;
	struct page *page;
	struct page **pages;
	struct btrfs_root *root;
	unsigned long size;
	int index;
	int num_pages;
	unsigned check_crcs:1;
};

static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
		       struct btrfs_root *root)
{
	memset(io_ctl, 0, sizeof(struct io_ctl));
	io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
		PAGE_CACHE_SHIFT;
	io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
				GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;
	io_ctl->root = root;
	if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
		io_ctl->check_crcs = 1;
	return 0;
}

static void io_ctl_free(struct io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
}

static void io_ctl_unmap_page(struct io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		kunmap(io_ctl->page);
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
	WARN_ON(io_ctl->cur);
	BUG_ON(io_ctl->index >= io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = kmap(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_CACHE_SIZE;
	if (clear)
		memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
}

static void io_ctl_drop_pages(struct io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			page_cache_release(io_ctl->pages[i]);
		}
	}
}

static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
				int uptodate)
{
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache\n");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++) {
		clear_page_dirty_for_io(io_ctl->pages[i]);
		set_page_extent_mapped(io_ctl->pages[i]);
	}

	return 0;
}
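/*
 * First-page layout: with crcs enabled, an array of one u32 crc per
 * page comes first; without crcs, a single reserved u64 does. The u64
 * generation stamp follows in either case. The helpers below skip,
 * stamp and verify that header area.
 */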
static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *val;

	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas.  If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	val = io_ctl->cur;
	*val = cpu_to_le64(generation);
	io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *gen;

	/*
	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	gen = io_ctl->cur;
	if (le64_to_cpu(*gen) != generation) {
		printk_ratelimited(KERN_ERR "btrfs: space cache generation "
				   "(%Lu) does not match inode (%Lu)\n", *gen,
				   generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_unmap_page(io_ctl);
		return;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
	kunmap(io_ctl->pages[0]);
}

static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;
	kunmap(io_ctl->pages[0]);

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	if (val != crc) {
		printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
				   "space cache\n");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}

static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	entry->offset = cpu_to_le64(offset);
	entry->bytes = cpu_to_le64(bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}
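/*
 * A bitmap always occupies a full page of its own in the cache file, so
 * writing one means finishing (crc'ing) the current entry page and
 * copying the bitmap into a fresh page.
 */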
static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}

static int io_ctl_read_entry(struct io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
	io_ctl_unmap_page(io_ctl);

	return 0;
}
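/*
 * Read a free space cache file into @ctl. The generation is checked
 * against the inode, extent entries are linked into the rbtree as they
 * are read, and the bitmap payloads (stored after all the entries) are
 * read in a second pass. Returns 1 if the cache was loaded, <= 0
 * otherwise.
 */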
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	INIT_LIST_HEAD(&bitmaps);

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (BTRFS_I(inode)->generation != generation) {
		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
		       " not match free space cache generation (%llu)\n",
		       (unsigned long long)BTRFS_I(inode)->generation,
		       (unsigned long long)generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return ret;

	ret = readahead_cache(inode);
	if (ret)
		goto out;

	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e)
			goto free_cache;

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (!e->bytes) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			BUG_ON(!num_bitmaps);
			num_bitmaps--;
			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
			if (!e->bitmap) {
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * The bitmap payloads are stored after all the entries, so read them
	 * back now that the bitmap entries themselves are in the cache.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}
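/*
 * Load the on-disk cache for a block group and sanity check the result:
 * the free space found must exactly account for the block group's size
 * minus the used bytes and the superblock area, otherwise the cache is
 * dropped and rebuilt.
 */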
int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has the wrong amount of free "
		       "space\n", block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		printk(KERN_ERR "btrfs: failed to load free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}
/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 * @path - the path to use
 * @offset - the offset for the key we'll insert
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
 * and -1 if it was not.
 */
int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct extent_state *cached_state = NULL;
	struct btrfs_free_cluster *cluster = NULL;
	struct extent_io_tree *unpin = NULL;
	struct io_ctl io_ctl;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 start, extent_start, extent_end, len;
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int err = -1;

	INIT_LIST_HEAD(&bitmap_list);

	if (!i_size_read(inode))
		return -1;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return -1;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list))
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);

	/* Lock all pages first so we can lock the extent safely. */
	ret = io_ctl_prepare_pages(&io_ctl, inode, 0);
	if (ret)
		goto out;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state, GFP_NOFS);

	node = rb_first(&ctl->free_space_offset);
	if (!node && cluster) {
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

	/* Make sure we can fit our crcs into the first page */
	if (io_ctl.check_crcs &&
	    (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
		WARN_ON(1);
		goto out_nospc;
	}

	io_ctl_set_generation(&io_ctl, trans->transid);

	/* Write out the extent entries */
	while (node) {
		struct btrfs_free_space *e;

		e = rb_entry(node, struct btrfs_free_space, offset_index);
		entries++;

		ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
				       e->bitmap);
		if (ret)
			goto out_nospc;

		if (e->bitmap) {
			list_add_tail(&e->list, &bitmap_list);
			bitmaps++;
		}
		node = rb_next(node);
		if (!node && cluster) {
			node = rb_first(&cluster->root);
			cluster = NULL;
		}
	}

	/*
	 * We want to add any pinned extents to our free space cache
	 * so we don't leak the space
	 */

	/*
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = root->fs_info->pinned_extents;

	if (block_group)
		start = block_group->key.objectid;

	while (block_group && (start < block_group->key.objectid +
			       block_group->key.offset)) {
		ret = find_first_extent_bit(unpin, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY);
		if (ret) {
			ret = 0;
			break;
		}

		/* This pinned extent is out of our range */
		if (extent_start >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		extent_start = max(extent_start, start);
		extent_end = min(block_group->key.objectid +
				 block_group->key.offset, extent_end + 1);
		len = extent_end - extent_start;

		entries++;
		ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
		if (ret)
			goto out_nospc;

		start = extent_end;
	}

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
		if (ret)
			goto out_nospc;
		list_del_init(&entry->list);
	}

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(&io_ctl);

	ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
				0, i_size_read(inode), &cached_state);
	io_ctl_drop_pages(&io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	if (ret)
		goto out;

	ret = filemap_write_and_wait(inode->i_mapping);
	if (ret)
		goto out;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
				 GFP_NOFS);
		goto out;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
					 inode->i_size - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
					 NULL, GFP_NOFS);
			btrfs_release_path(path);
			goto out;
		}
	}

	BTRFS_I(inode)->generation = trans->transid;
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	err = 0;
out:
	io_ctl_free(&io_ctl);
	if (err) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	return err;

out_nospc:
	list_for_each_safe(pos, n, &bitmap_list) {
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);
		list_del_init(&entry->list);
	}
	io_ctl_drop_pages(&io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
	goto out;
}
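/*
 * Write a block group's free space cache out through the tree root. On
 * failure the group is flagged BTRFS_DC_ERROR so the stale cache will
 * be ignored rather than trusted on the next mount.
 */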
int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	root = root->fs_info->tree_root;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
				      path, block_group->key.objectid);
	if (ret) {
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		ret = 0;
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free space cache "
		       "for block group %llu\n", block_group->key.objectid);
#endif
	}

	iput(inode);
	return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}

static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}
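/*
 * Example (assuming 4K pages and unit == sectorsize == 4K): a bitmap
 * holds BITS_PER_BITMAP = 32768 bits, so one bitmap covers 32768 * 4K =
 * 128M, and offset_to_bitmap() rounds an offset down to the start of
 * the 128M-aligned window (relative to ctl->start) that contains it.
 */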
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}
/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least 'bytes' size and comes at or after the
 * given offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}
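/*
 * The __ variants below skip the ctl->free_space accounting. They are
 * used when an entry moves without its space actually being freed or
 * allocated, e.g. when merging entries returned from a cluster.
 */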
static inline void
__unlink_free_space(struct btrfs_free_space_ctl *ctl,
		    struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_extents--;
}

static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	__unlink_free_space(ctl, info);
	ctl->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	ctl->free_space += info->bytes;
	ctl->free_extents++;
	return ret;
}

static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->key.offset;
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(ctl->total_bitmaps > max_bitmaps);

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	if (size < 1024 * 1024 * 1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	ctl->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}
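/*
 * Scan a bitmap for a run of at least *bytes worth of set bits starting
 * at or after *offset. On success, *offset and *bytes are updated to
 * describe the run that was found.
 */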
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}

static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(ctl, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	INIT_LIST_HEAD(&info->list);
	link_free_space(ctl, info);
	ctl->total_bitmaps++;

	ctl->op->recalc_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	unlink_free_space(ctl, bitmap_info);
	kfree(bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}
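/*
 * Clear [*offset, *offset + *bytes) out of one or more bitmaps. If the
 * range runs past the end of the current bitmap, loop on to the next
 * one; returns -EAGAIN if the remainder isn't covered by a bitmap so
 * the caller can retry against extent entries.
 */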
static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	return bytes_to_set;
}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them to larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= block_group->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return false;

	return true;
}

static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};
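/*
 * Add the range described by @info to a bitmap: first to the bitmap
 * owned by the block group's cluster, if there is one, then to existing
 * bitmaps in the rbtree, allocating new bitmap pages for whatever is
 * left over. Returns > 0 if the space was consumed, 0 if the caller
 * should add it as an extent instead, or a negative errno.
 */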
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
	 */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		struct btrfs_free_cluster *cluster;
		struct rb_node *node;
		struct btrfs_free_space *entry;

		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
		spin_lock(&cluster->lock);
		node = rb_first(&cluster->root);
		if (!node) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!entry->bitmap) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		if (entry->offset == offset_to_bitmap(ctl, offset)) {
			bytes_added = add_bytes_to_bitmap(ctl, entry,
							  offset, bytes);
			bytes -= bytes_added;
			offset += bytes_added;
		}
		spin_unlock(&cluster->lock);
		if (!bytes) {
			ret = 1;
			goto out;
		}
	}

no_cluster_bitmap:
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
	bytes -= bytes_added;
	offset += bytes_added;
	added = 0;

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(ctl, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&ctl->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kmem_cache_zalloc(btrfs_free_space_cachep,
						 GFP_NOFS);
			if (!info) {
				spin_lock(&ctl->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&ctl->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kmem_cache_free(btrfs_free_space_cachep, info);
	}

	return ret;
}
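/*
 * Try to coalesce the new extent described by @info with byte-adjacent
 * extent entries on either side; bitmap entries are never merged. When
 * @update_stat is false the ctl->free_space accounting is left alone.
 */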
static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
			  struct btrfs_free_space *info, bool update_stat)
{
	struct btrfs_free_space *left_info;
	struct btrfs_free_space *right_info;
	bool merged = false;
	u64 offset = info->offset;
	u64 bytes = info->bytes;

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(ctl, offset - 1, 0, 0);

	if (right_info && !right_info->bitmap) {
		if (update_stat)
			unlink_free_space(ctl, right_info);
		else
			__unlink_free_space(ctl, right_info);
		info->bytes += right_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, right_info);
		merged = true;
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		if (update_stat)
			unlink_free_space(ctl, left_info);
		else
			__unlink_free_space(ctl, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, left_info);
		merged = true;
	}

	return merged;
}

int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
			   u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	int ret = 0;

	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&ctl->tree_lock);

	if (try_merge_free_space(ctl, info, true))
		goto link;

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent,
	 * so before we do that see if we need to drop this into a bitmap
	 */
	ret = insert_into_bitmap(ctl, info);
	if (ret < 0) {
		goto out;
	} else if (ret) {
		ret = 0;
		goto out;
	}
link:
	ret = link_free_space(ctl, info);
	if (ret)
		kmem_cache_free(btrfs_free_space_cachep, info);
out:
	spin_unlock(&ctl->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
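/*
 * Carve [offset, offset + bytes) out of the free space tree. The range
 * may be covered by an extent entry (possibly splitting it in two), by
 * one or more bitmaps, or by a mix of both, so this retries until the
 * whole range has been removed.
 */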
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&ctl->tree_lock);

again:
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info) {
			/* the tree logging code might be calling us before we
			 * have fully loaded the free space rbtree for this
			 * block group.  So it is possible the entry won't
			 * be in the rbtree yet at all.  The caching code
			 * will make sure not to put it in the rbtree if
			 * the logging code has pinned it.
			 */
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset +
			      BITS_PER_BITMAP * ctl->unit - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(ctl, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			ctl->total_bitmaps--;
		}
		kmem_cache_free(btrfs_free_space_cachep, info);
		ret = 0;
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(ctl, info);
		info->offset += bytes;
		info->bytes -= bytes;
		ret = link_free_space(ctl, info);
		WARN_ON(ret);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(ctl, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(ctl, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kmem_cache_free(btrfs_free_space_cachep, info);
		}
		spin_unlock(&ctl->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&ctl->tree_lock);
out:
	return ret;
}
"no" : "yes"); 1983 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 1984 "\n", count); 1985 } 1986 1987 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 1988 { 1989 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1990 1991 spin_lock_init(&ctl->tree_lock); 1992 ctl->unit = block_group->sectorsize; 1993 ctl->start = block_group->key.objectid; 1994 ctl->private = block_group; 1995 ctl->op = &free_space_op; 1996 1997 /* 1998 * we only want to have 32k of ram per block group for keeping 1999 * track of free space, and if we pass 1/2 of that we want to 2000 * start converting things over to using bitmaps 2001 */ 2002 ctl->extents_thresh = ((1024 * 32) / 2) / 2003 sizeof(struct btrfs_free_space); 2004 } 2005 2006 /* 2007 * for a given cluster, put all of its extents back into the free 2008 * space cache. If the block group passed doesn't match the block group 2009 * pointed to by the cluster, someone else raced in and freed the 2010 * cluster already. In that case, we just return without changing anything 2011 */ 2012 static int 2013 __btrfs_return_cluster_to_free_space( 2014 struct btrfs_block_group_cache *block_group, 2015 struct btrfs_free_cluster *cluster) 2016 { 2017 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2018 struct btrfs_free_space *entry; 2019 struct rb_node *node; 2020 2021 spin_lock(&cluster->lock); 2022 if (cluster->block_group != block_group) 2023 goto out; 2024 2025 cluster->block_group = NULL; 2026 cluster->window_start = 0; 2027 list_del_init(&cluster->block_group_list); 2028 2029 node = rb_first(&cluster->root); 2030 while (node) { 2031 bool bitmap; 2032 2033 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2034 node = rb_next(&entry->offset_index); 2035 rb_erase(&entry->offset_index, &cluster->root); 2036 2037 bitmap = (entry->bitmap != NULL); 2038 if (!bitmap) 2039 try_merge_free_space(ctl, entry, false); 2040 tree_insert_offset(&ctl->free_space_offset, 2041 entry->offset, &entry->offset_index, bitmap); 2042 } 2043 cluster->root = RB_ROOT; 2044 2045 out: 2046 spin_unlock(&cluster->lock); 2047 btrfs_put_block_group(block_group); 2048 return 0; 2049 } 2050 2051 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) 2052 { 2053 struct btrfs_free_space *info; 2054 struct rb_node *node; 2055 2056 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2057 info = rb_entry(node, struct btrfs_free_space, offset_index); 2058 if (!info->bitmap) { 2059 unlink_free_space(ctl, info); 2060 kmem_cache_free(btrfs_free_space_cachep, info); 2061 } else { 2062 free_bitmap(ctl, info); 2063 } 2064 if (need_resched()) { 2065 spin_unlock(&ctl->tree_lock); 2066 cond_resched(); 2067 spin_lock(&ctl->tree_lock); 2068 } 2069 } 2070 } 2071 2072 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2073 { 2074 spin_lock(&ctl->tree_lock); 2075 __btrfs_remove_free_space_cache_locked(ctl); 2076 spin_unlock(&ctl->tree_lock); 2077 } 2078 2079 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2080 { 2081 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2082 struct btrfs_free_cluster *cluster; 2083 struct list_head *head; 2084 2085 spin_lock(&ctl->tree_lock); 2086 while ((head = block_group->cluster_list.next) != 2087 &block_group->cluster_list) { 2088 cluster = list_entry(head, struct btrfs_free_cluster, 2089 block_group_list); 2090 2091 WARN_ON(cluster->block_group != block_group); 2092 
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&ctl->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}
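
/*
 * A minimal caller sketch for btrfs_find_space_for_alloc() below, with
 * hypothetical block_group/search_start/num_bytes variables and a
 * hypothetical try_next_block_group() fallback (the extent allocator is
 * the real caller; this is illustrative only):
 *
 *	u64 start;
 *
 *	start = btrfs_find_space_for_alloc(block_group, search_start,
 *					   num_bytes, 0);
 *	if (!start)
 *		try_next_block_group();
 *
 * A return of 0 means nothing suitable was found; on success the
 * returned logical offset has already been removed from the free space
 * tree, so the caller owns it.
 */
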
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&ctl->tree_lock);
	entry = find_free_space(ctl, &offset, &bytes_search);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(ctl, entry, offset, bytes);
		if (!entry->bytes)
			free_bitmap(ctl, entry);
	} else {
		unlink_free_space(ctl, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	}

out:
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl;
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	ctl = block_group->free_space_ctl;

	/* now return any extents the cluster had on it */
	spin_lock(&ctl->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&ctl->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   struct btrfs_free_space *entry,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	int err;
	u64 search_start = min_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	err = search_bitmap(ctl, entry, &search_start, &search_bytes);
	if (err)
		return 0;

	ret = search_start;
	__bitmap_clear_bits(ctl, entry, ret, bytes);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes ||
		    (!entry->bitmap && entry->offset < min_start)) {
			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}

		if (entry->bitmap) {
			ret = btrfs_alloc_from_bitmap(block_group,
						      cluster, entry, bytes,
						      cluster->window_start);
			if (ret == 0) {
				node = rb_next(&entry->offset_index);
				if (!node)
					break;
				entry = rb_entry(node, struct btrfs_free_space,
						 offset_index);
				continue;
			}
			cluster->window_start += bytes;
		} else {
			ret = entry->offset;

			entry->offset += bytes;
			entry->bytes -= bytes;
		}

		if (entry->bytes == 0)
			rb_erase(&entry->offset_index, &cluster->root);
		break;
	}
out:
	spin_unlock(&cluster->lock);

	if (!ret)
		return 0;

	spin_lock(&ctl->tree_lock);

	ctl->free_space -= bytes;
	if (entry->bytes == 0) {
		ctl->free_extents--;
		if (entry->bitmap) {
			kfree(entry->bitmap);
			ctl->total_bitmaps--;
			ctl->op->recalc_thresholds(ctl);
		}
		kmem_cache_free(btrfs_free_space_cachep, entry);
	}

	spin_unlock(&ctl->tree_lock);

	return ret;
}
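
/*
 * Rough bit math for btrfs_bitmap_cluster() below (illustrative, and
 * assuming bytes_to_bits() rounds up to whole sectors): with a 4K
 * sectorsize, bytes = 1M means want_bits = 256 and min_bytes = 64K
 * means min_bits = 16, so the scan keeps collecting runs of at least
 * 16 set bits until 256 bits have been found in total and the largest
 * run covers cont1_bytes.
 */
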
	ret = tree_insert_offset(&cluster->root, entry->offset,
				 &entry->offset_index, 1);
	BUG_ON(ret);

	trace_btrfs_setup_cluster(block_group, cluster,
				  total_found * block_group->sectorsize, 1);
	return 0;
}

/*
 * This searches the block group for just extents to fill the cluster with.
 * Try to find a cluster with at least bytes total bytes, at least one
 * extent of cont1_bytes, and the remaining extents of at least min_bytes
 * each.
 */
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
			struct btrfs_free_cluster *cluster,
			struct list_head *bitmaps, u64 offset, u64 bytes,
			u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *first = NULL;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_free_space *last;
	struct rb_node *node;
	u64 window_start;
	u64 window_free;
	u64 max_extent;
	u64 total_size = 0;

	entry = tree_search_offset(ctl, offset, 0, 1);
	if (!entry)
		return -ENOSPC;

	/*
	 * We don't want bitmaps, so just move along until we find a normal
	 * extent entry.
	 */
	while (entry->bitmap || entry->bytes < min_bytes) {
		if (entry->bitmap && list_empty(&entry->list))
			list_add_tail(&entry->list, bitmaps);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	window_start = entry->offset;
	window_free = entry->bytes;
	max_extent = entry->bytes;
	first = entry;
	last = entry;

	for (node = rb_next(&entry->offset_index); node;
	     node = rb_next(&entry->offset_index)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);

		if (entry->bitmap) {
			if (list_empty(&entry->list))
				list_add_tail(&entry->list, bitmaps);
			continue;
		}

		if (entry->bytes < min_bytes)
			continue;

		last = entry;
		window_free += entry->bytes;
		if (entry->bytes > max_extent)
			max_extent = entry->bytes;
	}

	if (window_free < bytes || max_extent < cont1_bytes)
		return -ENOSPC;

	cluster->window_start = first->offset;

	node = &first->offset_index;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 */
	do {
		int ret;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (entry->bitmap || entry->bytes < min_bytes)
			continue;

		rb_erase(&entry->offset_index, &ctl->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		total_size += entry->bytes;
		BUG_ON(ret);
	} while (node && entry != last);

	cluster->max_size = max_extent;
	trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
	return 0;
}
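
/*
 * Note that the bitmaps list consumed by the next helper is the one
 * filled in above: setup_cluster_no_bitmap() queues every bitmap entry
 * it has to step over, so by the time we fall back to bitmaps we
 * already know which ones overlap the search window.
 */
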
/*
 * This specifically looks for bitmaps that may work in the cluster; we
 * assume that we have already failed to find extents that will work.
 */
static noinline int
setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
		     struct btrfs_free_cluster *cluster,
		     struct list_head *bitmaps, u64 offset, u64 bytes,
		     u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	int ret = -ENOSPC;
	u64 bitmap_offset = offset_to_bitmap(ctl, offset);

	if (ctl->total_bitmaps == 0)
		return -ENOSPC;

	/*
	 * The bitmap that covers offset won't be in the list unless offset
	 * is just its start offset.
	 */
	entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
	if (entry->offset != bitmap_offset) {
		entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
		if (entry && list_empty(&entry->list))
			list_add(&entry->list, bitmaps);
	}

	list_for_each_entry(entry, bitmaps, list) {
		if (entry->bytes < bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, cont1_bytes, min_bytes);
		if (!ret)
			return 0;
	}

	/*
	 * The bitmaps list has all the bitmaps that record free space
	 * starting after offset, so no more search is required.
	 */
	return -ENOSPC;
}
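
/*
 * Worked example for the cont1_bytes/min_bytes policy chosen below
 * (illustrative numbers): for a data allocation of bytes = 1M with
 * empty_size = 7M on a 4K sectorsize, cont1_bytes = max(1M, 8M >> 2) =
 * 2M and min_bytes = 4K, i.e. we demand one 2M contiguous extent and
 * fill the rest of the window from pieces no smaller than one sector.
 */
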
/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes+empty_size.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry, *tmp;
	LIST_HEAD(bitmaps);
	u64 min_bytes;
	u64 cont1_bytes;
	int ret;

	/*
	 * Choose the minimum extent size we'll require for this
	 * cluster.  For SSD_SPREAD, don't allow any fragmentation.
	 * For metadata, allow allocations with smaller extents.  For
	 * data, keep it dense.
	 */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		cont1_bytes = min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		cont1_bytes = bytes;
		min_bytes = block_group->sectorsize;
	} else {
		cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
		min_bytes = block_group->sectorsize;
	}

	spin_lock(&ctl->tree_lock);

	/*
	 * If we know we don't have enough space to make a cluster don't even
	 * bother doing all the work to try and find one.
	 */
	if (ctl->free_space < bytes) {
		spin_unlock(&ctl->tree_lock);
		return -ENOSPC;
	}

	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}

	trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
				 min_bytes);

	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
				      bytes + empty_size,
				      cont1_bytes, min_bytes);
	if (ret)
		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
					   offset, bytes + empty_size,
					   cont1_bytes, min_bytes);

	/* Clear our temporary list */
	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
		list_del_init(&entry->list);

	if (!ret) {
		atomic_inc(&block_group->count);
		list_add_tail(&cluster->block_group_list,
			      &block_group->cluster_list);
		cluster->block_group = block_group;
	} else {
		trace_btrfs_failed_cluster_setup(block_group);
	}
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}
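
/*
 * The dance in do_trimming(): the range being discarded was already
 * pulled out of the free space cache by the caller, and is temporarily
 * accounted as reserved (unless the block group is read-only) so the
 * allocator can't hand it out while the discard is in flight.  Once the
 * discard completes, the reserved range is re-added to the free space
 * cache and the reservation is dropped again.
 */
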
static int do_trimming(struct btrfs_block_group_cache *block_group,
		       u64 *total_trimmed, u64 start, u64 bytes,
		       u64 reserved_start, u64 reserved_bytes)
{
	struct btrfs_space_info *space_info = block_group->space_info;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	int ret;
	int update = 0;
	u64 trimmed = 0;

	spin_lock(&space_info->lock);
	spin_lock(&block_group->lock);
	if (!block_group->ro) {
		block_group->reserved += reserved_bytes;
		space_info->bytes_reserved += reserved_bytes;
		update = 1;
	}
	spin_unlock(&block_group->lock);
	spin_unlock(&space_info->lock);

	ret = btrfs_error_discard_extent(fs_info->extent_root,
					 start, bytes, &trimmed);
	if (!ret)
		*total_trimmed += trimmed;

	btrfs_add_free_space(block_group, reserved_start, reserved_bytes);

	if (update) {
		spin_lock(&space_info->lock);
		spin_lock(&block_group->lock);
		if (block_group->ro)
			space_info->bytes_readonly += reserved_bytes;
		block_group->reserved -= reserved_bytes;
		space_info->bytes_reserved -= reserved_bytes;
		spin_unlock(&space_info->lock);
		spin_unlock(&block_group->lock);
	}

	return ret;
}

static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
			  u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret = 0;
	u64 extent_start;
	u64 extent_bytes;
	u64 bytes;

	while (start < end) {
		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, start, 0, 1);
		if (!entry) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		/* skip bitmaps */
		while (entry->bitmap) {
			node = rb_next(&entry->offset_index);
			if (!node) {
				spin_unlock(&ctl->tree_lock);
				goto out;
			}
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
		}

		if (entry->offset >= end) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		extent_start = entry->offset;
		extent_bytes = entry->bytes;
		start = max(start, extent_start);
		bytes = min(extent_start + extent_bytes, end) - start;
		if (bytes < minlen) {
			spin_unlock(&ctl->tree_lock);
			goto next;
		}

		unlink_free_space(ctl, entry);
		kmem_cache_free(btrfs_free_space_cachep, entry);

		spin_unlock(&ctl->tree_lock);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  extent_start, extent_bytes);
		if (ret)
			break;
next:
		start += bytes;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}
out:
	return ret;
}

static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
			u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	int ret = 0;
	int ret2;
	u64 bytes;
	u64 offset = offset_to_bitmap(ctl, start);

	while (offset < end) {
		bool next_bitmap = false;

		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, offset, 1, 0);
		if (!entry) {
			spin_unlock(&ctl->tree_lock);
			next_bitmap = true;
			goto next;
		}

		bytes = minlen;
		ret2 = search_bitmap(ctl, entry, &start, &bytes);
		if (ret2 || start >= end) {
			spin_unlock(&ctl->tree_lock);
			next_bitmap = true;
			goto next;
		}

		bytes = min(bytes, end - start);
		if (bytes < minlen) {
			spin_unlock(&ctl->tree_lock);
			goto next;
		}

		bitmap_clear_bits(ctl, entry, start, bytes);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);

		spin_unlock(&ctl->tree_lock);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  start, bytes);
		if (ret)
			break;
next:
		if (next_bitmap) {
			offset += BITS_PER_BITMAP * ctl->unit;
		} else {
			start += bytes;
			if (start >= offset + BITS_PER_BITMAP * ctl->unit)
				offset += BITS_PER_BITMAP * ctl->unit;
		}

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}
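
/*
 * Entry point for trimming a single block group, reached from the
 * fs-wide trim path (the FITRIM ioctl): plain extent entries are
 * discarded first, then whatever free space is tracked in bitmaps.
 */
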
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
			   u64 *trimmed, u64 start, u64 end, u64 minlen)
{
	int ret;

	*trimmed = 0;

	ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
	if (ret)
		return ret;

	ret = trim_bitmaps(block_group, trimmed, start, end, minlen);

	return ret;
}

/*
 * Find the left-most item in the cache tree, and then return the
 * smallest inode number in the item.
 *
 * Note: the returned inode number may not be the smallest one in
 * the tree, if the left-most item is a bitmap.
 */
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
{
	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 ino = 0;

	spin_lock(&ctl->tree_lock);

	if (RB_EMPTY_ROOT(&ctl->free_space_offset))
		goto out;

	entry = rb_entry(rb_first(&ctl->free_space_offset),
			 struct btrfs_free_space, offset_index);

	if (!entry->bitmap) {
		ino = entry->offset;

		unlink_free_space(ctl, entry);
		entry->offset++;
		entry->bytes--;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	} else {
		u64 offset = 0;
		u64 count = 1;
		int ret;

		ret = search_bitmap(ctl, entry, &offset, &count);
		BUG_ON(ret);

		ino = offset;
		bitmap_clear_bits(ctl, entry, offset, 1);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);
	}
out:
	spin_unlock(&ctl->tree_lock);

	return ino;
}

struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&root->cache_lock);
	if (root->cache_inode)
		inode = igrab(root->cache_inode);
	spin_unlock(&root->cache_lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path, 0);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&root->cache_lock);
	if (!btrfs_fs_closing(root->fs_info))
		root->cache_inode = igrab(inode);
	spin_unlock(&root->cache_lock);

	return inode;
}

int create_free_ino_inode(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_path *path)
{
	return __create_free_space_inode(root, trans, path,
					 BTRFS_FREE_INO_OBJECTID, 0);
}
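
/*
 * Both the load and the write-out paths below are gated on the
 * INODE_MAP_CACHE mount option; with it disabled they return early and
 * the free-ino cache is simply never persisted or read back.
 */
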
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	/*
	 * If we're unmounting then just return, since this does a search on
	 * the normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to load free ino cache for root %llu\n",
		       root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct inode *inode;
	int ret;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
	if (ret) {
		btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free ino cache for root %llu\n",
		       root->root_key.objectid);
#endif
	}

	iput(inode);
	return ret;
}