/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	mapping_set_gfp_mask(inode->i_mapping,
			mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		btrfs_info(root->fs_info,
			   "Old style space inode found, converting.");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}
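
/*
 * Create the inode item and the free space header item that back a free
 * space cache.  The inode is a plain 0600 regular file; block group caches
 * additionally get NODATASUM/NODATACOW because their crcs are inlined in
 * the cache pages, while the free-ino cache keeps normal data checksums.
 */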
static int __create_free_space_inode(struct btrfs_root *root,
				     struct btrfs_trans_handle *trans,
				     struct btrfs_path *path,
				     u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crc's for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(root, trans, path, ino,
					 block_group->key.objectid);
}

int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	struct btrfs_block_rsv *rsv;
	u64 needed_bytes;
	loff_t oldsize;
	int ret = 0;

	rsv = trans->block_rsv;
	trans->block_rsv = &root->fs_info->global_block_rsv;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
		btrfs_calc_trans_metadata_size(root, 1);

	spin_lock(&trans->block_rsv->lock);
	if (trans->block_rsv->reserved < needed_bytes) {
		spin_unlock(&trans->block_rsv->lock);
		trans->block_rsv = rsv;
		return -ENOSPC;
	}
	spin_unlock(&trans->block_rsv->lock);

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret) {
		trans->block_rsv = rsv;
		btrfs_abort_transaction(trans, root, ret);
		return ret;
	}

	ret = btrfs_update_inode(trans, root, inode);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
	trans->block_rsv = rsv;

	return ret;
}

static int readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);

	return 0;
}
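
/*
 * State for streaming the cache file in and out page by page.  On disk the
 * cache starts with an array of crcs (one u32 per page) at the front of the
 * first page when crcs are used, followed by the generation, then the packed
 * free space entries, and finally one full page per bitmap entry.
 */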
struct io_ctl {
	void *cur, *orig;
	struct page *page;
	struct page **pages;
	struct btrfs_root *root;
	unsigned long size;
	int index;
	int num_pages;
	unsigned check_crcs:1;
};

static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
		       struct btrfs_root *root)
{
	memset(io_ctl, 0, sizeof(struct io_ctl));
	io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
		PAGE_CACHE_SHIFT;
	io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
				GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;
	io_ctl->root = root;
	if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
		io_ctl->check_crcs = 1;
	return 0;
}

static void io_ctl_free(struct io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
}

static void io_ctl_unmap_page(struct io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		kunmap(io_ctl->page);
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
	BUG_ON(io_ctl->index >= io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = kmap(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_CACHE_SIZE;
	if (clear)
		memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
}

static void io_ctl_drop_pages(struct io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			page_cache_release(io_ctl->pages[i]);
		}
	}
}

static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
				int uptodate)
{
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache\n");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++) {
		clear_page_dirty_for_io(io_ctl->pages[i]);
		set_page_extent_mapped(io_ctl->pages[i]);
	}

	return 0;
}
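
/*
 * The generation is stamped right after the per-page crc area (or at the
 * very start of the first page when crcs are not used).  On load it must
 * match the generation we expect, otherwise the cache is treated as stale.
 */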
static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
{
	__le64 *val;

	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas.  If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	val = io_ctl->cur;
	*val = cpu_to_le64(generation);
	io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
{
	__le64 *gen;

	/*
	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	gen = io_ctl->cur;
	if (le64_to_cpu(*gen) != generation) {
		printk_ratelimited(KERN_ERR "btrfs: space cache generation "
				   "(%Lu) does not match inode (%Lu)\n", *gen,
				   generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_unmap_page(io_ctl);
		return;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_csum_data(io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
	kunmap(io_ctl->pages[0]);
}

static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;
	kunmap(io_ctl->pages[0]);

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_csum_data(io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	if (val != crc) {
		printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
				   "space cache\n");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}
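
/*
 * Extent and bitmap entries are packed back to back into the mapped page;
 * when a page fills up its crc is finalized and the next page is mapped.
 * Bitmap payloads are written later, each one taking a whole page of its
 * own, in the order their entries were emitted.
 */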
static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	entry->offset = cpu_to_le64(offset);
	entry->bytes = cpu_to_le64(bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}

static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}
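
/*
 * The read side mirrors the writer: entries are consumed sequentially,
 * verifying each page's crc the first time it is mapped, and every bitmap
 * payload is copied out of a page of its own.
 */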
static int io_ctl_read_entry(struct io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
	io_ctl_unmap_page(io_ctl);

	return 0;
}

/*
 * Since we attach pinned extents after the fact we can have contiguous
 * sections of free space that are split up into separate entries.  This poses
 * a problem with the tree logging stuff since it could have allocated across
 * what appears to be 2 entries, because we would have merged the entries when
 * adding the pinned extents back to the free space cache.  So run through the
 * space cache that we just loaded and merge contiguous entries.  This will
 * make the log replay stuff not blow up and it will make for nicer allocator
 * behavior.
 */
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *e, *prev = NULL;
	struct rb_node *n;

again:
	spin_lock(&ctl->tree_lock);
	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		e = rb_entry(n, struct btrfs_free_space, offset_index);
		if (!prev)
			goto next;
		if (e->bitmap || prev->bitmap)
			goto next;
		if (prev->offset + prev->bytes == e->offset) {
			unlink_free_space(ctl, prev);
			unlink_free_space(ctl, e);
			prev->bytes += e->bytes;
			kmem_cache_free(btrfs_free_space_cachep, e);
			link_free_space(ctl, prev);
			prev = NULL;
			spin_unlock(&ctl->tree_lock);
			goto again;
		}
next:
		prev = e;
	}
	spin_unlock(&ctl->tree_lock);
}
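
/*
 * Read one cache file into the in-memory free space ctl: verify the
 * generation, pull in every extent and bitmap entry, and finally the bitmap
 * pages themselves.  Returns 1 when the cache was usable; anything else
 * means the caller should fall back to scanning the block group.
 */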
static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	INIT_LIST_HEAD(&bitmaps);

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (BTRFS_I(inode)->generation != generation) {
		btrfs_err(root->fs_info,
			  "free space inode generation (%llu) "
			  "did not match free space cache generation (%llu)",
			  (unsigned long long)BTRFS_I(inode)->generation,
			  (unsigned long long)generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return ret;

	ret = readahead_cache(inode);
	if (ret)
		goto out;

	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e)
			goto free_cache;

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (!e->bytes) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(root->fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			BUG_ON(!num_bitmaps);
			num_bitmaps--;
			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
			if (!e->bitmap) {
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(root->fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * We read the bitmaps last, in the same order that their entries were
	 * added to the cache.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	merge_space_tree(ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}
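
/*
 * Load the on-disk cache for a block group, but only if it was cleanly
 * written (BTRFS_DC_WRITTEN) and the total it describes matches what the
 * block group item says should be free.  On any mismatch the cache is
 * dropped and marked to be rebuilt.
 */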
int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		btrfs_err(fs_info, "block group %llu has wrong amount of free space",
			block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		btrfs_err(fs_info, "failed to load free space cache for block group %llu",
			block_group->key.objectid);
	}

	iput(inode);
	return ret;
}

/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 * @path - the path to use
 * @offset - the offset for the key we'll insert
 *
 * This function writes out a free space cache struct to disk for quick
 * recovery on mount.  This will return 0 if it was successful in writing the
 * cache out, and -1 if it was not.
 */
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_block_group_cache *block_group,
				   struct btrfs_trans_handle *trans,
				   struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct extent_state *cached_state = NULL;
	struct btrfs_free_cluster *cluster = NULL;
	struct extent_io_tree *unpin = NULL;
	struct io_ctl io_ctl;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 start, extent_start, extent_end, len;
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int err = -1;

	INIT_LIST_HEAD(&bitmap_list);

	if (!i_size_read(inode))
		return -1;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return -1;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list))
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);

	/* Lock all pages first so we can lock the extent safely. */
	io_ctl_prepare_pages(&io_ctl, inode, 0);

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state);

	node = rb_first(&ctl->free_space_offset);
	if (!node && cluster) {
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

	/* Make sure we can fit our crcs into the first page */
	if (io_ctl.check_crcs &&
	    (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
		WARN_ON(1);
		goto out_nospc;
	}

	io_ctl_set_generation(&io_ctl, trans->transid);

	/* Write out the extent entries */
	while (node) {
		struct btrfs_free_space *e;

		e = rb_entry(node, struct btrfs_free_space, offset_index);
		entries++;

		ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
				       e->bitmap);
		if (ret)
			goto out_nospc;

		if (e->bitmap) {
			list_add_tail(&e->list, &bitmap_list);
			bitmaps++;
		}
		node = rb_next(node);
		if (!node && cluster) {
			node = rb_first(&cluster->root);
			cluster = NULL;
		}
	}

	/*
	 * We want to add any pinned extents to our free space cache
	 * so we don't leak the space
	 */

	/*
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = root->fs_info->pinned_extents;

	if (block_group)
		start = block_group->key.objectid;

	while (block_group && (start < block_group->key.objectid +
			       block_group->key.offset)) {
		ret = find_first_extent_bit(unpin, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY, NULL);
		if (ret) {
			ret = 0;
			break;
		}

		/* This pinned extent is out of our range */
		if (extent_start >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		extent_start = max(extent_start, start);
		extent_end = min(block_group->key.objectid +
				 block_group->key.offset, extent_end + 1);
		len = extent_end - extent_start;

		entries++;
		ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
		if (ret)
			goto out_nospc;

		start = extent_end;
	}

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
		if (ret)
			goto out_nospc;
		list_del_init(&entry->list);
	}

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(&io_ctl);

	ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
				0, i_size_read(inode), &cached_state);
	io_ctl_drop_pages(&io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	if (ret)
		goto out;

	btrfs_wait_ordered_range(inode, 0, (u64)-1);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
				 GFP_NOFS);
		goto out;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
					 inode->i_size - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
					 NULL, GFP_NOFS);
			btrfs_release_path(path);
			goto out;
		}
	}

	BTRFS_I(inode)->generation = trans->transid;
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	err = 0;
out:
	io_ctl_free(&io_ctl);
	if (err) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	return err;

out_nospc:
	list_for_each_safe(pos, n, &bitmap_list) {
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);
		list_del_init(&entry->list);
	}
	io_ctl_drop_pages(&io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
	goto out;
}

int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	root = root->fs_info->tree_root;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
				      path, block_group->key.objectid);
	if (ret) {
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		ret = 0;
#ifdef DEBUG
		btrfs_err(root->fs_info,
			"failed to write free space cache for block group %llu",
			block_group->key.objectid);
#endif
	}

	iput(inode);
	return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}

static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we
 * just want a section that has at least 'bytes' size and comes at or after
 * the given offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = rb_prev(&entry->offset_index);
			if (n) {
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap &&
				    prev->offset + prev->bytes > offset)
					entry = prev;
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap &&
			    prev->offset + prev->bytes > offset)
				return prev;
		}
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static inline void
__unlink_free_space(struct btrfs_free_space_ctl *ctl,
		    struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_extents--;
}

static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	__unlink_free_space(ctl, info);
	ctl->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	ctl->free_space += info->bytes;
	ctl->free_extents++;
	return ret;
}
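
/*
 * Recompute how many extent entries we tolerate before new free space gets
 * folded into bitmaps.  The budget is roughly MAX_CACHE_BYTES_PER_GIG of
 * tracking memory per gigabyte of block group, split between the bitmaps we
 * already have (plus one spare) and plain extent entries.
 */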
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->key.offset;
	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	max_bitmaps = max(max_bitmaps, 1);

	BUG_ON(ctl->total_bitmaps > max_bitmaps);

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	if (size < 1024 * 1024 * 1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	ctl->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}

static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}
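
/*
 * Walk the offset tree from the hint looking for the first entry that can
 * satisfy the requested size once the start is rounded up to 'align'.
 * Bitmap entries are searched bit by bit; for plain extents the aligned
 * offset and remaining length are handed straight back.
 */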
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
		unsigned long align)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	u64 ctl_off;
	u64 tmp;
	u64 align_off;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		/* make sure the space returned is big enough
		 * to match our requested alignment
		 */
		if (*bytes >= align) {
			ctl_off = entry->offset - ctl->start;
			tmp = ctl_off + align - 1;
			do_div(tmp, align);
			tmp = tmp * align + ctl->start;
			align_off = tmp - entry->offset;
		} else {
			align_off = 0;
			tmp = entry->offset;
		}

		if (entry->bytes < *bytes + align_off)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(ctl, entry, &tmp, bytes);
			if (!ret) {
				*offset = tmp;
				return entry;
			}
			continue;
		}

		*offset = tmp;
		*bytes = entry->bytes - align_off;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	INIT_LIST_HEAD(&info->list);
	link_free_space(ctl, info);
	ctl->total_bitmaps++;

	ctl->op->recalc_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	unlink_free_space(ctl, bitmap_info);
	kfree(bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}
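
/*
 * Clear 'bytes' starting at 'offset' out of one or more consecutive bitmap
 * entries.  Returns -EAGAIN when the remainder is not covered by the next
 * bitmap, so the caller has to retry against the extent entries.
 */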
static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap.  We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and
	 * then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	return bytes_to_set;
}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them to larger extents, however if we have plenty
		 * of cache left then go ahead and add them, no sense in adding
		 * the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= block_group->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries.  However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory
	 * limit, so those block groups are still allowed to have a bitmap
	 * entry.
	 */
	if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
		return false;

	return true;
}

static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};
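
/*
 * Try to account the new free space in an existing bitmap: first the
 * cluster's bitmap if the block group has one, then whichever bitmap covers
 * the offset, allocating a fresh bitmap page when nothing does.  Returns 1
 * once the range is fully recorded, 0 if it should be kept as an extent
 * entry instead, or a negative error.
 */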
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
	 */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		struct btrfs_free_cluster *cluster;
		struct rb_node *node;
		struct btrfs_free_space *entry;

		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
		spin_lock(&cluster->lock);
		node = rb_first(&cluster->root);
		if (!node) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!entry->bitmap) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		if (entry->offset == offset_to_bitmap(ctl, offset)) {
			bytes_added = add_bytes_to_bitmap(ctl, entry,
							  offset, bytes);
			bytes -= bytes_added;
			offset += bytes_added;
		}
		spin_unlock(&cluster->lock);
		if (!bytes) {
			ret = 1;
			goto out;
		}
	}

no_cluster_bitmap:
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
	bytes -= bytes_added;
	offset += bytes_added;
	added = 0;

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(ctl, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&ctl->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kmem_cache_zalloc(btrfs_free_space_cachep,
						 GFP_NOFS);
			if (!info) {
				spin_lock(&ctl->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&ctl->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kmem_cache_free(btrfs_free_space_cachep, info);
	}

	return ret;
}

static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
				 struct btrfs_free_space *info, bool update_stat)
{
	struct btrfs_free_space *left_info;
	struct btrfs_free_space *right_info;
	bool merged = false;
	u64 offset = info->offset;
	u64 bytes = info->bytes;

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(ctl, offset - 1, 0, 0);

	if (right_info && !right_info->bitmap) {
		if (update_stat)
			unlink_free_space(ctl, right_info);
		else
			__unlink_free_space(ctl, right_info);
		info->bytes += right_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, right_info);
		merged = true;
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		if (update_stat)
			unlink_free_space(ctl, left_info);
		else
			__unlink_free_space(ctl, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, left_info);
		merged = true;
	}

	return merged;
}

int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
			   u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	int ret = 0;

	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&ctl->tree_lock);

	if (try_merge_free_space(ctl, info, true))
		goto link;

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	ret = insert_into_bitmap(ctl, info);
	if (ret < 0) {
		goto out;
	} else if (ret) {
		ret = 0;
		goto out;
	}
link:
	ret = link_free_space(ctl, info);
	if (ret)
		kmem_cache_free(btrfs_free_space_cachep, info);
out:
	spin_unlock(&ctl->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	int ret;
	bool re_search = false;

	spin_lock(&ctl->tree_lock);

again:
	ret = 0;
	if (!bytes)
		goto out_lock;

	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info) {
			/*
			 * If we found a partial bit of our free space in a
			 * bitmap but then couldn't find the other part this may
			 * be a problem, so WARN about it.
			 */
			WARN_ON(re_search);
			goto out_lock;
		}
	}

	re_search = false;
	if (!info->bitmap) {
		unlink_free_space(ctl, info);
		if (offset == info->offset) {
			u64 to_free = min(bytes, info->bytes);

			info->bytes -= to_free;
			info->offset += to_free;
			if (info->bytes) {
				ret = link_free_space(ctl, info);
				WARN_ON(ret);
			} else {
				kmem_cache_free(btrfs_free_space_cachep, info);
			}

			offset += to_free;
			bytes -= to_free;
			goto again;
		} else {
			u64 old_end = info->bytes + info->offset;

			info->bytes = offset - info->offset;
			ret = link_free_space(ctl, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;

			/* Not enough bytes in this entry to satisfy us */
			if (old_end < offset + bytes) {
				bytes -= old_end - offset;
				offset = old_end;
				goto again;
			} else if (old_end == offset + bytes) {
				/* all done */
				goto out_lock;
			}
			spin_unlock(&ctl->tree_lock);

			ret = btrfs_add_free_space(block_group, offset + bytes,
						   old_end - (offset + bytes));
			WARN_ON(ret);
			goto out;
		}
	}

	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
	if (ret == -EAGAIN) {
		re_search = true;
		goto again;
	}
out_lock:
	spin_unlock(&ctl->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes && !block_group->ro)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}
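
/*
 * Set up the in-memory free space ctl for a block group: space is tracked
 * in sectorsize units starting at the block group's logical offset, and the
 * extent entry threshold starts at half of the 32k per-group budget.
 */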
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;

	spin_lock_init(&ctl->tree_lock);
	ctl->unit = block_group->sectorsize;
	ctl->start = block_group->key.objectid;
	ctl->private = block_group;
	ctl->op = &free_space_op;

	/*
	 * we only want to have 32k of ram per block group for keeping
	 * track of free space, and if we pass 1/2 of that we want to
	 * start converting things over to using bitmaps
	 */
	ctl->extents_thresh = ((1024 * 32) / 2) /
				sizeof(struct btrfs_free_space);
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);

	node = rb_first(&cluster->root);
	while (node) {
		bool bitmap;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);

		bitmap = (entry->bitmap != NULL);
		if (!bitmap)
			try_merge_free_space(ctl, entry, false);
		tree_insert_offset(&ctl->free_space_offset,
				   entry->offset, &entry->offset_index, bitmap);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

static void __btrfs_remove_free_space_cache_locked(
				struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			free_bitmap(ctl, info);
		}
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
}

void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	spin_lock(&ctl->tree_lock);
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&ctl->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}
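
/*
 * Grab 'bytes' (plus any requested empty_size) of free space from this block
 * group, aligned to the full stripe length.  Space skipped over to satisfy
 * the alignment is immediately handed back to the free space ctl.
 */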
offset - entry->offset; 2150 align_gap = entry->offset; 2151 2152 entry->offset = offset + bytes; 2153 WARN_ON(entry->bytes < bytes + align_gap_len); 2154 2155 entry->bytes -= bytes + align_gap_len; 2156 if (!entry->bytes) 2157 kmem_cache_free(btrfs_free_space_cachep, entry); 2158 else 2159 link_free_space(ctl, entry); 2160 } 2161 2162 out: 2163 spin_unlock(&ctl->tree_lock); 2164 2165 if (align_gap_len) 2166 __btrfs_add_free_space(ctl, align_gap, align_gap_len); 2167 return ret; 2168 } 2169 2170 /* 2171 * given a cluster, put all of its extents back into the free space 2172 * cache. If a block group is passed, this function will only free 2173 * a cluster that belongs to the passed block group. 2174 * 2175 * Otherwise, it'll get a reference on the block group pointed to by the 2176 * cluster and remove the cluster from it. 2177 */ 2178 int btrfs_return_cluster_to_free_space( 2179 struct btrfs_block_group_cache *block_group, 2180 struct btrfs_free_cluster *cluster) 2181 { 2182 struct btrfs_free_space_ctl *ctl; 2183 int ret; 2184 2185 /* first, get a safe pointer to the block group */ 2186 spin_lock(&cluster->lock); 2187 if (!block_group) { 2188 block_group = cluster->block_group; 2189 if (!block_group) { 2190 spin_unlock(&cluster->lock); 2191 return 0; 2192 } 2193 } else if (cluster->block_group != block_group) { 2194 /* someone else has already freed it don't redo their work */ 2195 spin_unlock(&cluster->lock); 2196 return 0; 2197 } 2198 atomic_inc(&block_group->count); 2199 spin_unlock(&cluster->lock); 2200 2201 ctl = block_group->free_space_ctl; 2202 2203 /* now return any extents the cluster had on it */ 2204 spin_lock(&ctl->tree_lock); 2205 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2206 spin_unlock(&ctl->tree_lock); 2207 2208 /* finally drop our ref */ 2209 btrfs_put_block_group(block_group); 2210 return ret; 2211 } 2212 2213 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2214 struct btrfs_free_cluster *cluster, 2215 struct btrfs_free_space *entry, 2216 u64 bytes, u64 min_start) 2217 { 2218 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2219 int err; 2220 u64 search_start = cluster->window_start; 2221 u64 search_bytes = bytes; 2222 u64 ret = 0; 2223 2224 search_start = min_start; 2225 search_bytes = bytes; 2226 2227 err = search_bitmap(ctl, entry, &search_start, &search_bytes); 2228 if (err) 2229 return 0; 2230 2231 ret = search_start; 2232 __bitmap_clear_bits(ctl, entry, ret, bytes); 2233 2234 return ret; 2235 } 2236 2237 /* 2238 * given a cluster, try to allocate 'bytes' from it, returns 0 2239 * if it couldn't find anything suitably large, or a logical disk offset 2240 * if things worked out 2241 */ 2242 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2243 struct btrfs_free_cluster *cluster, u64 bytes, 2244 u64 min_start) 2245 { 2246 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2247 struct btrfs_free_space *entry = NULL; 2248 struct rb_node *node; 2249 u64 ret = 0; 2250 2251 spin_lock(&cluster->lock); 2252 if (bytes > cluster->max_size) 2253 goto out; 2254 2255 if (cluster->block_group != block_group) 2256 goto out; 2257 2258 node = rb_first(&cluster->root); 2259 if (!node) 2260 goto out; 2261 2262 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2263 while(1) { 2264 if (entry->bytes < bytes || 2265 (!entry->bitmap && entry->offset < min_start)) { 2266 node = rb_next(&entry->offset_index); 2267 if (!node) 2268 break; 2269 entry = rb_entry(node, struct 
btrfs_free_space, 2270 offset_index); 2271 continue; 2272 } 2273 2274 if (entry->bitmap) { 2275 ret = btrfs_alloc_from_bitmap(block_group, 2276 cluster, entry, bytes, 2277 cluster->window_start); 2278 if (ret == 0) { 2279 node = rb_next(&entry->offset_index); 2280 if (!node) 2281 break; 2282 entry = rb_entry(node, struct btrfs_free_space, 2283 offset_index); 2284 continue; 2285 } 2286 cluster->window_start += bytes; 2287 } else { 2288 ret = entry->offset; 2289 2290 entry->offset += bytes; 2291 entry->bytes -= bytes; 2292 } 2293 2294 if (entry->bytes == 0) 2295 rb_erase(&entry->offset_index, &cluster->root); 2296 break; 2297 } 2298 out: 2299 spin_unlock(&cluster->lock); 2300 2301 if (!ret) 2302 return 0; 2303 2304 spin_lock(&ctl->tree_lock); 2305 2306 ctl->free_space -= bytes; 2307 if (entry->bytes == 0) { 2308 ctl->free_extents--; 2309 if (entry->bitmap) { 2310 kfree(entry->bitmap); 2311 ctl->total_bitmaps--; 2312 ctl->op->recalc_thresholds(ctl); 2313 } 2314 kmem_cache_free(btrfs_free_space_cachep, entry); 2315 } 2316 2317 spin_unlock(&ctl->tree_lock); 2318 2319 return ret; 2320 } 2321 2322 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2323 struct btrfs_free_space *entry, 2324 struct btrfs_free_cluster *cluster, 2325 u64 offset, u64 bytes, 2326 u64 cont1_bytes, u64 min_bytes) 2327 { 2328 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2329 unsigned long next_zero; 2330 unsigned long i; 2331 unsigned long want_bits; 2332 unsigned long min_bits; 2333 unsigned long found_bits; 2334 unsigned long start = 0; 2335 unsigned long total_found = 0; 2336 int ret; 2337 2338 i = offset_to_bit(entry->offset, ctl->unit, 2339 max_t(u64, offset, entry->offset)); 2340 want_bits = bytes_to_bits(bytes, ctl->unit); 2341 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2342 2343 again: 2344 found_bits = 0; 2345 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2346 next_zero = find_next_zero_bit(entry->bitmap, 2347 BITS_PER_BITMAP, i); 2348 if (next_zero - i >= min_bits) { 2349 found_bits = next_zero - i; 2350 break; 2351 } 2352 i = next_zero; 2353 } 2354 2355 if (!found_bits) 2356 return -ENOSPC; 2357 2358 if (!total_found) { 2359 start = i; 2360 cluster->max_size = 0; 2361 } 2362 2363 total_found += found_bits; 2364 2365 if (cluster->max_size < found_bits * ctl->unit) 2366 cluster->max_size = found_bits * ctl->unit; 2367 2368 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2369 i = next_zero + 1; 2370 goto again; 2371 } 2372 2373 cluster->window_start = start * ctl->unit + entry->offset; 2374 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2375 ret = tree_insert_offset(&cluster->root, entry->offset, 2376 &entry->offset_index, 1); 2377 BUG_ON(ret); /* -EEXIST; Logic error */ 2378 2379 trace_btrfs_setup_cluster(block_group, cluster, 2380 total_found * ctl->unit, 1); 2381 return 0; 2382 } 2383 2384 /* 2385 * This searches the block group for just extents to fill the cluster with. 2386 * Try to find a cluster with at least bytes total bytes, at least one 2387 * extent of cont1_bytes, and other clusters of at least min_bytes. 
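 * Any bitmap entries seen along the way are queued on the caller's 'bitmaps'
 * list so that setup_cluster_bitmap() can fall back to them if no run of
 * plain extent entries turns out to be large enough.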
2388 */ 2389 static noinline int 2390 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2391 struct btrfs_free_cluster *cluster, 2392 struct list_head *bitmaps, u64 offset, u64 bytes, 2393 u64 cont1_bytes, u64 min_bytes) 2394 { 2395 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2396 struct btrfs_free_space *first = NULL; 2397 struct btrfs_free_space *entry = NULL; 2398 struct btrfs_free_space *last; 2399 struct rb_node *node; 2400 u64 window_start; 2401 u64 window_free; 2402 u64 max_extent; 2403 u64 total_size = 0; 2404 2405 entry = tree_search_offset(ctl, offset, 0, 1); 2406 if (!entry) 2407 return -ENOSPC; 2408 2409 /* 2410 * We don't want bitmaps, so just move along until we find a normal 2411 * extent entry. 2412 */ 2413 while (entry->bitmap || entry->bytes < min_bytes) { 2414 if (entry->bitmap && list_empty(&entry->list)) 2415 list_add_tail(&entry->list, bitmaps); 2416 node = rb_next(&entry->offset_index); 2417 if (!node) 2418 return -ENOSPC; 2419 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2420 } 2421 2422 window_start = entry->offset; 2423 window_free = entry->bytes; 2424 max_extent = entry->bytes; 2425 first = entry; 2426 last = entry; 2427 2428 for (node = rb_next(&entry->offset_index); node; 2429 node = rb_next(&entry->offset_index)) { 2430 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2431 2432 if (entry->bitmap) { 2433 if (list_empty(&entry->list)) 2434 list_add_tail(&entry->list, bitmaps); 2435 continue; 2436 } 2437 2438 if (entry->bytes < min_bytes) 2439 continue; 2440 2441 last = entry; 2442 window_free += entry->bytes; 2443 if (entry->bytes > max_extent) 2444 max_extent = entry->bytes; 2445 } 2446 2447 if (window_free < bytes || max_extent < cont1_bytes) 2448 return -ENOSPC; 2449 2450 cluster->window_start = first->offset; 2451 2452 node = &first->offset_index; 2453 2454 /* 2455 * now we've found our entries, pull them out of the free space 2456 * cache and put them into the cluster rbtree 2457 */ 2458 do { 2459 int ret; 2460 2461 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2462 node = rb_next(&entry->offset_index); 2463 if (entry->bitmap || entry->bytes < min_bytes) 2464 continue; 2465 2466 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2467 ret = tree_insert_offset(&cluster->root, entry->offset, 2468 &entry->offset_index, 0); 2469 total_size += entry->bytes; 2470 BUG_ON(ret); /* -EEXIST; Logic error */ 2471 } while (node && entry != last); 2472 2473 cluster->max_size = max_extent; 2474 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2475 return 0; 2476 } 2477 2478 /* 2479 * This specifically looks for bitmaps that may work in the cluster, we assume 2480 * that we have already failed to find extents that will work. 2481 */ 2482 static noinline int 2483 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2484 struct btrfs_free_cluster *cluster, 2485 struct list_head *bitmaps, u64 offset, u64 bytes, 2486 u64 cont1_bytes, u64 min_bytes) 2487 { 2488 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2489 struct btrfs_free_space *entry; 2490 int ret = -ENOSPC; 2491 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2492 2493 if (ctl->total_bitmaps == 0) 2494 return -ENOSPC; 2495 2496 /* 2497 * The bitmap that covers offset won't be in the list unless offset 2498 * is just its start offset. 
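 * If it is missing, look that bitmap up in the tree directly and add it to
 * the list so it gets tried along with the others.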
2499 */ 2500 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2501 if (entry->offset != bitmap_offset) { 2502 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2503 if (entry && list_empty(&entry->list)) 2504 list_add(&entry->list, bitmaps); 2505 } 2506 2507 list_for_each_entry(entry, bitmaps, list) { 2508 if (entry->bytes < bytes) 2509 continue; 2510 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 2511 bytes, cont1_bytes, min_bytes); 2512 if (!ret) 2513 return 0; 2514 } 2515 2516 /* 2517 * The bitmaps list has all the bitmaps that record free space 2518 * starting after offset, so no more search is required. 2519 */ 2520 return -ENOSPC; 2521 } 2522 2523 /* 2524 * here we try to find a cluster of blocks in a block group. The goal 2525 * is to find at least bytes+empty_size. 2526 * We might not find them all in one contiguous area. 2527 * 2528 * returns zero and sets up cluster if things worked out, otherwise 2529 * it returns -enospc 2530 */ 2531 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 2532 struct btrfs_root *root, 2533 struct btrfs_block_group_cache *block_group, 2534 struct btrfs_free_cluster *cluster, 2535 u64 offset, u64 bytes, u64 empty_size) 2536 { 2537 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2538 struct btrfs_free_space *entry, *tmp; 2539 LIST_HEAD(bitmaps); 2540 u64 min_bytes; 2541 u64 cont1_bytes; 2542 int ret; 2543 2544 /* 2545 * Choose the minimum extent size we'll require for this 2546 * cluster. For SSD_SPREAD, don't allow any fragmentation. 2547 * For metadata, allow allocates with smaller extents. For 2548 * data, keep it dense. 2549 */ 2550 if (btrfs_test_opt(root, SSD_SPREAD)) { 2551 cont1_bytes = min_bytes = bytes + empty_size; 2552 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 2553 cont1_bytes = bytes; 2554 min_bytes = block_group->sectorsize; 2555 } else { 2556 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 2557 min_bytes = block_group->sectorsize; 2558 } 2559 2560 spin_lock(&ctl->tree_lock); 2561 2562 /* 2563 * If we know we don't have enough space to make a cluster don't even 2564 * bother doing all the work to try and find one. 
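 * ctl->free_space is the running total of free bytes tracked for this block
 * group, so comparing it against the request is a cheap early-out before we
 * take the cluster lock and walk the tree.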
2565 */ 2566 if (ctl->free_space < bytes) { 2567 spin_unlock(&ctl->tree_lock); 2568 return -ENOSPC; 2569 } 2570 2571 spin_lock(&cluster->lock); 2572 2573 /* someone already found a cluster, hooray */ 2574 if (cluster->block_group) { 2575 ret = 0; 2576 goto out; 2577 } 2578 2579 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 2580 min_bytes); 2581 2582 INIT_LIST_HEAD(&bitmaps); 2583 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 2584 bytes + empty_size, 2585 cont1_bytes, min_bytes); 2586 if (ret) 2587 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 2588 offset, bytes + empty_size, 2589 cont1_bytes, min_bytes); 2590 2591 /* Clear our temporary list */ 2592 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 2593 list_del_init(&entry->list); 2594 2595 if (!ret) { 2596 atomic_inc(&block_group->count); 2597 list_add_tail(&cluster->block_group_list, 2598 &block_group->cluster_list); 2599 cluster->block_group = block_group; 2600 } else { 2601 trace_btrfs_failed_cluster_setup(block_group); 2602 } 2603 out: 2604 spin_unlock(&cluster->lock); 2605 spin_unlock(&ctl->tree_lock); 2606 2607 return ret; 2608 } 2609 2610 /* 2611 * simple code to zero out a cluster 2612 */ 2613 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 2614 { 2615 spin_lock_init(&cluster->lock); 2616 spin_lock_init(&cluster->refill_lock); 2617 cluster->root = RB_ROOT; 2618 cluster->max_size = 0; 2619 INIT_LIST_HEAD(&cluster->block_group_list); 2620 cluster->block_group = NULL; 2621 } 2622 2623 static int do_trimming(struct btrfs_block_group_cache *block_group, 2624 u64 *total_trimmed, u64 start, u64 bytes, 2625 u64 reserved_start, u64 reserved_bytes) 2626 { 2627 struct btrfs_space_info *space_info = block_group->space_info; 2628 struct btrfs_fs_info *fs_info = block_group->fs_info; 2629 int ret; 2630 int update = 0; 2631 u64 trimmed = 0; 2632 2633 spin_lock(&space_info->lock); 2634 spin_lock(&block_group->lock); 2635 if (!block_group->ro) { 2636 block_group->reserved += reserved_bytes; 2637 space_info->bytes_reserved += reserved_bytes; 2638 update = 1; 2639 } 2640 spin_unlock(&block_group->lock); 2641 spin_unlock(&space_info->lock); 2642 2643 ret = btrfs_error_discard_extent(fs_info->extent_root, 2644 start, bytes, &trimmed); 2645 if (!ret) 2646 *total_trimmed += trimmed; 2647 2648 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 2649 2650 if (update) { 2651 spin_lock(&space_info->lock); 2652 spin_lock(&block_group->lock); 2653 if (block_group->ro) 2654 space_info->bytes_readonly += reserved_bytes; 2655 block_group->reserved -= reserved_bytes; 2656 space_info->bytes_reserved -= reserved_bytes; 2657 spin_unlock(&space_info->lock); 2658 spin_unlock(&block_group->lock); 2659 } 2660 2661 return ret; 2662 } 2663 2664 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 2665 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 2666 { 2667 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2668 struct btrfs_free_space *entry; 2669 struct rb_node *node; 2670 int ret = 0; 2671 u64 extent_start; 2672 u64 extent_bytes; 2673 u64 bytes; 2674 2675 while (start < end) { 2676 spin_lock(&ctl->tree_lock); 2677 2678 if (ctl->free_space < minlen) { 2679 spin_unlock(&ctl->tree_lock); 2680 break; 2681 } 2682 2683 entry = tree_search_offset(ctl, start, 0, 1); 2684 if (!entry) { 2685 spin_unlock(&ctl->tree_lock); 2686 break; 2687 } 2688 2689 /* skip bitmaps */ 2690 while (entry->bitmap) { 2691 node = rb_next(&entry->offset_index); 2692 if 
(!node) { 2693 spin_unlock(&ctl->tree_lock); 2694 goto out; 2695 } 2696 entry = rb_entry(node, struct btrfs_free_space, 2697 offset_index); 2698 } 2699 2700 if (entry->offset >= end) { 2701 spin_unlock(&ctl->tree_lock); 2702 break; 2703 } 2704 2705 extent_start = entry->offset; 2706 extent_bytes = entry->bytes; 2707 start = max(start, extent_start); 2708 bytes = min(extent_start + extent_bytes, end) - start; 2709 if (bytes < minlen) { 2710 spin_unlock(&ctl->tree_lock); 2711 goto next; 2712 } 2713 2714 unlink_free_space(ctl, entry); 2715 kmem_cache_free(btrfs_free_space_cachep, entry); 2716 2717 spin_unlock(&ctl->tree_lock); 2718 2719 ret = do_trimming(block_group, total_trimmed, start, bytes, 2720 extent_start, extent_bytes); 2721 if (ret) 2722 break; 2723 next: 2724 start += bytes; 2725 2726 if (fatal_signal_pending(current)) { 2727 ret = -ERESTARTSYS; 2728 break; 2729 } 2730 2731 cond_resched(); 2732 } 2733 out: 2734 return ret; 2735 } 2736 2737 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 2738 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 2739 { 2740 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2741 struct btrfs_free_space *entry; 2742 int ret = 0; 2743 int ret2; 2744 u64 bytes; 2745 u64 offset = offset_to_bitmap(ctl, start); 2746 2747 while (offset < end) { 2748 bool next_bitmap = false; 2749 2750 spin_lock(&ctl->tree_lock); 2751 2752 if (ctl->free_space < minlen) { 2753 spin_unlock(&ctl->tree_lock); 2754 break; 2755 } 2756 2757 entry = tree_search_offset(ctl, offset, 1, 0); 2758 if (!entry) { 2759 spin_unlock(&ctl->tree_lock); 2760 next_bitmap = true; 2761 goto next; 2762 } 2763 2764 bytes = minlen; 2765 ret2 = search_bitmap(ctl, entry, &start, &bytes); 2766 if (ret2 || start >= end) { 2767 spin_unlock(&ctl->tree_lock); 2768 next_bitmap = true; 2769 goto next; 2770 } 2771 2772 bytes = min(bytes, end - start); 2773 if (bytes < minlen) { 2774 spin_unlock(&ctl->tree_lock); 2775 goto next; 2776 } 2777 2778 bitmap_clear_bits(ctl, entry, start, bytes); 2779 if (entry->bytes == 0) 2780 free_bitmap(ctl, entry); 2781 2782 spin_unlock(&ctl->tree_lock); 2783 2784 ret = do_trimming(block_group, total_trimmed, start, bytes, 2785 start, bytes); 2786 if (ret) 2787 break; 2788 next: 2789 if (next_bitmap) { 2790 offset += BITS_PER_BITMAP * ctl->unit; 2791 } else { 2792 start += bytes; 2793 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 2794 offset += BITS_PER_BITMAP * ctl->unit; 2795 } 2796 2797 if (fatal_signal_pending(current)) { 2798 ret = -ERESTARTSYS; 2799 break; 2800 } 2801 2802 cond_resched(); 2803 } 2804 2805 return ret; 2806 } 2807 2808 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 2809 u64 *trimmed, u64 start, u64 end, u64 minlen) 2810 { 2811 int ret; 2812 2813 *trimmed = 0; 2814 2815 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 2816 if (ret) 2817 return ret; 2818 2819 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 2820 2821 return ret; 2822 } 2823 2824 /* 2825 * Find the left-most item in the cache tree, and then return the 2826 * smallest inode number in the item. 2827 * 2828 * Note: the returned inode number may not be the smallest one in 2829 * the tree, if the left-most item is a bitmap. 
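 * A bitmap entry is indexed by the start offset of the range it covers, not
 * by its first set bit, so the bit we hand back from it can lie beyond the
 * offset of an extent entry that sorts after the bitmap in the tree.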
2830 */ 2831 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 2832 { 2833 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 2834 struct btrfs_free_space *entry = NULL; 2835 u64 ino = 0; 2836 2837 spin_lock(&ctl->tree_lock); 2838 2839 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 2840 goto out; 2841 2842 entry = rb_entry(rb_first(&ctl->free_space_offset), 2843 struct btrfs_free_space, offset_index); 2844 2845 if (!entry->bitmap) { 2846 ino = entry->offset; 2847 2848 unlink_free_space(ctl, entry); 2849 entry->offset++; 2850 entry->bytes--; 2851 if (!entry->bytes) 2852 kmem_cache_free(btrfs_free_space_cachep, entry); 2853 else 2854 link_free_space(ctl, entry); 2855 } else { 2856 u64 offset = 0; 2857 u64 count = 1; 2858 int ret; 2859 2860 ret = search_bitmap(ctl, entry, &offset, &count); 2861 /* Logic error; Should be empty if it can't find anything */ 2862 BUG_ON(ret); 2863 2864 ino = offset; 2865 bitmap_clear_bits(ctl, entry, offset, 1); 2866 if (entry->bytes == 0) 2867 free_bitmap(ctl, entry); 2868 } 2869 out: 2870 spin_unlock(&ctl->tree_lock); 2871 2872 return ino; 2873 } 2874 2875 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 2876 struct btrfs_path *path) 2877 { 2878 struct inode *inode = NULL; 2879 2880 spin_lock(&root->cache_lock); 2881 if (root->cache_inode) 2882 inode = igrab(root->cache_inode); 2883 spin_unlock(&root->cache_lock); 2884 if (inode) 2885 return inode; 2886 2887 inode = __lookup_free_space_inode(root, path, 0); 2888 if (IS_ERR(inode)) 2889 return inode; 2890 2891 spin_lock(&root->cache_lock); 2892 if (!btrfs_fs_closing(root->fs_info)) 2893 root->cache_inode = igrab(inode); 2894 spin_unlock(&root->cache_lock); 2895 2896 return inode; 2897 } 2898 2899 int create_free_ino_inode(struct btrfs_root *root, 2900 struct btrfs_trans_handle *trans, 2901 struct btrfs_path *path) 2902 { 2903 return __create_free_space_inode(root, trans, path, 2904 BTRFS_FREE_INO_OBJECTID, 0); 2905 } 2906 2907 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2908 { 2909 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 2910 struct btrfs_path *path; 2911 struct inode *inode; 2912 int ret = 0; 2913 u64 root_gen = btrfs_root_generation(&root->root_item); 2914 2915 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 2916 return 0; 2917 2918 /* 2919 * If we're unmounting then just return, since this does a search on the 2920 * normal root and not the commit root and we could deadlock. 
2921 */ 2922 if (btrfs_fs_closing(fs_info)) 2923 return 0; 2924 2925 path = btrfs_alloc_path(); 2926 if (!path) 2927 return 0; 2928 2929 inode = lookup_free_ino_inode(root, path); 2930 if (IS_ERR(inode)) 2931 goto out; 2932 2933 if (root_gen != BTRFS_I(inode)->generation) 2934 goto out_put; 2935 2936 ret = __load_free_space_cache(root, inode, ctl, path, 0); 2937 2938 if (ret < 0) 2939 btrfs_err(fs_info, 2940 "failed to load free ino cache for root %llu", 2941 root->root_key.objectid); 2942 out_put: 2943 iput(inode); 2944 out: 2945 btrfs_free_path(path); 2946 return ret; 2947 } 2948 2949 int btrfs_write_out_ino_cache(struct btrfs_root *root, 2950 struct btrfs_trans_handle *trans, 2951 struct btrfs_path *path) 2952 { 2953 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 2954 struct inode *inode; 2955 int ret; 2956 2957 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 2958 return 0; 2959 2960 inode = lookup_free_ino_inode(root, path); 2961 if (IS_ERR(inode)) 2962 return 0; 2963 2964 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); 2965 if (ret) { 2966 btrfs_delalloc_release_metadata(inode, inode->i_size); 2967 #ifdef DEBUG 2968 btrfs_err(root->fs_info, 2969 "failed to write free ino cache for root %llu", 2970 root->root_key.objectid); 2971 #endif 2972 } 2973 2974 iput(inode); 2975 return ret; 2976 } 2977 2978 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 2979 static struct btrfs_block_group_cache *init_test_block_group(void) 2980 { 2981 struct btrfs_block_group_cache *cache; 2982 2983 cache = kzalloc(sizeof(*cache), GFP_NOFS); 2984 if (!cache) 2985 return NULL; 2986 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), 2987 GFP_NOFS); 2988 if (!cache->free_space_ctl) { 2989 kfree(cache); 2990 return NULL; 2991 } 2992 2993 cache->key.objectid = 0; 2994 cache->key.offset = 1024 * 1024 * 1024; 2995 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 2996 cache->sectorsize = 4096; 2997 2998 spin_lock_init(&cache->lock); 2999 INIT_LIST_HEAD(&cache->list); 3000 INIT_LIST_HEAD(&cache->cluster_list); 3001 INIT_LIST_HEAD(&cache->new_bg_list); 3002 3003 btrfs_init_free_space_ctl(cache); 3004 3005 return cache; 3006 } 3007 3008 /* 3009 * Checks to see if the given range is in the free space cache. This is really 3010 * just used to check the absence of space, so if there is free space in the 3011 * range at all we will return 1. 
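 * If the entry we land on is a bitmap with no free bits inside the range, its
 * neighbours are walked with rb_prev()/rb_next() in case an adjacent extent
 * or bitmap entry still overlaps the range.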
3012 */ 3013 static int check_exists(struct btrfs_block_group_cache *cache, u64 offset, 3014 u64 bytes) 3015 { 3016 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3017 struct btrfs_free_space *info; 3018 int ret = 0; 3019 3020 spin_lock(&ctl->tree_lock); 3021 info = tree_search_offset(ctl, offset, 0, 0); 3022 if (!info) { 3023 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3024 1, 0); 3025 if (!info) 3026 goto out; 3027 } 3028 3029 have_info: 3030 if (info->bitmap) { 3031 u64 bit_off, bit_bytes; 3032 struct rb_node *n; 3033 struct btrfs_free_space *tmp; 3034 3035 bit_off = offset; 3036 bit_bytes = ctl->unit; 3037 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes); 3038 if (!ret) { 3039 if (bit_off == offset) { 3040 ret = 1; 3041 goto out; 3042 } else if (bit_off > offset && 3043 offset + bytes > bit_off) { 3044 ret = 1; 3045 goto out; 3046 } 3047 } 3048 3049 n = rb_prev(&info->offset_index); 3050 while (n) { 3051 tmp = rb_entry(n, struct btrfs_free_space, 3052 offset_index); 3053 if (tmp->offset + tmp->bytes < offset) 3054 break; 3055 if (offset + bytes < tmp->offset) { 3056 n = rb_prev(&info->offset_index); 3057 continue; 3058 } 3059 info = tmp; 3060 goto have_info; 3061 } 3062 3063 n = rb_next(&info->offset_index); 3064 while (n) { 3065 tmp = rb_entry(n, struct btrfs_free_space, 3066 offset_index); 3067 if (offset + bytes < tmp->offset) 3068 break; 3069 if (tmp->offset + tmp->bytes < offset) { 3070 n = rb_next(&info->offset_index); 3071 continue; 3072 } 3073 info = tmp; 3074 goto have_info; 3075 } 3076 3077 goto out; 3078 } 3079 3080 if (info->offset == offset) { 3081 ret = 1; 3082 goto out; 3083 } 3084 3085 if (offset > info->offset && offset < info->offset + info->bytes) 3086 ret = 1; 3087 out: 3088 spin_unlock(&ctl->tree_lock); 3089 return ret; 3090 } 3091 3092 /* 3093 * Use this if you need to make a bitmap or extent entry specifically, it 3094 * doesn't do any of the merging that add_free_space does, this acts a lot like 3095 * how the free space cache loading stuff works, so you can get really weird 3096 * configurations. 
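 * For bitmap entries the helper loops until every byte has been placed,
 * spilling whatever does not fit into the bitmap covering the next offset, so
 * a single call may end up creating or touching several bitmaps.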
3097 */ 3098 static int add_free_space_entry(struct btrfs_block_group_cache *cache, 3099 u64 offset, u64 bytes, bool bitmap) 3100 { 3101 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3102 struct btrfs_free_space *info = NULL, *bitmap_info; 3103 void *map = NULL; 3104 u64 bytes_added; 3105 int ret; 3106 3107 again: 3108 if (!info) { 3109 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3110 if (!info) 3111 return -ENOMEM; 3112 } 3113 3114 if (!bitmap) { 3115 spin_lock(&ctl->tree_lock); 3116 info->offset = offset; 3117 info->bytes = bytes; 3118 ret = link_free_space(ctl, info); 3119 spin_unlock(&ctl->tree_lock); 3120 if (ret) 3121 kmem_cache_free(btrfs_free_space_cachep, info); 3122 return ret; 3123 } 3124 3125 if (!map) { 3126 map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 3127 if (!map) { 3128 kmem_cache_free(btrfs_free_space_cachep, info); 3129 return -ENOMEM; 3130 } 3131 } 3132 3133 spin_lock(&ctl->tree_lock); 3134 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3135 1, 0); 3136 if (!bitmap_info) { 3137 info->bitmap = map; 3138 map = NULL; 3139 add_new_bitmap(ctl, info, offset); 3140 bitmap_info = info; 3141 } 3142 3143 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3144 bytes -= bytes_added; 3145 offset += bytes_added; 3146 spin_unlock(&ctl->tree_lock); 3147 3148 if (bytes) 3149 goto again; 3150 3151 if (map) 3152 kfree(map); 3153 return 0; 3154 } 3155 3156 /* 3157 * This test just does basic sanity checking, making sure we can add an extent 3158 * entry and remove space from either end and the middle, and make sure we can 3159 * remove space that covers adjacent extent entries. 3160 */ 3161 static int test_extents(struct btrfs_block_group_cache *cache) 3162 { 3163 int ret = 0; 3164 3165 printk(KERN_ERR "Running extent only tests\n"); 3166 3167 /* First just make sure we can remove an entire entry */ 3168 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); 3169 if (ret) { 3170 printk(KERN_ERR "Error adding initial extents %d\n", ret); 3171 return ret; 3172 } 3173 3174 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); 3175 if (ret) { 3176 printk(KERN_ERR "Error removing extent %d\n", ret); 3177 return ret; 3178 } 3179 3180 if (check_exists(cache, 0, 4 * 1024 * 1024)) { 3181 printk(KERN_ERR "Full remove left some lingering space\n"); 3182 return -1; 3183 } 3184 3185 /* Ok edge and middle cases now */ 3186 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); 3187 if (ret) { 3188 printk(KERN_ERR "Error adding half extent %d\n", ret); 3189 return ret; 3190 } 3191 3192 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); 3193 if (ret) { 3194 printk(KERN_ERR "Error removing tail end %d\n", ret); 3195 return ret; 3196 } 3197 3198 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); 3199 if (ret) { 3200 printk(KERN_ERR "Error removing front end %d\n", ret); 3201 return ret; 3202 } 3203 3204 ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); 3205 if (ret) { 3206 printk(KERN_ERR "Error removing middle piece %d\n", ret); 3207 return ret; 3208 } 3209 3210 if (check_exists(cache, 0, 1 * 1024 * 1024)) { 3211 printk(KERN_ERR "Still have space at the front\n"); 3212 return -1; 3213 } 3214 3215 if (check_exists(cache, 2 * 1024 * 1024, 4096)) { 3216 printk(KERN_ERR "Still have space in the middle\n"); 3217 return -1; 3218 } 3219 3220 if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { 3221 printk(KERN_ERR "Still have space at the end\n"); 3222 return -1; 3223 } 3224 3225 /* Cleanup */ 3226
__btrfs_remove_free_space_cache(cache->free_space_ctl); 3227 3228 return 0; 3229 } 3230 3231 static int test_bitmaps(struct btrfs_block_group_cache *cache) 3232 { 3233 u64 next_bitmap_offset; 3234 int ret; 3235 3236 printk(KERN_ERR "Running bitmap only tests\n"); 3237 3238 ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); 3239 if (ret) { 3240 printk(KERN_ERR "Couldn't create a bitmap entry %d\n", ret); 3241 return ret; 3242 } 3243 3244 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); 3245 if (ret) { 3246 printk(KERN_ERR "Error removing bitmap full range %d\n", ret); 3247 return ret; 3248 } 3249 3250 if (check_exists(cache, 0, 4 * 1024 * 1024)) { 3251 printk(KERN_ERR "Left some space in bitmap\n"); 3252 return -1; 3253 } 3254 3255 ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); 3256 if (ret) { 3257 printk(KERN_ERR "Couldn't add to our bitmap entry %d\n", ret); 3258 return ret; 3259 } 3260 3261 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); 3262 if (ret) { 3263 printk(KERN_ERR "Couldn't remove middle chunk %d\n", ret); 3264 return ret; 3265 } 3266 3267 /* 3268 * The first bitmap we have starts at offset 0 so the next one is just 3269 * at the end of the first bitmap. 3270 */ 3271 next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); 3272 3273 /* Test a bit straddling two bitmaps */ 3274 ret = add_free_space_entry(cache, next_bitmap_offset - 3275 (2 * 1024 * 1024), 4 * 1024 * 1024, 1); 3276 if (ret) { 3277 printk(KERN_ERR "Couldn't add space that straddles two bitmaps" 3278 " %d\n", ret); 3279 return ret; 3280 } 3281 3282 ret = btrfs_remove_free_space(cache, next_bitmap_offset - 3283 (1 * 1024 * 1024), 2 * 1024 * 1024); 3284 if (ret) { 3285 printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret); 3286 return ret; 3287 } 3288 3289 if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), 3290 2 * 1024 * 1024)) { 3291 printk(KERN_ERR "Left some space when removing overlapping\n"); 3292 return -1; 3293 } 3294 3295 __btrfs_remove_free_space_cache(cache->free_space_ctl); 3296 3297 return 0; 3298 } 3299 3300 /* This is the high grade jackassery */ 3301 static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) 3302 { 3303 u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); 3304 int ret; 3305 3306 printk(KERN_ERR "Running bitmap and extent tests\n"); 3307 3308 /* 3309 * First let's do something simple, an extent at the same offset as the 3310 * bitmap, but the free space completely in the extent and then 3311 * completely in the bitmap. 
3312 */ 3313 ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); 3314 if (ret) { 3315 printk(KERN_ERR "Couldn't create bitmap entry %d\n", ret); 3316 return ret; 3317 } 3318 3319 ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); 3320 if (ret) { 3321 printk(KERN_ERR "Couldn't add extent entry %d\n", ret); 3322 return ret; 3323 } 3324 3325 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); 3326 if (ret) { 3327 printk(KERN_ERR "Couldn't remove extent entry %d\n", ret); 3328 return ret; 3329 } 3330 3331 if (check_exists(cache, 0, 1 * 1024 * 1024)) { 3332 printk(KERN_ERR "Left remnants after our remove\n"); 3333 return -1; 3334 } 3335 3336 /* Now to add back the extent entry and remove from the bitmap */ 3337 ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); 3338 if (ret) { 3339 printk(KERN_ERR "Couldn't re-add extent entry %d\n", ret); 3340 return ret; 3341 } 3342 3343 ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); 3344 if (ret) { 3345 printk(KERN_ERR "Couldn't remove from bitmap %d\n", ret); 3346 return ret; 3347 } 3348 3349 if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { 3350 printk(KERN_ERR "Left remnants in the bitmap\n"); 3351 return -1; 3352 } 3353 3354 /* 3355 * Ok so a little more evil, extent entry and bitmap at the same offset, 3356 * removing an overlapping chunk. 3357 */ 3358 ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); 3359 if (ret) { 3360 printk(KERN_ERR "Couldn't add to a bitmap %d\n", ret); 3361 return ret; 3362 } 3363 3364 ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); 3365 if (ret) { 3366 printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret); 3367 return ret; 3368 } 3369 3370 if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { 3371 printk(KERN_ERR "Left over pieces after removing " 3372 "overlapping\n"); 3373 return -1; 3374 } 3375 3376 __btrfs_remove_free_space_cache(cache->free_space_ctl); 3377 3378 /* Now with the extent entry offset into the bitmap */ 3379 ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); 3380 if (ret) { 3381 printk(KERN_ERR "Couldn't add space to the bitmap %d\n", ret); 3382 return ret; 3383 } 3384 3385 ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); 3386 if (ret) { 3387 printk(KERN_ERR "Couldn't add extent to the cache %d\n", ret); 3388 return ret; 3389 } 3390 3391 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); 3392 if (ret) { 3393 printk(KERN_ERR "Problem removing overlapping space %d\n", ret); 3394 return ret; 3395 } 3396 3397 if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { 3398 printk(KERN_ERR "Left something behind when removing space\n"); 3399 return -1; 3400 } 3401 3402 /* 3403 * This has blown up in the past, the extent entry starts before the 3404 * bitmap entry, but we're trying to remove an offset that falls 3405 * completely within the bitmap range and is in both the extent entry 3406 * and the bitmap entry, looks like this 3407 * 3408 * [ extent ] 3409 * [ bitmap ] 3410 * [ del ] 3411 */ 3412 __btrfs_remove_free_space_cache(cache->free_space_ctl); 3413 ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, 3414 4 * 1024 * 1024, 1); 3415 if (ret) { 3416 printk(KERN_ERR "Couldn't add bitmap %d\n", ret); 3417 return ret; 3418 } 3419 3420 ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, 3421 5 * 1024 * 1024, 0); 3422 if (ret) { 3423 printk(KERN_ERR "Couldn't add extent entry %d\n", ret); 3424
return ret; 3425 } 3426 3427 ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, 3428 5 * 1024 * 1024); 3429 if (ret) { 3430 printk(KERN_ERR "Failed to free our space %d\n", ret); 3431 return ret; 3432 } 3433 3434 if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024, 3435 5 * 1024 * 1024)) { 3436 printk(KERN_ERR "Left stuff over\n"); 3437 return -1; 3438 } 3439 3440 __btrfs_remove_free_space_cache(cache->free_space_ctl); 3441 3442 /* 3443 * This blew up before, we have part of the free space in a bitmap and 3444 * then the entirety of the rest of the space in an extent. This used 3445 * to return -EAGAIN back from btrfs_remove_extent, make sure this 3446 * doesn't happen. 3447 */ 3448 ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); 3449 if (ret) { 3450 printk(KERN_ERR "Couldn't add bitmap entry %d\n", ret); 3451 return ret; 3452 } 3453 3454 ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); 3455 if (ret) { 3456 printk(KERN_ERR "Couldn't add extent entry %d\n", ret); 3457 return ret; 3458 } 3459 3460 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); 3461 if (ret) { 3462 printk(KERN_ERR "Error removing bitmap and extent " 3463 "overlapping %d\n", ret); 3464 return ret; 3465 } 3466 3467 __btrfs_remove_free_space_cache(cache->free_space_ctl); 3468 return 0; 3469 } 3470 3471 void btrfs_test_free_space_cache(void) 3472 { 3473 struct btrfs_block_group_cache *cache; 3474 3475 printk(KERN_ERR "Running btrfs free space cache tests\n"); 3476 3477 cache = init_test_block_group(); 3478 if (!cache) { 3479 printk(KERN_ERR "Couldn't run the tests\n"); 3480 return; 3481 } 3482 3483 if (test_extents(cache)) 3484 goto out; 3485 if (test_bitmaps(cache)) 3486 goto out; 3487 if (test_bitmaps_and_extents(cache)) 3488 goto out; 3489 out: 3490 __btrfs_remove_free_space_cache(cache->free_space_ctl); 3491 kfree(cache->free_space_ctl); 3492 kfree(cache); 3493 printk(KERN_ERR "Free space cache tests finished\n"); 3494 } 3495 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3496
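/*
 * Note: when CONFIG_BTRFS_FS_RUN_SANITY_TESTS is enabled, the entry point
 * above is expected to be invoked once from the filesystem's module init
 * path (e.g. init_btrfs_fs() in super.c); it builds a throw-away block group
 * with init_test_block_group() and runs the three test routines in order.
 * The exact call site is an assumption based on how these sanity tests are
 * typically wired up, not something defined in this file.
 */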