xref: /linux/fs/btrfs/free-space-cache.c (revision a1087ef6abedf0bfd60e5e3fddf33192cb2c1325)
1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18 
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include "ctree.h"
24 #include "free-space-cache.h"
25 #include "transaction.h"
26 #include "disk-io.h"
27 
28 #define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
29 #define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
30 
31 static void recalculate_thresholds(struct btrfs_block_group_cache
32 				   *block_group);
33 static int link_free_space(struct btrfs_block_group_cache *block_group,
34 			   struct btrfs_free_space *info);
35 
/*
 * Look up (or return the cached reference to) the inode that backs the
 * on-disk free space cache for @block_group.
 *
 * Returns the inode with an elevated reference count on success, or an
 * ERR_PTR on failure (-ENOENT if no cache item/inode exists).  @path is
 * released before returning.
 */
struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	/* Fast path: reuse the inode already cached on the block group. */
	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	/* Find the free space header item for this block group. */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(root, path);
		return ERR_PTR(-ENOENT);
	}

	/* The header item stores the key of the inode holding the cache. */
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(root, path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	/*
	 * Cache the inode on the block group for later lookups, unless the
	 * filesystem is in the middle of unmounting.
	 */
	spin_lock(&block_group->lock);
	if (!root->fs_info->closing) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}
93 
/*
 * Create the inode that will back the free space cache for @block_group,
 * plus the free space header item that points at it.
 *
 * The inode is flagged NOCOMPRESS | PREALLOC | NODATASUM so cache writes
 * bypass compression and data checksumming.  Returns 0 on success or a
 * negative errno.
 */
int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 objectid;
	int ret;

	ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
	if (ret < 0)
		return ret;

	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (ret)
		return ret;

	/* Fill in the freshly inserted (zeroed) inode item. */
	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	/* Remember the inode's key; it is stored in the header item below. */
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
			      BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item,
				    block_group->key.objectid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	/* Insert the free space header item keyed by the block group start. */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(root, path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	return 0;
}
156 
/*
 * Truncate the free space cache inode to size zero so the cache can be
 * rewritten from scratch.
 *
 * Reserves metadata space from the orphan block reserve, drops the page
 * cache and all EXTENT_DATA items for the inode, then writes back the
 * updated inode item.  Returns 0 on success or a negative errno.
 */
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	loff_t oldsize;
	int ret = 0;

	/* Make sure the orphan reserve can cover the truncate. */
	trans->block_rsv = root->orphan_block_rsv;
	ret = btrfs_block_rsv_check(trans, root,
				    root->orphan_block_rsv,
				    0, 5);
	if (ret)
		return ret;

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret) {
		WARN_ON(1);
		return ret;
	}

	return btrfs_update_inode(trans, root, inode);
}
189 
190 static int readahead_cache(struct inode *inode)
191 {
192 	struct file_ra_state *ra;
193 	unsigned long last_index;
194 
195 	ra = kzalloc(sizeof(*ra), GFP_NOFS);
196 	if (!ra)
197 		return -ENOMEM;
198 
199 	file_ra_state_init(ra, inode->i_mapping);
200 	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
201 
202 	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
203 
204 	kfree(ra);
205 
206 	return 0;
207 }
208 
/*
 * Load the on-disk free space cache for @block_group into the in-memory
 * free space tree.
 *
 * Returns 1 if the cache was loaded successfully, 0 otherwise (no cache,
 * stale generation, read error, allocation failure, ...).  A corrupt or
 * mismatched cache is torn down and the block group is marked
 * BTRFS_DC_CLEAR so the cache gets rebuilt.
 */
int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct page *page;
	struct btrfs_path *path;
	u32 *checksums = NULL, *crc;
	char *disk_crcs = NULL;
	struct btrfs_key key;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u32 cur_crc = ~(u32)0;
	pgoff_t index = 0;
	unsigned long first_page_offset;
	int num_checksums;
	int ret = 0;

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	smp_mb();
	if (fs_info->closing)
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	INIT_LIST_HEAD(&bitmaps);

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode)) {
		btrfs_free_path(path);
		goto out;
	}

	/* Read entry/bitmap counts and the generation from the header item. */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret) {
		btrfs_free_path(path);
		goto out;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_free_path(path);

	/* A generation mismatch means the cache wasn't written out cleanly. */
	if (BTRFS_I(inode)->generation != generation) {
		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
		       " not match free space cache generation (%llu) for "
		       "block group %llu\n",
		       (unsigned long long)BTRFS_I(inode)->generation,
		       (unsigned long long)generation,
		       (unsigned long long)block_group->key.objectid);
		goto out;
	}

	if (!num_entries)
		goto out;

	/* Setup everything for doing checksumming */
	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
	checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
	if (!checksums)
		goto out;
	/* Page 0 holds one u32 crc per page followed by a u64 generation. */
	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
	disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
	if (!disk_crcs)
		goto out;

	ret = readahead_cache(inode);
	if (ret) {
		ret = 0;
		goto out;
	}

	while (1) {
		struct btrfs_free_space_entry *entry;
		struct btrfs_free_space *e;
		void *addr;
		unsigned long offset = 0;
		unsigned long start_offset = 0;
		int need_loop = 0;

		if (!num_entries && !num_bitmaps)
			break;

		/* On page 0 entries start after the crc table + generation. */
		if (index == 0) {
			start_offset = first_page_offset;
			offset = start_offset;
		}

		page = grab_cache_page(inode->i_mapping, index);
		if (!page) {
			ret = 0;
			goto free_cache;
		}

		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache: %llu\n",
				       (unsigned long long)
				       block_group->key.objectid);
				goto free_cache;
			}
		}
		addr = kmap(page);

		if (index == 0) {
			u64 *gen;

			/* Stash the crc table and verify the stored generation. */
			memcpy(disk_crcs, addr, first_page_offset);
			gen = addr + (sizeof(u32) * num_checksums);
			if (*gen != BTRFS_I(inode)->generation) {
				printk(KERN_ERR "btrfs: space cache generation"
				       " (%llu) does not match inode (%llu) "
				       "for block group %llu\n",
				       (unsigned long long)*gen,
				       (unsigned long long)
				       BTRFS_I(inode)->generation,
				       (unsigned long long)
				       block_group->key.objectid);
				kunmap(page);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}
			crc = (u32 *)disk_crcs;
		}
		entry = addr + start_offset;

		/* First lets check our crc before we do anything fun */
		cur_crc = ~(u32)0;
		cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
					  PAGE_CACHE_SIZE - start_offset);
		btrfs_csum_final(cur_crc, (char *)&cur_crc);
		if (cur_crc != *crc) {
			printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
			       "block group %llu\n", index,
			       (unsigned long long)block_group->key.objectid);
			kunmap(page);
			unlock_page(page);
			page_cache_release(page);
			goto free_cache;
		}
		crc++;

		while (1) {
			if (!num_entries)
				break;

			need_loop = 1;
			e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
			if (!e) {
				kunmap(page);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}

			e->offset = le64_to_cpu(entry->offset);
			e->bytes = le64_to_cpu(entry->bytes);
			/* A zero-length entry means the cache is corrupt. */
			if (!e->bytes) {
				kunmap(page);
				kfree(e);
				unlock_page(page);
				page_cache_release(page);
				goto free_cache;
			}

			if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
				spin_lock(&block_group->tree_lock);
				ret = link_free_space(block_group, e);
				spin_unlock(&block_group->tree_lock);
				BUG_ON(ret);
			} else {
				/*
				 * Bitmap entry: allocate the bitmap now; its
				 * contents are filled in from a later page.
				 */
				e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
				if (!e->bitmap) {
					kunmap(page);
					kfree(e);
					unlock_page(page);
					page_cache_release(page);
					goto free_cache;
				}
				spin_lock(&block_group->tree_lock);
				ret = link_free_space(block_group, e);
				block_group->total_bitmaps++;
				recalculate_thresholds(block_group);
				spin_unlock(&block_group->tree_lock);
				list_add_tail(&e->list, &bitmaps);
			}

			num_entries--;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				break;
			entry++;
		}

		/*
		 * We read an entry out of this page, we need to move on to the
		 * next page.
		 */
		if (need_loop) {
			kunmap(page);
			goto next;
		}

		/*
		 * We add the bitmaps at the end of the entries in order that
		 * the bitmap entries are added to the cache.
		 */
		e = list_entry(bitmaps.next, struct btrfs_free_space, list);
		list_del_init(&e->list);
		memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
		kunmap(page);
		num_bitmaps--;
next:
		unlock_page(page);
		page_cache_release(page);
		index++;
	}

	ret = 1;
out:
	kfree(checksums);
	kfree(disk_crcs);
	iput(inode);
	return ret;

free_cache:
	/* This cache is bogus, make sure it gets cleared */
	spin_lock(&block_group->lock);
	block_group->disk_cache_state = BTRFS_DC_CLEAR;
	spin_unlock(&block_group->lock);
	btrfs_remove_free_space_cache(block_group);
	goto out;
}
483 
/*
 * Write the in-memory free space cache for @block_group out to its cache
 * inode: extent entries first, then bitmap contents, with per-page crcs
 * and the transaction id stored at the start of the first page.
 *
 * Returns 1 on success, 0 on failure.  On failure the block group is
 * marked BTRFS_DC_ERROR and the inode generation is zeroed so the stale
 * cache will not be trusted at load time.
 */
int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct page *page;
	struct extent_state *cached_state = NULL;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 bytes = 0;
	u32 *crc, *checksums;
	pgoff_t index = 0, last_index = 0;
	unsigned long first_page_offset;
	int num_checksums;
	int entries = 0;
	int bitmaps = 0;
	int ret = 0;

	/* Free space cache inodes and header items live in the tree root. */
	root = root->fs_info->tree_root;

	INIT_LIST_HEAD(&bitmap_list);

	/* Only write the cache if it was set up for this transaction. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	if (!i_size_read(inode)) {
		iput(inode);
		return 0;
	}

	/* Flush any pending I/O so we start from a quiesced mapping. */
	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
	filemap_write_and_wait(inode->i_mapping);
	btrfs_wait_ordered_range(inode, inode->i_size &
				 ~(root->sectorsize - 1), (u64)-1);

	/* We need a checksum per page. */
	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
	crc = checksums  = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
	if (!crc) {
		iput(inode);
		return 0;
	}

	/* Since the first page has all of our checksums and our generation we
	 * need to calculate the offset into the page that we can start writing
	 * our entries.
	 */
	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);

	node = rb_first(&block_group->free_space_offset);
	if (!node)
		goto out_free;

	/*
	 * Lock all pages first so we can lock the extent safely.
	 *
	 * NOTE: Because we hold the ref the entire time we're going to write to
	 * the page find_get_page should never fail, so we don't do a check
	 * after find_get_page at this point.  Just putting this here so people
	 * know and don't freak out.
	 */
	while (index <= last_index) {
		page = grab_cache_page(inode->i_mapping, index);
		if (!page) {
			pgoff_t i = 0;

			/* Undo the locks and refs taken so far, then bail. */
			while (i < index) {
				page = find_get_page(inode->i_mapping, i);
				unlock_page(page);
				page_cache_release(page);
				page_cache_release(page);
				i++;
			}
			goto out_free;
		}
		index++;
	}

	index = 0;
	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state, GFP_NOFS);

	/* Write out the extent entries */
	do {
		struct btrfs_free_space_entry *entry;
		void *addr;
		unsigned long offset = 0;
		unsigned long start_offset = 0;

		/* Page 0 reserves room for the crc table and generation. */
		if (index == 0) {
			start_offset = first_page_offset;
			offset = start_offset;
		}

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		entry = addr + start_offset;

		memset(addr, 0, PAGE_CACHE_SIZE);
		while (1) {
			struct btrfs_free_space *e;

			e = rb_entry(node, struct btrfs_free_space, offset_index);
			entries++;

			entry->offset = cpu_to_le64(e->offset);
			entry->bytes = cpu_to_le64(e->bytes);
			if (e->bitmap) {
				/* Bitmap contents are written after all entries. */
				entry->type = BTRFS_FREE_SPACE_BITMAP;
				list_add_tail(&e->list, &bitmap_list);
				bitmaps++;
			} else {
				entry->type = BTRFS_FREE_SPACE_EXTENT;
			}
			node = rb_next(node);
			if (!node)
				break;
			offset += sizeof(struct btrfs_free_space_entry);
			if (offset + sizeof(struct btrfs_free_space_entry) >=
			    PAGE_CACHE_SIZE)
				break;
			entry++;
		}
		*crc = ~(u32)0;
		*crc = btrfs_csum_data(root, addr + start_offset, *crc,
				       PAGE_CACHE_SIZE - start_offset);
		kunmap(page);

		btrfs_csum_final(*crc, (char *)crc);
		crc++;

		bytes += PAGE_CACHE_SIZE;

		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);

		/*
		 * We need to release our reference we got for grab_cache_page,
		 * except for the first page which will hold our checksums, we
		 * do that below.
		 */
		if (index != 0) {
			unlock_page(page);
			page_cache_release(page);
		}

		page_cache_release(page);

		index++;
	} while (node);

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		void *addr;
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
		*crc = ~(u32)0;
		*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
		kunmap(page);
		btrfs_csum_final(*crc, (char *)crc);
		crc++;
		bytes += PAGE_CACHE_SIZE;

		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
		list_del_init(&entry->list);
		index++;
	}

	/* Zero out the rest of the pages just to make sure */
	while (index <= last_index) {
		void *addr;

		page = find_get_page(inode->i_mapping, index);

		addr = kmap(page);
		memset(addr, 0, PAGE_CACHE_SIZE);
		kunmap(page);
		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
		bytes += PAGE_CACHE_SIZE;
		index++;
	}

	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);

	/* Write the checksums and trans id to the first page */
	{
		void *addr;
		u64 *gen;

		page = find_get_page(inode->i_mapping, 0);

		addr = kmap(page);
		memcpy(addr, checksums, sizeof(u32) * num_checksums);
		gen = addr + (sizeof(u32) * num_checksums);
		*gen = trans->transid;
		kunmap(page);
		ClearPageChecked(page);
		set_page_extent_mapped(page);
		SetPageUptodate(page);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
		page_cache_release(page);
	}
	BTRFS_I(inode)->generation = trans->transid;

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	filemap_write_and_wait(inode->i_mapping);

	/* Update the free space header with the new counts and generation. */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
	if (ret < 0) {
		ret = 0;
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
		goto out_free;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		/* Landed just past the header item; step back and verify. */
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != block_group->key.objectid) {
			ret = 0;
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
					 GFP_NOFS);
			btrfs_release_path(root, path);
			goto out_free;
		}
	}
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);

	ret = 1;

out_free:
	if (ret == 0) {
		/* Failure: drop the cached pages and mark the cache unusable. */
		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		BTRFS_I(inode)->generation = 0;
	}
	kfree(checksums);
	btrfs_update_inode(trans, root, inode);
	iput(inode);
	return ret;
}
780 
781 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
782 					  u64 offset)
783 {
784 	BUG_ON(offset < bitmap_start);
785 	offset -= bitmap_start;
786 	return (unsigned long)(div64_u64(offset, sectorsize));
787 }
788 
789 static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
790 {
791 	return (unsigned long)(div64_u64(bytes, sectorsize));
792 }
793 
794 static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
795 				   u64 offset)
796 {
797 	u64 bitmap_start;
798 	u64 bytes_per_bitmap;
799 
800 	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
801 	bitmap_start = offset - block_group->key.objectid;
802 	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
803 	bitmap_start *= bytes_per_bitmap;
804 	bitmap_start += block_group->key.objectid;
805 
806 	return bitmap_start;
807 }
808 
/*
 * Insert @node into the offset-sorted rbtree at @root.
 *
 * @offset: starting offset of the free space entry being inserted
 * @bitmap: non-zero if the entry is a bitmap, zero for an extent entry;
 *	    used to order two entries that share the same offset.
 *
 * Always returns 0.
 */
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}
853 
/*
 * searches the tree for the given offset.
 *
 * bitmap_only - If this is set, only a bitmap entry at exactly 'offset' is
 * returned (a bitmap sorts directly after an extent entry sharing its
 * offset, see tree_insert_offset).
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		/* prefer a preceding extent entry that still covers 'offset' */
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	/* walk forward to the first entry extending past 'offset' */
	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}
987 
/*
 * Remove @info from the block group's free space offset tree and
 * subtract its bytes from the free space accounting.
 */
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}
995 
996 static int link_free_space(struct btrfs_block_group_cache *block_group,
997 			   struct btrfs_free_space *info)
998 {
999 	int ret = 0;
1000 
1001 	BUG_ON(!info->bitmap && !info->bytes);
1002 	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
1003 				 &info->offset_index, (info->bitmap != NULL));
1004 	if (ret)
1005 		return ret;
1006 
1007 	block_group->free_space += info->bytes;
1008 	block_group->free_extents++;
1009 	return ret;
1010 }
1011 
/*
 * Recompute block_group->extents_thresh, the number of extent entries
 * allowed before new free space is tracked in bitmaps instead, so total
 * cache memory stays near MAX_CACHE_BYTES_PER_GIG per GiB of space.
 */
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	/* Bitmaps alone already exceed the budget: no extent entries at all. */
	if (bitmap_bytes >= max_bytes) {
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the maxw
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}
1048 
/*
 * Clear the bits covering [@offset, @offset + @bytes) in @info's bitmap
 * and subtract @bytes from the entry and block group accounting.
 */
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}
1066 
/*
 * Set the bits covering [@offset, @offset + @bytes) in @info's bitmap
 * and add @bytes to the entry and block group accounting.
 */
static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}
1084 
/*
 * Look for a contiguous run of at least *bytes of free space in a bitmap
 * entry, starting at or after *offset.
 *
 * On success returns 0 and rewrites *offset to the byte offset where the
 * run starts and *bytes to the full length of the run found (which may be
 * larger than the amount requested).  Returns -1 when the bitmap holds no
 * run large enough; *offset and *bytes are untouched in that case.
 */
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	/* start scanning at *offset, clamped to the bitmap's own start */
	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	/*
	 * walk runs of set bits: each run starts at the next set bit and
	 * ends at the following zero bit.  Stop at the first run that is
	 * long enough to cover the request.
	 */
	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}
1118 
1119 static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
1120 						*block_group, u64 *offset,
1121 						u64 *bytes, int debug)
1122 {
1123 	struct btrfs_free_space *entry;
1124 	struct rb_node *node;
1125 	int ret;
1126 
1127 	if (!block_group->free_space_offset.rb_node)
1128 		return NULL;
1129 
1130 	entry = tree_search_offset(block_group,
1131 				   offset_to_bitmap(block_group, *offset),
1132 				   0, 1);
1133 	if (!entry)
1134 		return NULL;
1135 
1136 	for (node = &entry->offset_index; node; node = rb_next(node)) {
1137 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
1138 		if (entry->bytes < *bytes)
1139 			continue;
1140 
1141 		if (entry->bitmap) {
1142 			ret = search_bitmap(block_group, entry, offset, bytes);
1143 			if (!ret)
1144 				return entry;
1145 			continue;
1146 		}
1147 
1148 		*offset = entry->offset;
1149 		*bytes = entry->bytes;
1150 		return entry;
1151 	}
1152 
1153 	return NULL;
1154 }
1155 
1156 static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
1157 			   struct btrfs_free_space *info, u64 offset)
1158 {
1159 	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1160 	int max_bitmaps = (int)div64_u64(block_group->key.offset +
1161 					 bytes_per_bg - 1, bytes_per_bg);
1162 	BUG_ON(block_group->total_bitmaps >= max_bitmaps);
1163 
1164 	info->offset = offset_to_bitmap(block_group, offset);
1165 	info->bytes = 0;
1166 	link_free_space(block_group, info);
1167 	block_group->total_bitmaps++;
1168 
1169 	recalculate_thresholds(block_group);
1170 }
1171 
/*
 * Remove the range [*offset, *offset + *bytes) from one or more bitmap
 * entries, starting with bitmap_info.
 *
 * If the range extends past the end of this bitmap, the part it covers is
 * cleared and the walk continues into the next tree entry.  Returns 0 when
 * everything was removed, -EAGAIN when the next piece of the range lives in
 * an extent entry or was not found in the next bitmap (caller restarts its
 * lookup), or -EINVAL when bytes remain but there is no entry left to hold
 * them.  *offset and *bytes are advanced to describe whatever is still left
 * to remove.  Callers hold block_group->tree_lock.
 */
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	/* last byte covered by this bitmap entry */
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case somethings not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure its actually there.  If its not there then we need to stop
	 * because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		/* range spills past this bitmap: clear only the part we
		 * cover here and advance the cursor to the next entry */
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		/* range fits entirely inside this bitmap */
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		/* grab the next entry before we may free this one */
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			/* this bitmap is now empty, drop it */
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		/* fully removed, and the bitmap ended up empty; drop it */
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}
1261 
/*
 * Try to account the free space described by 'info' in bitmap entries
 * instead of keeping it as an extent entry.
 *
 * Returns 0 when the caller should keep the space as an extent entry
 * (still below the extents threshold, or the block group is too small
 * for bitmaps), 1 when the space was folded into one or more bitmaps
 * ('info' is consumed and freed), or -ENOMEM.  Called with
 * block_group->tree_lock held; NOTE the lock is dropped and re-taken
 * around the allocations in the new_bitmap path.
 */
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	/* exact lookup of the bitmap entry covering 'offset', if any */
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		/* once we've added into a bitmap the covering entry for the
		 * remainder must exist; only the first pass may miss */
		BUG_ON(added);
		goto new_bitmap;
	}

	/* first byte past the range this bitmap covers */
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		/* range spills past this bitmap: set what fits, then loop
		 * to handle the remainder in the next bitmap */
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		/* range fits entirely inside this bitmap */
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		/* we have a pre-allocated entry+bitmap: turn it into the
		 * new bitmap entry and retry the insert */
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		/* must allocate; drop the spinlock for GFP_NOFS allocs */
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	/* any entry not consumed by add_new_bitmap is ours to free */
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}
1359 
/*
 * Add the range [offset, offset + bytes) back to the block group's free
 * space cache, merging with any extent entries that directly border it.
 * If no bordering extent entry exists, the range may instead be folded
 * into a bitmap via insert_into_bitmap().
 *
 * Returns 0 on success or a negative errno (-ENOMEM, or a failure from
 * link_free_space; -EEXIST there indicates a double-free of the range
 * and is fatal).
 */
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	/* exact-match lookup for an entry starting right after our range */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	/* the left neighbour candidate: tree predecessor of right_info, or a
	 * lookup at offset - 1; adjacency is verified before merging below */
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			/* 1 == absorbed into a bitmap; info already freed */
			ret = 0;
			goto out;
		}
	}

	/* merge with the entry that starts exactly at offset + bytes */
	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	/* merge with the entry that ends exactly at offset */
	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		/* -EEXIST means this range was already free: corruption */
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
1433 
/*
 * Remove the range [offset, offset + bytes) from the free space cache.
 * The range may live in an extent entry, a bitmap entry, or straddle
 * several entries; bitmap removal can request a restart via -EAGAIN when
 * the range continues into an extent entry.  Per the comment in
 * remove_from_bitmap() this is used by tree-log replay.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	/* look for an extent entry starting exactly at offset... */
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	/* the matched entry is too small; see if the removal continues into
	 * the following entry (extent or bitmap) */
	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
					     struct btrfs_free_space,
					     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		/* the next entry must actually contain our range */
		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	/* exact-size match: drop the whole entry */
	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	/* extent entry starting at offset: trim its front */
	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	/* the range lives in a bitmap; clear it there, restarting the whole
	 * lookup if the removal runs into an extent entry */
	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}
1550 
1551 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1552 			   u64 bytes)
1553 {
1554 	struct btrfs_free_space *info;
1555 	struct rb_node *n;
1556 	int count = 0;
1557 
1558 	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
1559 		info = rb_entry(n, struct btrfs_free_space, offset_index);
1560 		if (info->bytes >= bytes)
1561 			count++;
1562 		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1563 		       (unsigned long long)info->offset,
1564 		       (unsigned long long)info->bytes,
1565 		       (info->bitmap) ? "yes" : "no");
1566 	}
1567 	printk(KERN_INFO "block group has cluster?: %s\n",
1568 	       list_empty(&block_group->cluster_list) ? "no" : "yes");
1569 	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
1570 	       "\n", count);
1571 }
1572 
1573 u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
1574 {
1575 	struct btrfs_free_space *info;
1576 	struct rb_node *n;
1577 	u64 ret = 0;
1578 
1579 	for (n = rb_first(&block_group->free_space_offset); n;
1580 	     n = rb_next(n)) {
1581 		info = rb_entry(n, struct btrfs_free_space, offset_index);
1582 		ret += info->bytes;
1583 	}
1584 
1585 	return ret;
1586 }
1587 
1588 /*
1589  * for a given cluster, put all of its extents back into the free
1590  * space cache.  If the block group passed doesn't match the block group
1591  * pointed to by the cluster, someone else raced in and freed the
1592  * cluster already.  In that case, we just return without changing anything
1593  */
1594 static int
1595 __btrfs_return_cluster_to_free_space(
1596 			     struct btrfs_block_group_cache *block_group,
1597 			     struct btrfs_free_cluster *cluster)
1598 {
1599 	struct btrfs_free_space *entry;
1600 	struct rb_node *node;
1601 	bool bitmap;
1602 
1603 	spin_lock(&cluster->lock);
1604 	if (cluster->block_group != block_group)
1605 		goto out;
1606 
1607 	bitmap = cluster->points_to_bitmap;
1608 	cluster->block_group = NULL;
1609 	cluster->window_start = 0;
1610 	list_del_init(&cluster->block_group_list);
1611 	cluster->points_to_bitmap = false;
1612 
1613 	if (bitmap)
1614 		goto out;
1615 
1616 	node = rb_first(&cluster->root);
1617 	while (node) {
1618 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
1619 		node = rb_next(&entry->offset_index);
1620 		rb_erase(&entry->offset_index, &cluster->root);
1621 		BUG_ON(entry->bitmap);
1622 		tree_insert_offset(&block_group->free_space_offset,
1623 				   entry->offset, &entry->offset_index, 0);
1624 	}
1625 	cluster->root = RB_ROOT;
1626 
1627 out:
1628 	spin_unlock(&cluster->lock);
1629 	btrfs_put_block_group(block_group);
1630 	return 0;
1631 }
1632 
1633 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1634 {
1635 	struct btrfs_free_space *info;
1636 	struct rb_node *node;
1637 	struct btrfs_free_cluster *cluster;
1638 	struct list_head *head;
1639 
1640 	spin_lock(&block_group->tree_lock);
1641 	while ((head = block_group->cluster_list.next) !=
1642 	       &block_group->cluster_list) {
1643 		cluster = list_entry(head, struct btrfs_free_cluster,
1644 				     block_group_list);
1645 
1646 		WARN_ON(cluster->block_group != block_group);
1647 		__btrfs_return_cluster_to_free_space(block_group, cluster);
1648 		if (need_resched()) {
1649 			spin_unlock(&block_group->tree_lock);
1650 			cond_resched();
1651 			spin_lock(&block_group->tree_lock);
1652 		}
1653 	}
1654 
1655 	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
1656 		info = rb_entry(node, struct btrfs_free_space, offset_index);
1657 		unlink_free_space(block_group, info);
1658 		if (info->bitmap)
1659 			kfree(info->bitmap);
1660 		kfree(info);
1661 		if (need_resched()) {
1662 			spin_unlock(&block_group->tree_lock);
1663 			cond_resched();
1664 			spin_lock(&block_group->tree_lock);
1665 		}
1666 	}
1667 
1668 	spin_unlock(&block_group->tree_lock);
1669 }
1670 
1671 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1672 			       u64 offset, u64 bytes, u64 empty_size)
1673 {
1674 	struct btrfs_free_space *entry = NULL;
1675 	u64 bytes_search = bytes + empty_size;
1676 	u64 ret = 0;
1677 
1678 	spin_lock(&block_group->tree_lock);
1679 	entry = find_free_space(block_group, &offset, &bytes_search, 0);
1680 	if (!entry)
1681 		goto out;
1682 
1683 	ret = offset;
1684 	if (entry->bitmap) {
1685 		bitmap_clear_bits(block_group, entry, offset, bytes);
1686 		if (!entry->bytes) {
1687 			unlink_free_space(block_group, entry);
1688 			kfree(entry->bitmap);
1689 			kfree(entry);
1690 			block_group->total_bitmaps--;
1691 			recalculate_thresholds(block_group);
1692 		}
1693 	} else {
1694 		unlink_free_space(block_group, entry);
1695 		entry->offset += bytes;
1696 		entry->bytes -= bytes;
1697 		if (!entry->bytes)
1698 			kfree(entry);
1699 		else
1700 			link_free_space(block_group, entry);
1701 	}
1702 
1703 out:
1704 	spin_unlock(&block_group->tree_lock);
1705 
1706 	return ret;
1707 }
1708 
1709 /*
1710  * given a cluster, put all of its extents back into the free space
1711  * cache.  If a block group is passed, this function will only free
1712  * a cluster that belongs to the passed block group.
1713  *
1714  * Otherwise, it'll get a reference on the block group pointed to by the
1715  * cluster and remove the cluster from it.
1716  */
1717 int btrfs_return_cluster_to_free_space(
1718 			       struct btrfs_block_group_cache *block_group,
1719 			       struct btrfs_free_cluster *cluster)
1720 {
1721 	int ret;
1722 
1723 	/* first, get a safe pointer to the block group */
1724 	spin_lock(&cluster->lock);
1725 	if (!block_group) {
1726 		block_group = cluster->block_group;
1727 		if (!block_group) {
1728 			spin_unlock(&cluster->lock);
1729 			return 0;
1730 		}
1731 	} else if (cluster->block_group != block_group) {
1732 		/* someone else has already freed it don't redo their work */
1733 		spin_unlock(&cluster->lock);
1734 		return 0;
1735 	}
1736 	atomic_inc(&block_group->count);
1737 	spin_unlock(&cluster->lock);
1738 
1739 	/* now return any extents the cluster had on it */
1740 	spin_lock(&block_group->tree_lock);
1741 	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
1742 	spin_unlock(&block_group->tree_lock);
1743 
1744 	/* finally drop our ref */
1745 	btrfs_put_block_group(block_group);
1746 	return ret;
1747 }
1748 
/*
 * Allocate 'bytes' from a cluster that points into a bitmap entry.
 * Returns the logical offset of the allocation, or 0 on failure.
 *
 * NOTE(review): unlike btrfs_find_space_for_alloc(), a bitmap entry that
 * is fully drained here is left linked in the tree with bytes == 0 —
 * confirm that a later path cleans it up.
 */
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* bail if the cluster changed shape or owner under us */
	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	/* look for 'bytes' of contiguous space at or after min_start */
	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	/* only consume the requested 'bytes', even if the run found by
	 * search_bitmap was bigger */
	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}
1796 
1797 /*
1798  * given a cluster, try to allocate 'bytes' from it, returns 0
1799  * if it couldn't find anything suitably large, or a logical disk offset
1800  * if things worked out
1801  */
1802 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1803 			     struct btrfs_free_cluster *cluster, u64 bytes,
1804 			     u64 min_start)
1805 {
1806 	struct btrfs_free_space *entry = NULL;
1807 	struct rb_node *node;
1808 	u64 ret = 0;
1809 
1810 	if (cluster->points_to_bitmap)
1811 		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
1812 					       min_start);
1813 
1814 	spin_lock(&cluster->lock);
1815 	if (bytes > cluster->max_size)
1816 		goto out;
1817 
1818 	if (cluster->block_group != block_group)
1819 		goto out;
1820 
1821 	node = rb_first(&cluster->root);
1822 	if (!node)
1823 		goto out;
1824 
1825 	entry = rb_entry(node, struct btrfs_free_space, offset_index);
1826 
1827 	while(1) {
1828 		if (entry->bytes < bytes || entry->offset < min_start) {
1829 			struct rb_node *node;
1830 
1831 			node = rb_next(&entry->offset_index);
1832 			if (!node)
1833 				break;
1834 			entry = rb_entry(node, struct btrfs_free_space,
1835 					 offset_index);
1836 			continue;
1837 		}
1838 		ret = entry->offset;
1839 
1840 		entry->offset += bytes;
1841 		entry->bytes -= bytes;
1842 
1843 		if (entry->bytes == 0) {
1844 			rb_erase(&entry->offset_index, &cluster->root);
1845 			kfree(entry);
1846 		}
1847 		break;
1848 	}
1849 out:
1850 	spin_unlock(&cluster->lock);
1851 
1852 	return ret;
1853 }
1854 
/*
 * Try to build a cluster out of a single bitmap entry: look for a stretch
 * of the bitmap made of runs of at least min_bytes each, accumulating
 * 'bytes' worth of free space without spreading the window out more than
 * twice the target size.  On success, sets cluster->window_start (and
 * marks the cluster points_to_bitmap) and returns 0; returns -1 when the
 * bitmap can't provide such a stretch.
 */
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	/* start scanning at 'offset', clamped to the bitmap's own start */
	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	/* find the next run of set bits at least search_bits long */
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	/* the first qualifying run anchors the window */
	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		/* window got too sparse: restart it from the current spot */
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}
1917 
1918 /*
1919  * here we try to find a cluster of blocks in a block group.  The goal
1920  * is to find at least bytes free and up to empty_size + bytes free.
1921  * We might not find them all in one contiguous area.
1922  *
1923  * returns zero and sets up cluster if things worked out, otherwise
1924  * it returns -enospc
1925  */
1926 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
1927 			     struct btrfs_root *root,
1928 			     struct btrfs_block_group_cache *block_group,
1929 			     struct btrfs_free_cluster *cluster,
1930 			     u64 offset, u64 bytes, u64 empty_size)
1931 {
1932 	struct btrfs_free_space *entry = NULL;
1933 	struct rb_node *node;
1934 	struct btrfs_free_space *next;
1935 	struct btrfs_free_space *last = NULL;
1936 	u64 min_bytes;
1937 	u64 window_start;
1938 	u64 window_free;
1939 	u64 max_extent = 0;
1940 	bool found_bitmap = false;
1941 	int ret;
1942 
1943 	/* for metadata, allow allocates with more holes */
1944 	if (btrfs_test_opt(root, SSD_SPREAD)) {
1945 		min_bytes = bytes + empty_size;
1946 	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
1947 		/*
1948 		 * we want to do larger allocations when we are
1949 		 * flushing out the delayed refs, it helps prevent
1950 		 * making more work as we go along.
1951 		 */
1952 		if (trans->transaction->delayed_refs.flushing)
1953 			min_bytes = max(bytes, (bytes + empty_size) >> 1);
1954 		else
1955 			min_bytes = max(bytes, (bytes + empty_size) >> 4);
1956 	} else
1957 		min_bytes = max(bytes, (bytes + empty_size) >> 2);
1958 
1959 	spin_lock(&block_group->tree_lock);
1960 	spin_lock(&cluster->lock);
1961 
1962 	/* someone already found a cluster, hooray */
1963 	if (cluster->block_group) {
1964 		ret = 0;
1965 		goto out;
1966 	}
1967 again:
1968 	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
1969 	if (!entry) {
1970 		ret = -ENOSPC;
1971 		goto out;
1972 	}
1973 
1974 	/*
1975 	 * If found_bitmap is true, we exhausted our search for extent entries,
1976 	 * and we just want to search all of the bitmaps that we can find, and
1977 	 * ignore any extent entries we find.
1978 	 */
1979 	while (entry->bitmap || found_bitmap ||
1980 	       (!entry->bitmap && entry->bytes < min_bytes)) {
1981 		struct rb_node *node = rb_next(&entry->offset_index);
1982 
1983 		if (entry->bitmap && entry->bytes > bytes + empty_size) {
1984 			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
1985 						   offset, bytes + empty_size,
1986 						   min_bytes);
1987 			if (!ret)
1988 				goto got_it;
1989 		}
1990 
1991 		if (!node) {
1992 			ret = -ENOSPC;
1993 			goto out;
1994 		}
1995 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
1996 	}
1997 
1998 	/*
1999 	 * We already searched all the extent entries from the passed in offset
2000 	 * to the end and didn't find enough space for the cluster, and we also
2001 	 * didn't find any bitmaps that met our criteria, just go ahead and exit
2002 	 */
2003 	if (found_bitmap) {
2004 		ret = -ENOSPC;
2005 		goto out;
2006 	}
2007 
2008 	cluster->points_to_bitmap = false;
2009 	window_start = entry->offset;
2010 	window_free = entry->bytes;
2011 	last = entry;
2012 	max_extent = entry->bytes;
2013 
2014 	while (1) {
2015 		/* out window is just right, lets fill it */
2016 		if (window_free >= bytes + empty_size)
2017 			break;
2018 
2019 		node = rb_next(&last->offset_index);
2020 		if (!node) {
2021 			if (found_bitmap)
2022 				goto again;
2023 			ret = -ENOSPC;
2024 			goto out;
2025 		}
2026 		next = rb_entry(node, struct btrfs_free_space, offset_index);
2027 
2028 		/*
2029 		 * we found a bitmap, so if this search doesn't result in a
2030 		 * cluster, we know to go and search again for the bitmaps and
2031 		 * start looking for space there
2032 		 */
2033 		if (next->bitmap) {
2034 			if (!found_bitmap)
2035 				offset = next->offset;
2036 			found_bitmap = true;
2037 			last = next;
2038 			continue;
2039 		}
2040 
2041 		/*
2042 		 * we haven't filled the empty size and the window is
2043 		 * very large.  reset and try again
2044 		 */
2045 		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
2046 		    next->offset - window_start > (bytes + empty_size) * 2) {
2047 			entry = next;
2048 			window_start = entry->offset;
2049 			window_free = entry->bytes;
2050 			last = entry;
2051 			max_extent = entry->bytes;
2052 		} else {
2053 			last = next;
2054 			window_free += next->bytes;
2055 			if (entry->bytes > max_extent)
2056 				max_extent = entry->bytes;
2057 		}
2058 	}
2059 
2060 	cluster->window_start = entry->offset;
2061 
2062 	/*
2063 	 * now we've found our entries, pull them out of the free space
2064 	 * cache and put them into the cluster rbtree
2065 	 *
2066 	 * The cluster includes an rbtree, but only uses the offset index
2067 	 * of each free space cache entry.
2068 	 */
2069 	while (1) {
2070 		node = rb_next(&entry->offset_index);
2071 		if (entry->bitmap && node) {
2072 			entry = rb_entry(node, struct btrfs_free_space,
2073 					 offset_index);
2074 			continue;
2075 		} else if (entry->bitmap && !node) {
2076 			break;
2077 		}
2078 
2079 		rb_erase(&entry->offset_index, &block_group->free_space_offset);
2080 		ret = tree_insert_offset(&cluster->root, entry->offset,
2081 					 &entry->offset_index, 0);
2082 		BUG_ON(ret);
2083 
2084 		if (!node || entry == last)
2085 			break;
2086 
2087 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
2088 	}
2089 
2090 	cluster->max_size = max_extent;
2091 got_it:
2092 	ret = 0;
2093 	atomic_inc(&block_group->count);
2094 	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
2095 	cluster->block_group = block_group;
2096 out:
2097 	spin_unlock(&cluster->lock);
2098 	spin_unlock(&block_group->tree_lock);
2099 
2100 	return ret;
2101 }
2102 
2103 /*
2104  * simple code to zero out a cluster
2105  */
2106 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2107 {
2108 	spin_lock_init(&cluster->lock);
2109 	spin_lock_init(&cluster->refill_lock);
2110 	cluster->root = RB_ROOT;
2111 	cluster->max_size = 0;
2112 	cluster->points_to_bitmap = false;
2113 	INIT_LIST_HEAD(&cluster->block_group_list);
2114 	cluster->block_group = NULL;
2115 }
2116 
2117