xref: /linux/fs/btrfs/free-space-cache.c (revision 9e8ba5f3ec35cba4fd8a8bebda548c4db2651e40)
1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30 
31 #define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
33 
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35 			   struct btrfs_free_space *info);
36 
37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38 					       struct btrfs_path *path,
39 					       u64 offset)
40 {
41 	struct btrfs_key key;
42 	struct btrfs_key location;
43 	struct btrfs_disk_key disk_key;
44 	struct btrfs_free_space_header *header;
45 	struct extent_buffer *leaf;
46 	struct inode *inode = NULL;
47 	int ret;
48 
49 	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
50 	key.offset = offset;
51 	key.type = 0;
52 
53 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54 	if (ret < 0)
55 		return ERR_PTR(ret);
56 	if (ret > 0) {
57 		btrfs_release_path(path);
58 		return ERR_PTR(-ENOENT);
59 	}
60 
61 	leaf = path->nodes[0];
62 	header = btrfs_item_ptr(leaf, path->slots[0],
63 				struct btrfs_free_space_header);
64 	btrfs_free_space_key(leaf, header, &disk_key);
65 	btrfs_disk_key_to_cpu(&location, &disk_key);
66 	btrfs_release_path(path);
67 
68 	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69 	if (!inode)
70 		return ERR_PTR(-ENOENT);
71 	if (IS_ERR(inode))
72 		return inode;
73 	if (is_bad_inode(inode)) {
74 		iput(inode);
75 		return ERR_PTR(-ENOENT);
76 	}
77 
78 	inode->i_mapping->flags &= ~__GFP_FS;
79 
80 	return inode;
81 }
82 
83 struct inode *lookup_free_space_inode(struct btrfs_root *root,
84 				      struct btrfs_block_group_cache
85 				      *block_group, struct btrfs_path *path)
86 {
87 	struct inode *inode = NULL;
88 	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
89 
90 	spin_lock(&block_group->lock);
91 	if (block_group->inode)
92 		inode = igrab(block_group->inode);
93 	spin_unlock(&block_group->lock);
94 	if (inode)
95 		return inode;
96 
97 	inode = __lookup_free_space_inode(root, path,
98 					  block_group->key.objectid);
99 	if (IS_ERR(inode))
100 		return inode;
101 
102 	spin_lock(&block_group->lock);
103 	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
104 		printk(KERN_INFO "Old style space inode found, converting.\n");
105 		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
106 			BTRFS_INODE_NODATACOW;
107 		block_group->disk_cache_state = BTRFS_DC_CLEAR;
108 	}
109 
110 	if (!block_group->iref) {
111 		block_group->inode = igrab(inode);
112 		block_group->iref = 1;
113 	}
114 	spin_unlock(&block_group->lock);
115 
116 	return inode;
117 }
118 
119 int __create_free_space_inode(struct btrfs_root *root,
120 			      struct btrfs_trans_handle *trans,
121 			      struct btrfs_path *path, u64 ino, u64 offset)
122 {
123 	struct btrfs_key key;
124 	struct btrfs_disk_key disk_key;
125 	struct btrfs_free_space_header *header;
126 	struct btrfs_inode_item *inode_item;
127 	struct extent_buffer *leaf;
128 	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
129 	int ret;
130 
131 	ret = btrfs_insert_empty_inode(trans, root, path, ino);
132 	if (ret)
133 		return ret;
134 
135 	/* We inline crc's for the free disk space cache */
136 	if (ino != BTRFS_FREE_INO_OBJECTID)
137 		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
138 
139 	leaf = path->nodes[0];
140 	inode_item = btrfs_item_ptr(leaf, path->slots[0],
141 				    struct btrfs_inode_item);
142 	btrfs_item_key(leaf, &disk_key, path->slots[0]);
143 	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
144 			     sizeof(*inode_item));
145 	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
146 	btrfs_set_inode_size(leaf, inode_item, 0);
147 	btrfs_set_inode_nbytes(leaf, inode_item, 0);
148 	btrfs_set_inode_uid(leaf, inode_item, 0);
149 	btrfs_set_inode_gid(leaf, inode_item, 0);
150 	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
151 	btrfs_set_inode_flags(leaf, inode_item, flags);
152 	btrfs_set_inode_nlink(leaf, inode_item, 1);
153 	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
154 	btrfs_set_inode_block_group(leaf, inode_item, offset);
155 	btrfs_mark_buffer_dirty(leaf);
156 	btrfs_release_path(path);
157 
158 	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
159 	key.offset = offset;
160 	key.type = 0;
161 
162 	ret = btrfs_insert_empty_item(trans, root, path, &key,
163 				      sizeof(struct btrfs_free_space_header));
164 	if (ret < 0) {
165 		btrfs_release_path(path);
166 		return ret;
167 	}
168 	leaf = path->nodes[0];
169 	header = btrfs_item_ptr(leaf, path->slots[0],
170 				struct btrfs_free_space_header);
171 	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
172 	btrfs_set_free_space_key(leaf, header, &disk_key);
173 	btrfs_mark_buffer_dirty(leaf);
174 	btrfs_release_path(path);
175 
176 	return 0;
177 }
178 
179 int create_free_space_inode(struct btrfs_root *root,
180 			    struct btrfs_trans_handle *trans,
181 			    struct btrfs_block_group_cache *block_group,
182 			    struct btrfs_path *path)
183 {
184 	int ret;
185 	u64 ino;
186 
187 	ret = btrfs_find_free_objectid(root, &ino);
188 	if (ret < 0)
189 		return ret;
190 
191 	return __create_free_space_inode(root, trans, path, ino,
192 					 block_group->key.objectid);
193 }
194 
195 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
196 				    struct btrfs_trans_handle *trans,
197 				    struct btrfs_path *path,
198 				    struct inode *inode)
199 {
200 	struct btrfs_block_rsv *rsv;
201 	u64 needed_bytes;
202 	loff_t oldsize;
203 	int ret = 0;
204 
205 	rsv = trans->block_rsv;
206 	trans->block_rsv = &root->fs_info->global_block_rsv;
207 
208 	/* 1 for slack space, 1 for updating the inode */
209 	needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
210 		btrfs_calc_trans_metadata_size(root, 1);
211 
212 	spin_lock(&trans->block_rsv->lock);
213 	if (trans->block_rsv->reserved < needed_bytes) {
214 		spin_unlock(&trans->block_rsv->lock);
215 		trans->block_rsv = rsv;
216 		return -ENOSPC;
217 	}
218 	spin_unlock(&trans->block_rsv->lock);
219 
220 	oldsize = i_size_read(inode);
221 	btrfs_i_size_write(inode, 0);
222 	truncate_pagecache(inode, oldsize, 0);
223 
224 	/*
225 	 * We don't need an orphan item because truncating the free space cache
226 	 * will never be split across transactions.
227 	 */
228 	ret = btrfs_truncate_inode_items(trans, root, inode,
229 					 0, BTRFS_EXTENT_DATA_KEY);
230 
231 	if (ret) {
232 		trans->block_rsv = rsv;
233 		WARN_ON(1);
234 		return ret;
235 	}
236 
237 	ret = btrfs_update_inode(trans, root, inode);
238 	trans->block_rsv = rsv;
239 
240 	return ret;
241 }
242 
243 static int readahead_cache(struct inode *inode)
244 {
245 	struct file_ra_state *ra;
246 	unsigned long last_index;
247 
248 	ra = kzalloc(sizeof(*ra), GFP_NOFS);
249 	if (!ra)
250 		return -ENOMEM;
251 
252 	file_ra_state_init(ra, inode->i_mapping);
253 	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
254 
255 	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
256 
257 	kfree(ra);
258 
259 	return 0;
260 }
261 
262 struct io_ctl {
263 	void *cur, *orig;
264 	struct page *page;
265 	struct page **pages;
266 	struct btrfs_root *root;
267 	unsigned long size;
268 	int index;
269 	int num_pages;
270 	unsigned check_crcs:1;
271 };
272 
273 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
274 		       struct btrfs_root *root)
275 {
276 	memset(io_ctl, 0, sizeof(struct io_ctl));
277 	io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
278 		PAGE_CACHE_SHIFT;
279 	io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
280 				GFP_NOFS);
281 	if (!io_ctl->pages)
282 		return -ENOMEM;
283 	io_ctl->root = root;
284 	if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
285 		io_ctl->check_crcs = 1;
286 	return 0;
287 }
288 
289 static void io_ctl_free(struct io_ctl *io_ctl)
290 {
291 	kfree(io_ctl->pages);
292 }
293 
294 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
295 {
296 	if (io_ctl->cur) {
297 		kunmap(io_ctl->page);
298 		io_ctl->cur = NULL;
299 		io_ctl->orig = NULL;
300 	}
301 }
302 
303 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
304 {
305 	WARN_ON(io_ctl->cur);
306 	BUG_ON(io_ctl->index >= io_ctl->num_pages);
307 	io_ctl->page = io_ctl->pages[io_ctl->index++];
308 	io_ctl->cur = kmap(io_ctl->page);
309 	io_ctl->orig = io_ctl->cur;
310 	io_ctl->size = PAGE_CACHE_SIZE;
311 	if (clear)
312 		memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
313 }
314 
315 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
316 {
317 	int i;
318 
319 	io_ctl_unmap_page(io_ctl);
320 
321 	for (i = 0; i < io_ctl->num_pages; i++) {
322 		ClearPageChecked(io_ctl->pages[i]);
323 		unlock_page(io_ctl->pages[i]);
324 		page_cache_release(io_ctl->pages[i]);
325 	}
326 }
327 
328 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
329 				int uptodate)
330 {
331 	struct page *page;
332 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
333 	int i;
334 
335 	for (i = 0; i < io_ctl->num_pages; i++) {
336 		page = find_or_create_page(inode->i_mapping, i, mask);
337 		if (!page) {
338 			io_ctl_drop_pages(io_ctl);
339 			return -ENOMEM;
340 		}
341 		io_ctl->pages[i] = page;
342 		if (uptodate && !PageUptodate(page)) {
343 			btrfs_readpage(NULL, page);
344 			lock_page(page);
345 			if (!PageUptodate(page)) {
346 				printk(KERN_ERR "btrfs: error reading free "
347 				       "space cache\n");
348 				io_ctl_drop_pages(io_ctl);
349 				return -EIO;
350 			}
351 		}
352 	}
353 
354 	for (i = 0; i < io_ctl->num_pages; i++) {
355 		clear_page_dirty_for_io(io_ctl->pages[i]);
356 		set_page_extent_mapped(io_ctl->pages[i]);
357 	}
358 
359 	return 0;
360 }
361 
362 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
363 {
364 	u64 *val;
365 
366 	io_ctl_map_page(io_ctl, 1);
367 
368 	/*
369 	 * Skip the csum areas.  If we don't check crcs then we just have a
370 	 * 64bit chunk at the front of the first page.
371 	 */
372 	if (io_ctl->check_crcs) {
373 		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
374 		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
375 	} else {
376 		io_ctl->cur += sizeof(u64);
377 		io_ctl->size -= sizeof(u64) * 2;
378 	}
379 
380 	val = io_ctl->cur;
381 	*val = cpu_to_le64(generation);
382 	io_ctl->cur += sizeof(u64);
383 }
384 
385 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
386 {
387 	u64 *gen;
388 
389 	/*
390 	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
391 	 * chunk at the front of the first page.
392 	 */
393 	if (io_ctl->check_crcs) {
394 		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
395 		io_ctl->size -= sizeof(u64) +
396 			(sizeof(u32) * io_ctl->num_pages);
397 	} else {
398 		io_ctl->cur += sizeof(u64);
399 		io_ctl->size -= sizeof(u64) * 2;
400 	}
401 
402 	gen = io_ctl->cur;
403 	if (le64_to_cpu(*gen) != generation) {
404 		printk_ratelimited(KERN_ERR "btrfs: space cache generation "
405 				   "(%Lu) does not match inode (%Lu)\n", *gen,
406 				   generation);
407 		io_ctl_unmap_page(io_ctl);
408 		return -EIO;
409 	}
410 	io_ctl->cur += sizeof(u64);
411 	return 0;
412 }
413 
414 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
415 {
416 	u32 *tmp;
417 	u32 crc = ~(u32)0;
418 	unsigned offset = 0;
419 
420 	if (!io_ctl->check_crcs) {
421 		io_ctl_unmap_page(io_ctl);
422 		return;
423 	}
424 
425 	if (index == 0)
426 		offset = sizeof(u32) * io_ctl->num_pages;;
427 
428 	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
429 			      PAGE_CACHE_SIZE - offset);
430 	btrfs_csum_final(crc, (char *)&crc);
431 	io_ctl_unmap_page(io_ctl);
432 	tmp = kmap(io_ctl->pages[0]);
433 	tmp += index;
434 	*tmp = crc;
435 	kunmap(io_ctl->pages[0]);
436 }
437 
438 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
439 {
440 	u32 *tmp, val;
441 	u32 crc = ~(u32)0;
442 	unsigned offset = 0;
443 
444 	if (!io_ctl->check_crcs) {
445 		io_ctl_map_page(io_ctl, 0);
446 		return 0;
447 	}
448 
449 	if (index == 0)
450 		offset = sizeof(u32) * io_ctl->num_pages;
451 
452 	tmp = kmap(io_ctl->pages[0]);
453 	tmp += index;
454 	val = *tmp;
455 	kunmap(io_ctl->pages[0]);
456 
457 	io_ctl_map_page(io_ctl, 0);
458 	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
459 			      PAGE_CACHE_SIZE - offset);
460 	btrfs_csum_final(crc, (char *)&crc);
461 	if (val != crc) {
462 		printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
463 				   "space cache\n");
464 		io_ctl_unmap_page(io_ctl);
465 		return -EIO;
466 	}
467 
468 	return 0;
469 }
470 
471 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
472 			    void *bitmap)
473 {
474 	struct btrfs_free_space_entry *entry;
475 
476 	if (!io_ctl->cur)
477 		return -ENOSPC;
478 
479 	entry = io_ctl->cur;
480 	entry->offset = cpu_to_le64(offset);
481 	entry->bytes = cpu_to_le64(bytes);
482 	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
483 		BTRFS_FREE_SPACE_EXTENT;
484 	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
485 	io_ctl->size -= sizeof(struct btrfs_free_space_entry);
486 
487 	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
488 		return 0;
489 
490 	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
491 
492 	/* No more pages to map */
493 	if (io_ctl->index >= io_ctl->num_pages)
494 		return 0;
495 
496 	/* map the next page */
497 	io_ctl_map_page(io_ctl, 1);
498 	return 0;
499 }
500 
501 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
502 {
503 	if (!io_ctl->cur)
504 		return -ENOSPC;
505 
506 	/*
507 	 * If we aren't at the start of the current page, unmap this one and
508 	 * map the next one if there is any left.
509 	 */
510 	if (io_ctl->cur != io_ctl->orig) {
511 		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
512 		if (io_ctl->index >= io_ctl->num_pages)
513 			return -ENOSPC;
514 		io_ctl_map_page(io_ctl, 0);
515 	}
516 
517 	memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
518 	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
519 	if (io_ctl->index < io_ctl->num_pages)
520 		io_ctl_map_page(io_ctl, 0);
521 	return 0;
522 }
523 
524 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
525 {
526 	/*
527 	 * If we're not on the boundary we know we've modified the page and we
528 	 * need to crc the page.
529 	 */
530 	if (io_ctl->cur != io_ctl->orig)
531 		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
532 	else
533 		io_ctl_unmap_page(io_ctl);
534 
535 	while (io_ctl->index < io_ctl->num_pages) {
536 		io_ctl_map_page(io_ctl, 1);
537 		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
538 	}
539 }
540 
541 static int io_ctl_read_entry(struct io_ctl *io_ctl,
542 			    struct btrfs_free_space *entry, u8 *type)
543 {
544 	struct btrfs_free_space_entry *e;
545 	int ret;
546 
547 	if (!io_ctl->cur) {
548 		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
549 		if (ret)
550 			return ret;
551 	}
552 
553 	e = io_ctl->cur;
554 	entry->offset = le64_to_cpu(e->offset);
555 	entry->bytes = le64_to_cpu(e->bytes);
556 	*type = e->type;
557 	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
558 	io_ctl->size -= sizeof(struct btrfs_free_space_entry);
559 
560 	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
561 		return 0;
562 
563 	io_ctl_unmap_page(io_ctl);
564 
565 	return 0;
566 }
567 
568 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
569 			      struct btrfs_free_space *entry)
570 {
571 	int ret;
572 
573 	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
574 	if (ret)
575 		return ret;
576 
577 	memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
578 	io_ctl_unmap_page(io_ctl);
579 
580 	return 0;
581 }
582 
583 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
584 			    struct btrfs_free_space_ctl *ctl,
585 			    struct btrfs_path *path, u64 offset)
586 {
587 	struct btrfs_free_space_header *header;
588 	struct extent_buffer *leaf;
589 	struct io_ctl io_ctl;
590 	struct btrfs_key key;
591 	struct btrfs_free_space *e, *n;
592 	struct list_head bitmaps;
593 	u64 num_entries;
594 	u64 num_bitmaps;
595 	u64 generation;
596 	u8 type;
597 	int ret = 0;
598 
599 	INIT_LIST_HEAD(&bitmaps);
600 
601 	/* Nothing in the space cache, goodbye */
602 	if (!i_size_read(inode))
603 		return 0;
604 
605 	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
606 	key.offset = offset;
607 	key.type = 0;
608 
609 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
610 	if (ret < 0)
611 		return 0;
612 	else if (ret > 0) {
613 		btrfs_release_path(path);
614 		return 0;
615 	}
616 
617 	ret = -1;
618 
619 	leaf = path->nodes[0];
620 	header = btrfs_item_ptr(leaf, path->slots[0],
621 				struct btrfs_free_space_header);
622 	num_entries = btrfs_free_space_entries(leaf, header);
623 	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
624 	generation = btrfs_free_space_generation(leaf, header);
625 	btrfs_release_path(path);
626 
627 	if (BTRFS_I(inode)->generation != generation) {
628 		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
629 		       " not match free space cache generation (%llu)\n",
630 		       (unsigned long long)BTRFS_I(inode)->generation,
631 		       (unsigned long long)generation);
632 		return 0;
633 	}
634 
635 	if (!num_entries)
636 		return 0;
637 
638 	io_ctl_init(&io_ctl, inode, root);
639 	ret = readahead_cache(inode);
640 	if (ret)
641 		goto out;
642 
643 	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
644 	if (ret)
645 		goto out;
646 
647 	ret = io_ctl_check_crc(&io_ctl, 0);
648 	if (ret)
649 		goto free_cache;
650 
651 	ret = io_ctl_check_generation(&io_ctl, generation);
652 	if (ret)
653 		goto free_cache;
654 
655 	while (num_entries) {
656 		e = kmem_cache_zalloc(btrfs_free_space_cachep,
657 				      GFP_NOFS);
658 		if (!e)
659 			goto free_cache;
660 
661 		ret = io_ctl_read_entry(&io_ctl, e, &type);
662 		if (ret) {
663 			kmem_cache_free(btrfs_free_space_cachep, e);
664 			goto free_cache;
665 		}
666 
667 		if (!e->bytes) {
668 			kmem_cache_free(btrfs_free_space_cachep, e);
669 			goto free_cache;
670 		}
671 
672 		if (type == BTRFS_FREE_SPACE_EXTENT) {
673 			spin_lock(&ctl->tree_lock);
674 			ret = link_free_space(ctl, e);
675 			spin_unlock(&ctl->tree_lock);
676 			if (ret) {
677 				printk(KERN_ERR "Duplicate entries in "
678 				       "free space cache, dumping\n");
679 				kmem_cache_free(btrfs_free_space_cachep, e);
680 				goto free_cache;
681 			}
682 		} else {
683 			BUG_ON(!num_bitmaps);
684 			num_bitmaps--;
685 			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
686 			if (!e->bitmap) {
687 				kmem_cache_free(
688 					btrfs_free_space_cachep, e);
689 				goto free_cache;
690 			}
691 			spin_lock(&ctl->tree_lock);
692 			ret = link_free_space(ctl, e);
693 			ctl->total_bitmaps++;
694 			ctl->op->recalc_thresholds(ctl);
695 			spin_unlock(&ctl->tree_lock);
696 			if (ret) {
697 				printk(KERN_ERR "Duplicate entries in "
698 				       "free space cache, dumping\n");
699 				kmem_cache_free(btrfs_free_space_cachep, e);
700 				goto free_cache;
701 			}
702 			list_add_tail(&e->list, &bitmaps);
703 		}
704 
705 		num_entries--;
706 	}
707 
708 	io_ctl_unmap_page(&io_ctl);
709 
710 	/*
711 	 * We add the bitmaps at the end of the entries in order that
712 	 * the bitmap entries are added to the cache.
713 	 */
714 	list_for_each_entry_safe(e, n, &bitmaps, list) {
715 		list_del_init(&e->list);
716 		ret = io_ctl_read_bitmap(&io_ctl, e);
717 		if (ret)
718 			goto free_cache;
719 	}
720 
721 	io_ctl_drop_pages(&io_ctl);
722 	ret = 1;
723 out:
724 	io_ctl_free(&io_ctl);
725 	return ret;
726 free_cache:
727 	io_ctl_drop_pages(&io_ctl);
728 	__btrfs_remove_free_space_cache(ctl);
729 	goto out;
730 }
731 
732 int load_free_space_cache(struct btrfs_fs_info *fs_info,
733 			  struct btrfs_block_group_cache *block_group)
734 {
735 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
736 	struct btrfs_root *root = fs_info->tree_root;
737 	struct inode *inode;
738 	struct btrfs_path *path;
739 	int ret = 0;
740 	bool matched;
741 	u64 used = btrfs_block_group_used(&block_group->item);
742 
743 	/*
744 	 * If we're unmounting then just return, since this does a search on the
745 	 * normal root and not the commit root and we could deadlock.
746 	 */
747 	if (btrfs_fs_closing(fs_info))
748 		return 0;
749 
750 	/*
751 	 * If this block group has been marked to be cleared for one reason or
752 	 * another then we can't trust the on disk cache, so just return.
753 	 */
754 	spin_lock(&block_group->lock);
755 	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
756 		spin_unlock(&block_group->lock);
757 		return 0;
758 	}
759 	spin_unlock(&block_group->lock);
760 
761 	path = btrfs_alloc_path();
762 	if (!path)
763 		return 0;
764 
765 	inode = lookup_free_space_inode(root, block_group, path);
766 	if (IS_ERR(inode)) {
767 		btrfs_free_path(path);
768 		return 0;
769 	}
770 
771 	/* We may have converted the inode and made the cache invalid. */
772 	spin_lock(&block_group->lock);
773 	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
774 		spin_unlock(&block_group->lock);
775 		goto out;
776 	}
777 	spin_unlock(&block_group->lock);
778 
779 	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
780 				      path, block_group->key.objectid);
781 	btrfs_free_path(path);
782 	if (ret <= 0)
783 		goto out;
784 
785 	spin_lock(&ctl->tree_lock);
786 	matched = (ctl->free_space == (block_group->key.offset - used -
787 				       block_group->bytes_super));
788 	spin_unlock(&ctl->tree_lock);
789 
790 	if (!matched) {
791 		__btrfs_remove_free_space_cache(ctl);
792 		printk(KERN_ERR "block group %llu has an wrong amount of free "
793 		       "space\n", block_group->key.objectid);
794 		ret = -1;
795 	}
796 out:
797 	if (ret < 0) {
798 		/* This cache is bogus, make sure it gets cleared */
799 		spin_lock(&block_group->lock);
800 		block_group->disk_cache_state = BTRFS_DC_CLEAR;
801 		spin_unlock(&block_group->lock);
802 		ret = 0;
803 
804 		printk(KERN_ERR "btrfs: failed to load free space cache "
805 		       "for block group %llu\n", block_group->key.objectid);
806 	}
807 
808 	iput(inode);
809 	return ret;
810 }
811 
812 /**
813  * __btrfs_write_out_cache - write out cached info to an inode
814  * @root - the root the inode belongs to
815  * @ctl - the free space cache we are going to write out
816  * @block_group - the block_group for this cache if it belongs to a block_group
817  * @trans - the trans handle
818  * @path - the path to use
819  * @offset - the offset for the key we'll insert
820  *
821  * This function writes out a free space cache struct to disk for quick recovery
822  * on mount.  This will return 0 if it was successfull in writing the cache out,
823  * and -1 if it was not.
824  */
825 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
826 			    struct btrfs_free_space_ctl *ctl,
827 			    struct btrfs_block_group_cache *block_group,
828 			    struct btrfs_trans_handle *trans,
829 			    struct btrfs_path *path, u64 offset)
830 {
831 	struct btrfs_free_space_header *header;
832 	struct extent_buffer *leaf;
833 	struct rb_node *node;
834 	struct list_head *pos, *n;
835 	struct extent_state *cached_state = NULL;
836 	struct btrfs_free_cluster *cluster = NULL;
837 	struct extent_io_tree *unpin = NULL;
838 	struct io_ctl io_ctl;
839 	struct list_head bitmap_list;
840 	struct btrfs_key key;
841 	u64 start, end, len;
842 	int entries = 0;
843 	int bitmaps = 0;
844 	int ret;
845 	int err = -1;
846 
847 	INIT_LIST_HEAD(&bitmap_list);
848 
849 	if (!i_size_read(inode))
850 		return -1;
851 
852 	io_ctl_init(&io_ctl, inode, root);
853 
854 	/* Get the cluster for this block_group if it exists */
855 	if (block_group && !list_empty(&block_group->cluster_list))
856 		cluster = list_entry(block_group->cluster_list.next,
857 				     struct btrfs_free_cluster,
858 				     block_group_list);
859 
860 	/*
861 	 * We shouldn't have switched the pinned extents yet so this is the
862 	 * right one
863 	 */
864 	unpin = root->fs_info->pinned_extents;
865 
866 	/* Lock all pages first so we can lock the extent safely. */
867 	io_ctl_prepare_pages(&io_ctl, inode, 0);
868 
869 	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
870 			 0, &cached_state, GFP_NOFS);
871 
872 	/*
873 	 * When searching for pinned extents, we need to start at our start
874 	 * offset.
875 	 */
876 	if (block_group)
877 		start = block_group->key.objectid;
878 
879 	node = rb_first(&ctl->free_space_offset);
880 	if (!node && cluster) {
881 		node = rb_first(&cluster->root);
882 		cluster = NULL;
883 	}
884 
885 	/* Make sure we can fit our crcs into the first page */
886 	if (io_ctl.check_crcs &&
887 	    (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
888 		WARN_ON(1);
889 		goto out_nospc;
890 	}
891 
892 	io_ctl_set_generation(&io_ctl, trans->transid);
893 
894 	/* Write out the extent entries */
895 	while (node) {
896 		struct btrfs_free_space *e;
897 
898 		e = rb_entry(node, struct btrfs_free_space, offset_index);
899 		entries++;
900 
901 		ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
902 				       e->bitmap);
903 		if (ret)
904 			goto out_nospc;
905 
906 		if (e->bitmap) {
907 			list_add_tail(&e->list, &bitmap_list);
908 			bitmaps++;
909 		}
910 		node = rb_next(node);
911 		if (!node && cluster) {
912 			node = rb_first(&cluster->root);
913 			cluster = NULL;
914 		}
915 	}
916 
917 	/*
918 	 * We want to add any pinned extents to our free space cache
919 	 * so we don't leak the space
920 	 */
921 	while (block_group && (start < block_group->key.objectid +
922 			       block_group->key.offset)) {
923 		ret = find_first_extent_bit(unpin, start, &start, &end,
924 					    EXTENT_DIRTY);
925 		if (ret) {
926 			ret = 0;
927 			break;
928 		}
929 
930 		/* This pinned extent is out of our range */
931 		if (start >= block_group->key.objectid +
932 		    block_group->key.offset)
933 			break;
934 
935 		len = block_group->key.objectid +
936 			block_group->key.offset - start;
937 		len = min(len, end + 1 - start);
938 
939 		entries++;
940 		ret = io_ctl_add_entry(&io_ctl, start, len, NULL);
941 		if (ret)
942 			goto out_nospc;
943 
944 		start = end + 1;
945 	}
946 
947 	/* Write out the bitmaps */
948 	list_for_each_safe(pos, n, &bitmap_list) {
949 		struct btrfs_free_space *entry =
950 			list_entry(pos, struct btrfs_free_space, list);
951 
952 		ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
953 		if (ret)
954 			goto out_nospc;
955 		list_del_init(&entry->list);
956 	}
957 
958 	/* Zero out the rest of the pages just to make sure */
959 	io_ctl_zero_remaining_pages(&io_ctl);
960 
961 	ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
962 				0, i_size_read(inode), &cached_state);
963 	io_ctl_drop_pages(&io_ctl);
964 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
965 			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
966 
967 	if (ret)
968 		goto out;
969 
970 
971 	ret = filemap_write_and_wait(inode->i_mapping);
972 	if (ret)
973 		goto out;
974 
975 	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
976 	key.offset = offset;
977 	key.type = 0;
978 
979 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
980 	if (ret < 0) {
981 		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
982 				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
983 				 GFP_NOFS);
984 		goto out;
985 	}
986 	leaf = path->nodes[0];
987 	if (ret > 0) {
988 		struct btrfs_key found_key;
989 		BUG_ON(!path->slots[0]);
990 		path->slots[0]--;
991 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
992 		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
993 		    found_key.offset != offset) {
994 			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
995 					 inode->i_size - 1,
996 					 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
997 					 NULL, GFP_NOFS);
998 			btrfs_release_path(path);
999 			goto out;
1000 		}
1001 	}
1002 
1003 	BTRFS_I(inode)->generation = trans->transid;
1004 	header = btrfs_item_ptr(leaf, path->slots[0],
1005 				struct btrfs_free_space_header);
1006 	btrfs_set_free_space_entries(leaf, header, entries);
1007 	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1008 	btrfs_set_free_space_generation(leaf, header, trans->transid);
1009 	btrfs_mark_buffer_dirty(leaf);
1010 	btrfs_release_path(path);
1011 
1012 	err = 0;
1013 out:
1014 	io_ctl_free(&io_ctl);
1015 	if (err) {
1016 		invalidate_inode_pages2(inode->i_mapping);
1017 		BTRFS_I(inode)->generation = 0;
1018 	}
1019 	btrfs_update_inode(trans, root, inode);
1020 	return err;
1021 
1022 out_nospc:
1023 	list_for_each_safe(pos, n, &bitmap_list) {
1024 		struct btrfs_free_space *entry =
1025 			list_entry(pos, struct btrfs_free_space, list);
1026 		list_del_init(&entry->list);
1027 	}
1028 	io_ctl_drop_pages(&io_ctl);
1029 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1030 			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1031 	goto out;
1032 }
1033 
1034 int btrfs_write_out_cache(struct btrfs_root *root,
1035 			  struct btrfs_trans_handle *trans,
1036 			  struct btrfs_block_group_cache *block_group,
1037 			  struct btrfs_path *path)
1038 {
1039 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1040 	struct inode *inode;
1041 	int ret = 0;
1042 
1043 	root = root->fs_info->tree_root;
1044 
1045 	spin_lock(&block_group->lock);
1046 	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1047 		spin_unlock(&block_group->lock);
1048 		return 0;
1049 	}
1050 	spin_unlock(&block_group->lock);
1051 
1052 	inode = lookup_free_space_inode(root, block_group, path);
1053 	if (IS_ERR(inode))
1054 		return 0;
1055 
1056 	ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1057 				      path, block_group->key.objectid);
1058 	if (ret) {
1059 		spin_lock(&block_group->lock);
1060 		block_group->disk_cache_state = BTRFS_DC_ERROR;
1061 		spin_unlock(&block_group->lock);
1062 		ret = 0;
1063 #ifdef DEBUG
1064 		printk(KERN_ERR "btrfs: failed to write free space cace "
1065 		       "for block group %llu\n", block_group->key.objectid);
1066 #endif
1067 	}
1068 
1069 	iput(inode);
1070 	return ret;
1071 }
1072 
1073 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1074 					  u64 offset)
1075 {
1076 	BUG_ON(offset < bitmap_start);
1077 	offset -= bitmap_start;
1078 	return (unsigned long)(div_u64(offset, unit));
1079 }
1080 
1081 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1082 {
1083 	return (unsigned long)(div_u64(bytes, unit));
1084 }
1085 
1086 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1087 				   u64 offset)
1088 {
1089 	u64 bitmap_start;
1090 	u64 bytes_per_bitmap;
1091 
1092 	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1093 	bitmap_start = offset - ctl->start;
1094 	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1095 	bitmap_start *= bytes_per_bitmap;
1096 	bitmap_start += ctl->start;
1097 
1098 	return bitmap_start;
1099 }
1100 
1101 static int tree_insert_offset(struct rb_root *root, u64 offset,
1102 			      struct rb_node *node, int bitmap)
1103 {
1104 	struct rb_node **p = &root->rb_node;
1105 	struct rb_node *parent = NULL;
1106 	struct btrfs_free_space *info;
1107 
1108 	while (*p) {
1109 		parent = *p;
1110 		info = rb_entry(parent, struct btrfs_free_space, offset_index);
1111 
1112 		if (offset < info->offset) {
1113 			p = &(*p)->rb_left;
1114 		} else if (offset > info->offset) {
1115 			p = &(*p)->rb_right;
1116 		} else {
1117 			/*
1118 			 * we could have a bitmap entry and an extent entry
1119 			 * share the same offset.  If this is the case, we want
1120 			 * the extent entry to always be found first if we do a
1121 			 * linear search through the tree, since we want to have
1122 			 * the quickest allocation time, and allocating from an
1123 			 * extent is faster than allocating from a bitmap.  So
1124 			 * if we're inserting a bitmap and we find an entry at
1125 			 * this offset, we want to go right, or after this entry
1126 			 * logically.  If we are inserting an extent and we've
1127 			 * found a bitmap, we want to go left, or before
1128 			 * logically.
1129 			 */
1130 			if (bitmap) {
1131 				if (info->bitmap) {
1132 					WARN_ON_ONCE(1);
1133 					return -EEXIST;
1134 				}
1135 				p = &(*p)->rb_right;
1136 			} else {
1137 				if (!info->bitmap) {
1138 					WARN_ON_ONCE(1);
1139 					return -EEXIST;
1140 				}
1141 				p = &(*p)->rb_left;
1142 			}
1143 		}
1144 	}
1145 
1146 	rb_link_node(node, parent, p);
1147 	rb_insert_color(node, root);
1148 
1149 	return 0;
1150 }
1151 
1152 /*
1153  * searches the tree for the given offset.
1154  *
1155  * fuzzy - If this is set, then we are trying to make an allocation, and we just
1156  * want a section that has at least bytes size and comes at or after the given
1157  * offset.
1158  */
1159 static struct btrfs_free_space *
1160 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1161 		   u64 offset, int bitmap_only, int fuzzy)
1162 {
1163 	struct rb_node *n = ctl->free_space_offset.rb_node;
1164 	struct btrfs_free_space *entry, *prev = NULL;
1165 
1166 	/* find entry that is closest to the 'offset' */
1167 	while (1) {
1168 		if (!n) {
1169 			entry = NULL;
1170 			break;
1171 		}
1172 
1173 		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1174 		prev = entry;
1175 
1176 		if (offset < entry->offset)
1177 			n = n->rb_left;
1178 		else if (offset > entry->offset)
1179 			n = n->rb_right;
1180 		else
1181 			break;
1182 	}
1183 
1184 	if (bitmap_only) {
1185 		if (!entry)
1186 			return NULL;
1187 		if (entry->bitmap)
1188 			return entry;
1189 
1190 		/*
1191 		 * bitmap entry and extent entry may share same offset,
1192 		 * in that case, bitmap entry comes after extent entry.
1193 		 */
1194 		n = rb_next(n);
1195 		if (!n)
1196 			return NULL;
1197 		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1198 		if (entry->offset != offset)
1199 			return NULL;
1200 
1201 		WARN_ON(!entry->bitmap);
1202 		return entry;
1203 	} else if (entry) {
1204 		if (entry->bitmap) {
1205 			/*
1206 			 * if previous extent entry covers the offset,
1207 			 * we should return it instead of the bitmap entry
1208 			 */
1209 			n = &entry->offset_index;
1210 			while (1) {
1211 				n = rb_prev(n);
1212 				if (!n)
1213 					break;
1214 				prev = rb_entry(n, struct btrfs_free_space,
1215 						offset_index);
1216 				if (!prev->bitmap) {
1217 					if (prev->offset + prev->bytes > offset)
1218 						entry = prev;
1219 					break;
1220 				}
1221 			}
1222 		}
1223 		return entry;
1224 	}
1225 
1226 	if (!prev)
1227 		return NULL;
1228 
1229 	/* find last entry before the 'offset' */
1230 	entry = prev;
1231 	if (entry->offset > offset) {
1232 		n = rb_prev(&entry->offset_index);
1233 		if (n) {
1234 			entry = rb_entry(n, struct btrfs_free_space,
1235 					offset_index);
1236 			BUG_ON(entry->offset > offset);
1237 		} else {
1238 			if (fuzzy)
1239 				return entry;
1240 			else
1241 				return NULL;
1242 		}
1243 	}
1244 
1245 	if (entry->bitmap) {
1246 		n = &entry->offset_index;
1247 		while (1) {
1248 			n = rb_prev(n);
1249 			if (!n)
1250 				break;
1251 			prev = rb_entry(n, struct btrfs_free_space,
1252 					offset_index);
1253 			if (!prev->bitmap) {
1254 				if (prev->offset + prev->bytes > offset)
1255 					return prev;
1256 				break;
1257 			}
1258 		}
1259 		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1260 			return entry;
1261 	} else if (entry->offset + entry->bytes > offset)
1262 		return entry;
1263 
1264 	if (!fuzzy)
1265 		return NULL;
1266 
1267 	while (1) {
1268 		if (entry->bitmap) {
1269 			if (entry->offset + BITS_PER_BITMAP *
1270 			    ctl->unit > offset)
1271 				break;
1272 		} else {
1273 			if (entry->offset + entry->bytes > offset)
1274 				break;
1275 		}
1276 
1277 		n = rb_next(&entry->offset_index);
1278 		if (!n)
1279 			return NULL;
1280 		entry = rb_entry(n, struct btrfs_free_space, offset_index);
1281 	}
1282 	return entry;
1283 }
1284 
1285 static inline void
1286 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1287 		    struct btrfs_free_space *info)
1288 {
1289 	rb_erase(&info->offset_index, &ctl->free_space_offset);
1290 	ctl->free_extents--;
1291 }
1292 
1293 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1294 			      struct btrfs_free_space *info)
1295 {
1296 	__unlink_free_space(ctl, info);
1297 	ctl->free_space -= info->bytes;
1298 }
1299 
1300 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1301 			   struct btrfs_free_space *info)
1302 {
1303 	int ret = 0;
1304 
1305 	BUG_ON(!info->bitmap && !info->bytes);
1306 	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1307 				 &info->offset_index, (info->bitmap != NULL));
1308 	if (ret)
1309 		return ret;
1310 
1311 	ctl->free_space += info->bytes;
1312 	ctl->free_extents++;
1313 	return ret;
1314 }
1315 
1316 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1317 {
1318 	struct btrfs_block_group_cache *block_group = ctl->private;
1319 	u64 max_bytes;
1320 	u64 bitmap_bytes;
1321 	u64 extent_bytes;
1322 	u64 size = block_group->key.offset;
1323 	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1324 	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1325 
1326 	BUG_ON(ctl->total_bitmaps > max_bitmaps);
1327 
1328 	/*
1329 	 * The goal is to keep the total amount of memory used per 1gb of space
1330 	 * at or below 32k, so we need to adjust how much memory we allow to be
1331 	 * used by extent based free space tracking
1332 	 */
1333 	if (size < 1024 * 1024 * 1024)
1334 		max_bytes = MAX_CACHE_BYTES_PER_GIG;
1335 	else
1336 		max_bytes = MAX_CACHE_BYTES_PER_GIG *
1337 			div64_u64(size, 1024 * 1024 * 1024);
1338 
1339 	/*
1340 	 * we want to account for 1 more bitmap than what we have so we can make
1341 	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1342 	 * we add more bitmaps.
1343 	 */
1344 	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1345 
1346 	if (bitmap_bytes >= max_bytes) {
1347 		ctl->extents_thresh = 0;
1348 		return;
1349 	}
1350 
1351 	/*
1352 	 * we want the extent entry threshold to always be at most 1/2 the maxw
1353 	 * bytes we can have, or whatever is less than that.
1354 	 */
1355 	extent_bytes = max_bytes - bitmap_bytes;
1356 	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1357 
1358 	ctl->extents_thresh =
1359 		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1360 }
1361 
1362 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1363 				       struct btrfs_free_space *info,
1364 				       u64 offset, u64 bytes)
1365 {
1366 	unsigned long start, count;
1367 
1368 	start = offset_to_bit(info->offset, ctl->unit, offset);
1369 	count = bytes_to_bits(bytes, ctl->unit);
1370 	BUG_ON(start + count > BITS_PER_BITMAP);
1371 
1372 	bitmap_clear(info->bitmap, start, count);
1373 
1374 	info->bytes -= bytes;
1375 }
1376 
1377 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1378 			      struct btrfs_free_space *info, u64 offset,
1379 			      u64 bytes)
1380 {
1381 	__bitmap_clear_bits(ctl, info, offset, bytes);
1382 	ctl->free_space -= bytes;
1383 }
1384 
1385 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1386 			    struct btrfs_free_space *info, u64 offset,
1387 			    u64 bytes)
1388 {
1389 	unsigned long start, count;
1390 
1391 	start = offset_to_bit(info->offset, ctl->unit, offset);
1392 	count = bytes_to_bits(bytes, ctl->unit);
1393 	BUG_ON(start + count > BITS_PER_BITMAP);
1394 
1395 	bitmap_set(info->bitmap, start, count);
1396 
1397 	info->bytes += bytes;
1398 	ctl->free_space += bytes;
1399 }
1400 
1401 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1402 			 struct btrfs_free_space *bitmap_info, u64 *offset,
1403 			 u64 *bytes)
1404 {
1405 	unsigned long found_bits = 0;
1406 	unsigned long bits, i;
1407 	unsigned long next_zero;
1408 
1409 	i = offset_to_bit(bitmap_info->offset, ctl->unit,
1410 			  max_t(u64, *offset, bitmap_info->offset));
1411 	bits = bytes_to_bits(*bytes, ctl->unit);
1412 
1413 	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1414 	     i < BITS_PER_BITMAP;
1415 	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1416 		next_zero = find_next_zero_bit(bitmap_info->bitmap,
1417 					       BITS_PER_BITMAP, i);
1418 		if ((next_zero - i) >= bits) {
1419 			found_bits = next_zero - i;
1420 			break;
1421 		}
1422 		i = next_zero;
1423 	}
1424 
1425 	if (found_bits) {
1426 		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1427 		*bytes = (u64)(found_bits) * ctl->unit;
1428 		return 0;
1429 	}
1430 
1431 	return -1;
1432 }
1433 
1434 static struct btrfs_free_space *
1435 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1436 {
1437 	struct btrfs_free_space *entry;
1438 	struct rb_node *node;
1439 	int ret;
1440 
1441 	if (!ctl->free_space_offset.rb_node)
1442 		return NULL;
1443 
1444 	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1445 	if (!entry)
1446 		return NULL;
1447 
1448 	for (node = &entry->offset_index; node; node = rb_next(node)) {
1449 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
1450 		if (entry->bytes < *bytes)
1451 			continue;
1452 
1453 		if (entry->bitmap) {
1454 			ret = search_bitmap(ctl, entry, offset, bytes);
1455 			if (!ret)
1456 				return entry;
1457 			continue;
1458 		}
1459 
1460 		*offset = entry->offset;
1461 		*bytes = entry->bytes;
1462 		return entry;
1463 	}
1464 
1465 	return NULL;
1466 }
1467 
1468 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1469 			   struct btrfs_free_space *info, u64 offset)
1470 {
1471 	info->offset = offset_to_bitmap(ctl, offset);
1472 	info->bytes = 0;
1473 	INIT_LIST_HEAD(&info->list);
1474 	link_free_space(ctl, info);
1475 	ctl->total_bitmaps++;
1476 
1477 	ctl->op->recalc_thresholds(ctl);
1478 }
1479 
1480 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1481 			struct btrfs_free_space *bitmap_info)
1482 {
1483 	unlink_free_space(ctl, bitmap_info);
1484 	kfree(bitmap_info->bitmap);
1485 	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1486 	ctl->total_bitmaps--;
1487 	ctl->op->recalc_thresholds(ctl);
1488 }
1489 
1490 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1491 			      struct btrfs_free_space *bitmap_info,
1492 			      u64 *offset, u64 *bytes)
1493 {
1494 	u64 end;
1495 	u64 search_start, search_bytes;
1496 	int ret;
1497 
1498 again:
1499 	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1500 
1501 	/*
1502 	 * XXX - this can go away after a few releases.
1503 	 *
1504 	 * since the only user of btrfs_remove_free_space is the tree logging
1505 	 * stuff, and the only way to test that is under crash conditions, we
1506 	 * want to have this debug stuff here just in case somethings not
1507 	 * working.  Search the bitmap for the space we are trying to use to
1508 	 * make sure its actually there.  If its not there then we need to stop
1509 	 * because something has gone wrong.
1510 	 */
1511 	search_start = *offset;
1512 	search_bytes = *bytes;
1513 	search_bytes = min(search_bytes, end - search_start + 1);
1514 	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1515 	BUG_ON(ret < 0 || search_start != *offset);
1516 
1517 	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1518 		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1519 		*bytes -= end - *offset + 1;
1520 		*offset = end + 1;
1521 	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1522 		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1523 		*bytes = 0;
1524 	}
1525 
1526 	if (*bytes) {
1527 		struct rb_node *next = rb_next(&bitmap_info->offset_index);
1528 		if (!bitmap_info->bytes)
1529 			free_bitmap(ctl, bitmap_info);
1530 
1531 		/*
1532 		 * no entry after this bitmap, but we still have bytes to
1533 		 * remove, so something has gone wrong.
1534 		 */
1535 		if (!next)
1536 			return -EINVAL;
1537 
1538 		bitmap_info = rb_entry(next, struct btrfs_free_space,
1539 				       offset_index);
1540 
1541 		/*
1542 		 * if the next entry isn't a bitmap we need to return to let the
1543 		 * extent stuff do its work.
1544 		 */
1545 		if (!bitmap_info->bitmap)
1546 			return -EAGAIN;
1547 
1548 		/*
1549 		 * Ok the next item is a bitmap, but it may not actually hold
1550 		 * the information for the rest of this free space stuff, so
1551 		 * look for it, and if we don't find it return so we can try
1552 		 * everything over again.
1553 		 */
1554 		search_start = *offset;
1555 		search_bytes = *bytes;
1556 		ret = search_bitmap(ctl, bitmap_info, &search_start,
1557 				    &search_bytes);
1558 		if (ret < 0 || search_start != *offset)
1559 			return -EAGAIN;
1560 
1561 		goto again;
1562 	} else if (!bitmap_info->bytes)
1563 		free_bitmap(ctl, bitmap_info);
1564 
1565 	return 0;
1566 }
1567 
1568 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1569 			       struct btrfs_free_space *info, u64 offset,
1570 			       u64 bytes)
1571 {
1572 	u64 bytes_to_set = 0;
1573 	u64 end;
1574 
1575 	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1576 
1577 	bytes_to_set = min(end - offset, bytes);
1578 
1579 	bitmap_set_bits(ctl, info, offset, bytes_to_set);
1580 
1581 	return bytes_to_set;
1582 
1583 }
1584 
1585 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1586 		      struct btrfs_free_space *info)
1587 {
1588 	struct btrfs_block_group_cache *block_group = ctl->private;
1589 
1590 	/*
1591 	 * If we are below the extents threshold then we can add this as an
1592 	 * extent, and don't have to deal with the bitmap
1593 	 */
1594 	if (ctl->free_extents < ctl->extents_thresh) {
1595 		/*
1596 		 * If this block group has some small extents we don't want to
1597 		 * use up all of our free slots in the cache with them, we want
1598 		 * to reserve them to larger extents, however if we have plent
1599 		 * of cache left then go ahead an dadd them, no sense in adding
1600 		 * the overhead of a bitmap if we don't have to.
1601 		 */
1602 		if (info->bytes <= block_group->sectorsize * 4) {
1603 			if (ctl->free_extents * 2 <= ctl->extents_thresh)
1604 				return false;
1605 		} else {
1606 			return false;
1607 		}
1608 	}
1609 
1610 	/*
1611 	 * some block groups are so tiny they can't be enveloped by a bitmap, so
1612 	 * don't even bother to create a bitmap for this
1613 	 */
1614 	if (BITS_PER_BITMAP * block_group->sectorsize >
1615 	    block_group->key.offset)
1616 		return false;
1617 
1618 	return true;
1619 }
1620 
1621 static struct btrfs_free_space_op free_space_op = {
1622 	.recalc_thresholds	= recalculate_thresholds,
1623 	.use_bitmap		= use_bitmap,
1624 };
1625 
1626 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1627 			      struct btrfs_free_space *info)
1628 {
1629 	struct btrfs_free_space *bitmap_info;
1630 	struct btrfs_block_group_cache *block_group = NULL;
1631 	int added = 0;
1632 	u64 bytes, offset, bytes_added;
1633 	int ret;
1634 
1635 	bytes = info->bytes;
1636 	offset = info->offset;
1637 
1638 	if (!ctl->op->use_bitmap(ctl, info))
1639 		return 0;
1640 
1641 	if (ctl->op == &free_space_op)
1642 		block_group = ctl->private;
1643 again:
1644 	/*
1645 	 * Since we link bitmaps right into the cluster we need to see if we
1646 	 * have a cluster here, and if so and it has our bitmap we need to add
1647 	 * the free space to that bitmap.
1648 	 */
1649 	if (block_group && !list_empty(&block_group->cluster_list)) {
1650 		struct btrfs_free_cluster *cluster;
1651 		struct rb_node *node;
1652 		struct btrfs_free_space *entry;
1653 
1654 		cluster = list_entry(block_group->cluster_list.next,
1655 				     struct btrfs_free_cluster,
1656 				     block_group_list);
1657 		spin_lock(&cluster->lock);
1658 		node = rb_first(&cluster->root);
1659 		if (!node) {
1660 			spin_unlock(&cluster->lock);
1661 			goto no_cluster_bitmap;
1662 		}
1663 
1664 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
1665 		if (!entry->bitmap) {
1666 			spin_unlock(&cluster->lock);
1667 			goto no_cluster_bitmap;
1668 		}
1669 
1670 		if (entry->offset == offset_to_bitmap(ctl, offset)) {
1671 			bytes_added = add_bytes_to_bitmap(ctl, entry,
1672 							  offset, bytes);
1673 			bytes -= bytes_added;
1674 			offset += bytes_added;
1675 		}
1676 		spin_unlock(&cluster->lock);
1677 		if (!bytes) {
1678 			ret = 1;
1679 			goto out;
1680 		}
1681 	}
1682 
1683 no_cluster_bitmap:
1684 	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1685 					 1, 0);
1686 	if (!bitmap_info) {
1687 		BUG_ON(added);
1688 		goto new_bitmap;
1689 	}
1690 
1691 	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1692 	bytes -= bytes_added;
1693 	offset += bytes_added;
1694 	added = 0;
1695 
1696 	if (!bytes) {
1697 		ret = 1;
1698 		goto out;
1699 	} else
1700 		goto again;
1701 
1702 new_bitmap:
1703 	if (info && info->bitmap) {
1704 		add_new_bitmap(ctl, info, offset);
1705 		added = 1;
1706 		info = NULL;
1707 		goto again;
1708 	} else {
1709 		spin_unlock(&ctl->tree_lock);
1710 
1711 		/* no pre-allocated info, allocate a new one */
1712 		if (!info) {
1713 			info = kmem_cache_zalloc(btrfs_free_space_cachep,
1714 						 GFP_NOFS);
1715 			if (!info) {
1716 				spin_lock(&ctl->tree_lock);
1717 				ret = -ENOMEM;
1718 				goto out;
1719 			}
1720 		}
1721 
1722 		/* allocate the bitmap */
1723 		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1724 		spin_lock(&ctl->tree_lock);
1725 		if (!info->bitmap) {
1726 			ret = -ENOMEM;
1727 			goto out;
1728 		}
1729 		goto again;
1730 	}
1731 
1732 out:
1733 	if (info) {
1734 		if (info->bitmap)
1735 			kfree(info->bitmap);
1736 		kmem_cache_free(btrfs_free_space_cachep, info);
1737 	}
1738 
1739 	return ret;
1740 }
1741 
1742 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1743 			  struct btrfs_free_space *info, bool update_stat)
1744 {
1745 	struct btrfs_free_space *left_info;
1746 	struct btrfs_free_space *right_info;
1747 	bool merged = false;
1748 	u64 offset = info->offset;
1749 	u64 bytes = info->bytes;
1750 
1751 	/*
1752 	 * first we want to see if there is free space adjacent to the range we
1753 	 * are adding, if there is remove that struct and add a new one to
1754 	 * cover the entire range
1755 	 */
1756 	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1757 	if (right_info && rb_prev(&right_info->offset_index))
1758 		left_info = rb_entry(rb_prev(&right_info->offset_index),
1759 				     struct btrfs_free_space, offset_index);
1760 	else
1761 		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1762 
1763 	if (right_info && !right_info->bitmap) {
1764 		if (update_stat)
1765 			unlink_free_space(ctl, right_info);
1766 		else
1767 			__unlink_free_space(ctl, right_info);
1768 		info->bytes += right_info->bytes;
1769 		kmem_cache_free(btrfs_free_space_cachep, right_info);
1770 		merged = true;
1771 	}
1772 
1773 	if (left_info && !left_info->bitmap &&
1774 	    left_info->offset + left_info->bytes == offset) {
1775 		if (update_stat)
1776 			unlink_free_space(ctl, left_info);
1777 		else
1778 			__unlink_free_space(ctl, left_info);
1779 		info->offset = left_info->offset;
1780 		info->bytes += left_info->bytes;
1781 		kmem_cache_free(btrfs_free_space_cachep, left_info);
1782 		merged = true;
1783 	}
1784 
1785 	return merged;
1786 }
1787 
1788 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1789 			   u64 offset, u64 bytes)
1790 {
1791 	struct btrfs_free_space *info;
1792 	int ret = 0;
1793 
1794 	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1795 	if (!info)
1796 		return -ENOMEM;
1797 
1798 	info->offset = offset;
1799 	info->bytes = bytes;
1800 
1801 	spin_lock(&ctl->tree_lock);
1802 
1803 	if (try_merge_free_space(ctl, info, true))
1804 		goto link;
1805 
1806 	/*
1807 	 * There was no extent directly to the left or right of this new
1808 	 * extent then we know we're going to have to allocate a new extent, so
1809 	 * before we do that see if we need to drop this into a bitmap
1810 	 */
1811 	ret = insert_into_bitmap(ctl, info);
1812 	if (ret < 0) {
1813 		goto out;
1814 	} else if (ret) {
1815 		ret = 0;
1816 		goto out;
1817 	}
1818 link:
1819 	ret = link_free_space(ctl, info);
1820 	if (ret)
1821 		kmem_cache_free(btrfs_free_space_cachep, info);
1822 out:
1823 	spin_unlock(&ctl->tree_lock);
1824 
1825 	if (ret) {
1826 		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
1827 		BUG_ON(ret == -EEXIST);
1828 	}
1829 
1830 	return ret;
1831 }
1832 
1833 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1834 			    u64 offset, u64 bytes)
1835 {
1836 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1837 	struct btrfs_free_space *info;
1838 	struct btrfs_free_space *next_info = NULL;
1839 	int ret = 0;
1840 
1841 	spin_lock(&ctl->tree_lock);
1842 
1843 again:
1844 	info = tree_search_offset(ctl, offset, 0, 0);
1845 	if (!info) {
1846 		/*
1847 		 * oops didn't find an extent that matched the space we wanted
1848 		 * to remove, look for a bitmap instead
1849 		 */
1850 		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1851 					  1, 0);
1852 		if (!info) {
1853 			/* the tree logging code might be calling us before we
1854 			 * have fully loaded the free space rbtree for this
1855 			 * block group.  So it is possible the entry won't
1856 			 * be in the rbtree yet at all.  The caching code
1857 			 * will make sure not to put it in the rbtree if
1858 			 * the logging code has pinned it.
1859 			 */
1860 			goto out_lock;
1861 		}
1862 	}
1863 
1864 	if (info->bytes < bytes && rb_next(&info->offset_index)) {
1865 		u64 end;
1866 		next_info = rb_entry(rb_next(&info->offset_index),
1867 					     struct btrfs_free_space,
1868 					     offset_index);
1869 
1870 		if (next_info->bitmap)
1871 			end = next_info->offset +
1872 			      BITS_PER_BITMAP * ctl->unit - 1;
1873 		else
1874 			end = next_info->offset + next_info->bytes;
1875 
1876 		if (next_info->bytes < bytes ||
1877 		    next_info->offset > offset || offset > end) {
1878 			printk(KERN_CRIT "Found free space at %llu, size %llu,"
1879 			      " trying to use %llu\n",
1880 			      (unsigned long long)info->offset,
1881 			      (unsigned long long)info->bytes,
1882 			      (unsigned long long)bytes);
1883 			WARN_ON(1);
1884 			ret = -EINVAL;
1885 			goto out_lock;
1886 		}
1887 
1888 		info = next_info;
1889 	}
1890 
1891 	if (info->bytes == bytes) {
1892 		unlink_free_space(ctl, info);
1893 		if (info->bitmap) {
1894 			kfree(info->bitmap);
1895 			ctl->total_bitmaps--;
1896 		}
1897 		kmem_cache_free(btrfs_free_space_cachep, info);
1898 		ret = 0;
1899 		goto out_lock;
1900 	}
1901 
1902 	if (!info->bitmap && info->offset == offset) {
1903 		unlink_free_space(ctl, info);
1904 		info->offset += bytes;
1905 		info->bytes -= bytes;
1906 		ret = link_free_space(ctl, info);
1907 		WARN_ON(ret);
1908 		goto out_lock;
1909 	}
1910 
1911 	if (!info->bitmap && info->offset <= offset &&
1912 	    info->offset + info->bytes >= offset + bytes) {
1913 		u64 old_start = info->offset;
1914 		/*
1915 		 * we're freeing space in the middle of the info,
1916 		 * this can happen during tree log replay
1917 		 *
1918 		 * first unlink the old info and then
1919 		 * insert it again after the hole we're creating
1920 		 */
1921 		unlink_free_space(ctl, info);
1922 		if (offset + bytes < info->offset + info->bytes) {
1923 			u64 old_end = info->offset + info->bytes;
1924 
1925 			info->offset = offset + bytes;
1926 			info->bytes = old_end - info->offset;
1927 			ret = link_free_space(ctl, info);
1928 			WARN_ON(ret);
1929 			if (ret)
1930 				goto out_lock;
1931 		} else {
1932 			/* the hole we're creating ends at the end
1933 			 * of the info struct, just free the info
1934 			 */
1935 			kmem_cache_free(btrfs_free_space_cachep, info);
1936 		}
1937 		spin_unlock(&ctl->tree_lock);
1938 
1939 		/* step two, insert a new info struct to cover
1940 		 * anything before the hole
1941 		 */
1942 		ret = btrfs_add_free_space(block_group, old_start,
1943 					   offset - old_start);
1944 		WARN_ON(ret);
1945 		goto out;
1946 	}
1947 
1948 	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1949 	if (ret == -EAGAIN)
1950 		goto again;
1951 	BUG_ON(ret);
1952 out_lock:
1953 	spin_unlock(&ctl->tree_lock);
1954 out:
1955 	return ret;
1956 }
1957 
1958 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1959 			   u64 bytes)
1960 {
1961 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1962 	struct btrfs_free_space *info;
1963 	struct rb_node *n;
1964 	int count = 0;
1965 
1966 	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1967 		info = rb_entry(n, struct btrfs_free_space, offset_index);
1968 		if (info->bytes >= bytes)
1969 			count++;
1970 		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1971 		       (unsigned long long)info->offset,
1972 		       (unsigned long long)info->bytes,
1973 		       (info->bitmap) ? "yes" : "no");
1974 	}
1975 	printk(KERN_INFO "block group has cluster?: %s\n",
1976 	       list_empty(&block_group->cluster_list) ? "no" : "yes");
1977 	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
1978 	       "\n", count);
1979 }
1980 
1981 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1982 {
1983 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1984 
1985 	spin_lock_init(&ctl->tree_lock);
1986 	ctl->unit = block_group->sectorsize;
1987 	ctl->start = block_group->key.objectid;
1988 	ctl->private = block_group;
1989 	ctl->op = &free_space_op;
1990 
1991 	/*
1992 	 * we only want to have 32k of ram per block group for keeping
1993 	 * track of free space, and if we pass 1/2 of that we want to
1994 	 * start converting things over to using bitmaps
1995 	 */
1996 	ctl->extents_thresh = ((1024 * 32) / 2) /
1997 				sizeof(struct btrfs_free_space);
1998 }
1999 
2000 /*
2001  * for a given cluster, put all of its extents back into the free
2002  * space cache.  If the block group passed doesn't match the block group
2003  * pointed to by the cluster, someone else raced in and freed the
2004  * cluster already.  In that case, we just return without changing anything
2005  */
2006 static int
2007 __btrfs_return_cluster_to_free_space(
2008 			     struct btrfs_block_group_cache *block_group,
2009 			     struct btrfs_free_cluster *cluster)
2010 {
2011 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2012 	struct btrfs_free_space *entry;
2013 	struct rb_node *node;
2014 
2015 	spin_lock(&cluster->lock);
2016 	if (cluster->block_group != block_group)
2017 		goto out;
2018 
2019 	cluster->block_group = NULL;
2020 	cluster->window_start = 0;
2021 	list_del_init(&cluster->block_group_list);
2022 
2023 	node = rb_first(&cluster->root);
2024 	while (node) {
2025 		bool bitmap;
2026 
2027 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
2028 		node = rb_next(&entry->offset_index);
2029 		rb_erase(&entry->offset_index, &cluster->root);
2030 
2031 		bitmap = (entry->bitmap != NULL);
2032 		if (!bitmap)
2033 			try_merge_free_space(ctl, entry, false);
2034 		tree_insert_offset(&ctl->free_space_offset,
2035 				   entry->offset, &entry->offset_index, bitmap);
2036 	}
2037 	cluster->root = RB_ROOT;
2038 
2039 out:
2040 	spin_unlock(&cluster->lock);
2041 	btrfs_put_block_group(block_group);
2042 	return 0;
2043 }
2044 
2045 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
2046 {
2047 	struct btrfs_free_space *info;
2048 	struct rb_node *node;
2049 
2050 	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2051 		info = rb_entry(node, struct btrfs_free_space, offset_index);
2052 		if (!info->bitmap) {
2053 			unlink_free_space(ctl, info);
2054 			kmem_cache_free(btrfs_free_space_cachep, info);
2055 		} else {
2056 			free_bitmap(ctl, info);
2057 		}
2058 		if (need_resched()) {
2059 			spin_unlock(&ctl->tree_lock);
2060 			cond_resched();
2061 			spin_lock(&ctl->tree_lock);
2062 		}
2063 	}
2064 }
2065 
2066 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2067 {
2068 	spin_lock(&ctl->tree_lock);
2069 	__btrfs_remove_free_space_cache_locked(ctl);
2070 	spin_unlock(&ctl->tree_lock);
2071 }
2072 
2073 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2074 {
2075 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2076 	struct btrfs_free_cluster *cluster;
2077 	struct list_head *head;
2078 
2079 	spin_lock(&ctl->tree_lock);
2080 	while ((head = block_group->cluster_list.next) !=
2081 	       &block_group->cluster_list) {
2082 		cluster = list_entry(head, struct btrfs_free_cluster,
2083 				     block_group_list);
2084 
2085 		WARN_ON(cluster->block_group != block_group);
2086 		__btrfs_return_cluster_to_free_space(block_group, cluster);
2087 		if (need_resched()) {
2088 			spin_unlock(&ctl->tree_lock);
2089 			cond_resched();
2090 			spin_lock(&ctl->tree_lock);
2091 		}
2092 	}
2093 	__btrfs_remove_free_space_cache_locked(ctl);
2094 	spin_unlock(&ctl->tree_lock);
2095 
2096 }
2097 
2098 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2099 			       u64 offset, u64 bytes, u64 empty_size)
2100 {
2101 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2102 	struct btrfs_free_space *entry = NULL;
2103 	u64 bytes_search = bytes + empty_size;
2104 	u64 ret = 0;
2105 
2106 	spin_lock(&ctl->tree_lock);
2107 	entry = find_free_space(ctl, &offset, &bytes_search);
2108 	if (!entry)
2109 		goto out;
2110 
2111 	ret = offset;
2112 	if (entry->bitmap) {
2113 		bitmap_clear_bits(ctl, entry, offset, bytes);
2114 		if (!entry->bytes)
2115 			free_bitmap(ctl, entry);
2116 	} else {
2117 		unlink_free_space(ctl, entry);
2118 		entry->offset += bytes;
2119 		entry->bytes -= bytes;
2120 		if (!entry->bytes)
2121 			kmem_cache_free(btrfs_free_space_cachep, entry);
2122 		else
2123 			link_free_space(ctl, entry);
2124 	}
2125 
2126 out:
2127 	spin_unlock(&ctl->tree_lock);
2128 
2129 	return ret;
2130 }
2131 
2132 /*
2133  * given a cluster, put all of its extents back into the free space
2134  * cache.  If a block group is passed, this function will only free
2135  * a cluster that belongs to the passed block group.
2136  *
2137  * Otherwise, it'll get a reference on the block group pointed to by the
2138  * cluster and remove the cluster from it.
2139  */
2140 int btrfs_return_cluster_to_free_space(
2141 			       struct btrfs_block_group_cache *block_group,
2142 			       struct btrfs_free_cluster *cluster)
2143 {
2144 	struct btrfs_free_space_ctl *ctl;
2145 	int ret;
2146 
2147 	/* first, get a safe pointer to the block group */
2148 	spin_lock(&cluster->lock);
2149 	if (!block_group) {
2150 		block_group = cluster->block_group;
2151 		if (!block_group) {
2152 			spin_unlock(&cluster->lock);
2153 			return 0;
2154 		}
2155 	} else if (cluster->block_group != block_group) {
2156 		/* someone else has already freed it don't redo their work */
2157 		spin_unlock(&cluster->lock);
2158 		return 0;
2159 	}
2160 	atomic_inc(&block_group->count);
2161 	spin_unlock(&cluster->lock);
2162 
2163 	ctl = block_group->free_space_ctl;
2164 
2165 	/* now return any extents the cluster had on it */
2166 	spin_lock(&ctl->tree_lock);
2167 	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2168 	spin_unlock(&ctl->tree_lock);
2169 
2170 	/* finally drop our ref */
2171 	btrfs_put_block_group(block_group);
2172 	return ret;
2173 }
2174 
2175 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2176 				   struct btrfs_free_cluster *cluster,
2177 				   struct btrfs_free_space *entry,
2178 				   u64 bytes, u64 min_start)
2179 {
2180 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2181 	int err;
2182 	u64 search_start = cluster->window_start;
2183 	u64 search_bytes = bytes;
2184 	u64 ret = 0;
2185 
2186 	search_start = min_start;
2187 	search_bytes = bytes;
2188 
2189 	err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2190 	if (err)
2191 		return 0;
2192 
2193 	ret = search_start;
2194 	__bitmap_clear_bits(ctl, entry, ret, bytes);
2195 
2196 	return ret;
2197 }
2198 
2199 /*
2200  * given a cluster, try to allocate 'bytes' from it, returns 0
2201  * if it couldn't find anything suitably large, or a logical disk offset
2202  * if things worked out
2203  */
2204 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2205 			     struct btrfs_free_cluster *cluster, u64 bytes,
2206 			     u64 min_start)
2207 {
2208 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2209 	struct btrfs_free_space *entry = NULL;
2210 	struct rb_node *node;
2211 	u64 ret = 0;
2212 
2213 	spin_lock(&cluster->lock);
2214 	if (bytes > cluster->max_size)
2215 		goto out;
2216 
2217 	if (cluster->block_group != block_group)
2218 		goto out;
2219 
2220 	node = rb_first(&cluster->root);
2221 	if (!node)
2222 		goto out;
2223 
2224 	entry = rb_entry(node, struct btrfs_free_space, offset_index);
2225 	while(1) {
2226 		if (entry->bytes < bytes ||
2227 		    (!entry->bitmap && entry->offset < min_start)) {
2228 			node = rb_next(&entry->offset_index);
2229 			if (!node)
2230 				break;
2231 			entry = rb_entry(node, struct btrfs_free_space,
2232 					 offset_index);
2233 			continue;
2234 		}
2235 
2236 		if (entry->bitmap) {
2237 			ret = btrfs_alloc_from_bitmap(block_group,
2238 						      cluster, entry, bytes,
2239 						      min_start);
2240 			if (ret == 0) {
2241 				node = rb_next(&entry->offset_index);
2242 				if (!node)
2243 					break;
2244 				entry = rb_entry(node, struct btrfs_free_space,
2245 						 offset_index);
2246 				continue;
2247 			}
2248 		} else {
2249 			ret = entry->offset;
2250 
2251 			entry->offset += bytes;
2252 			entry->bytes -= bytes;
2253 		}
2254 
2255 		if (entry->bytes == 0)
2256 			rb_erase(&entry->offset_index, &cluster->root);
2257 		break;
2258 	}
2259 out:
2260 	spin_unlock(&cluster->lock);
2261 
2262 	if (!ret)
2263 		return 0;
2264 
2265 	spin_lock(&ctl->tree_lock);
2266 
2267 	ctl->free_space -= bytes;
2268 	if (entry->bytes == 0) {
2269 		ctl->free_extents--;
2270 		if (entry->bitmap) {
2271 			kfree(entry->bitmap);
2272 			ctl->total_bitmaps--;
2273 			ctl->op->recalc_thresholds(ctl);
2274 		}
2275 		kmem_cache_free(btrfs_free_space_cachep, entry);
2276 	}
2277 
2278 	spin_unlock(&ctl->tree_lock);
2279 
2280 	return ret;
2281 }
2282 
2283 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2284 				struct btrfs_free_space *entry,
2285 				struct btrfs_free_cluster *cluster,
2286 				u64 offset, u64 bytes, u64 min_bytes)
2287 {
2288 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2289 	unsigned long next_zero;
2290 	unsigned long i;
2291 	unsigned long search_bits;
2292 	unsigned long total_bits;
2293 	unsigned long found_bits;
2294 	unsigned long start = 0;
2295 	unsigned long total_found = 0;
2296 	int ret;
2297 	bool found = false;
2298 
2299 	i = offset_to_bit(entry->offset, block_group->sectorsize,
2300 			  max_t(u64, offset, entry->offset));
2301 	search_bits = bytes_to_bits(bytes, block_group->sectorsize);
2302 	total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2303 
2304 again:
2305 	found_bits = 0;
2306 	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2307 	     i < BITS_PER_BITMAP;
2308 	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2309 		next_zero = find_next_zero_bit(entry->bitmap,
2310 					       BITS_PER_BITMAP, i);
2311 		if (next_zero - i >= search_bits) {
2312 			found_bits = next_zero - i;
2313 			break;
2314 		}
2315 		i = next_zero;
2316 	}
2317 
2318 	if (!found_bits)
2319 		return -ENOSPC;
2320 
2321 	if (!found) {
2322 		start = i;
2323 		cluster->max_size = 0;
2324 		found = true;
2325 	}
2326 
2327 	total_found += found_bits;
2328 
2329 	if (cluster->max_size < found_bits * block_group->sectorsize)
2330 		cluster->max_size = found_bits * block_group->sectorsize;
2331 
2332 	if (total_found < total_bits) {
2333 		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
2334 		if (i - start > total_bits * 2) {
2335 			total_found = 0;
2336 			cluster->max_size = 0;
2337 			found = false;
2338 		}
2339 		goto again;
2340 	}
2341 
2342 	cluster->window_start = start * block_group->sectorsize +
2343 		entry->offset;
2344 	rb_erase(&entry->offset_index, &ctl->free_space_offset);
2345 	ret = tree_insert_offset(&cluster->root, entry->offset,
2346 				 &entry->offset_index, 1);
2347 	BUG_ON(ret);
2348 
2349 	return 0;
2350 }
2351 
2352 /*
2353  * This searches the block group for just extents to fill the cluster with.
2354  */
2355 static noinline int
2356 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2357 			struct btrfs_free_cluster *cluster,
2358 			struct list_head *bitmaps, u64 offset, u64 bytes,
2359 			u64 min_bytes)
2360 {
2361 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2362 	struct btrfs_free_space *first = NULL;
2363 	struct btrfs_free_space *entry = NULL;
2364 	struct btrfs_free_space *prev = NULL;
2365 	struct btrfs_free_space *last;
2366 	struct rb_node *node;
2367 	u64 window_start;
2368 	u64 window_free;
2369 	u64 max_extent;
2370 	u64 max_gap = 128 * 1024;
2371 
2372 	entry = tree_search_offset(ctl, offset, 0, 1);
2373 	if (!entry)
2374 		return -ENOSPC;
2375 
2376 	/*
2377 	 * We don't want bitmaps, so just move along until we find a normal
2378 	 * extent entry.
2379 	 */
2380 	while (entry->bitmap) {
2381 		if (list_empty(&entry->list))
2382 			list_add_tail(&entry->list, bitmaps);
2383 		node = rb_next(&entry->offset_index);
2384 		if (!node)
2385 			return -ENOSPC;
2386 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
2387 	}
2388 
2389 	window_start = entry->offset;
2390 	window_free = entry->bytes;
2391 	max_extent = entry->bytes;
2392 	first = entry;
2393 	last = entry;
2394 	prev = entry;
2395 
2396 	while (window_free <= min_bytes) {
2397 		node = rb_next(&entry->offset_index);
2398 		if (!node)
2399 			return -ENOSPC;
2400 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
2401 
2402 		if (entry->bitmap) {
2403 			if (list_empty(&entry->list))
2404 				list_add_tail(&entry->list, bitmaps);
2405 			continue;
2406 		}
2407 
2408 		/*
2409 		 * we haven't filled the empty size and the window is
2410 		 * very large.  reset and try again
2411 		 */
2412 		if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2413 		    entry->offset - window_start > (min_bytes * 2)) {
2414 			first = entry;
2415 			window_start = entry->offset;
2416 			window_free = entry->bytes;
2417 			last = entry;
2418 			max_extent = entry->bytes;
2419 		} else {
2420 			last = entry;
2421 			window_free += entry->bytes;
2422 			if (entry->bytes > max_extent)
2423 				max_extent = entry->bytes;
2424 		}
2425 		prev = entry;
2426 	}
2427 
2428 	cluster->window_start = first->offset;
2429 
2430 	node = &first->offset_index;
2431 
2432 	/*
2433 	 * now we've found our entries, pull them out of the free space
2434 	 * cache and put them into the cluster rbtree
2435 	 */
2436 	do {
2437 		int ret;
2438 
2439 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
2440 		node = rb_next(&entry->offset_index);
2441 		if (entry->bitmap)
2442 			continue;
2443 
2444 		rb_erase(&entry->offset_index, &ctl->free_space_offset);
2445 		ret = tree_insert_offset(&cluster->root, entry->offset,
2446 					 &entry->offset_index, 0);
2447 		BUG_ON(ret);
2448 	} while (node && entry != last);
2449 
2450 	cluster->max_size = max_extent;
2451 
2452 	return 0;
2453 }
2454 
2455 /*
2456  * This specifically looks for bitmaps that may work in the cluster, we assume
2457  * that we have already failed to find extents that will work.
2458  */
2459 static noinline int
2460 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2461 		     struct btrfs_free_cluster *cluster,
2462 		     struct list_head *bitmaps, u64 offset, u64 bytes,
2463 		     u64 min_bytes)
2464 {
2465 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2466 	struct btrfs_free_space *entry;
2467 	int ret = -ENOSPC;
2468 	u64 bitmap_offset = offset_to_bitmap(ctl, offset);
2469 
2470 	if (ctl->total_bitmaps == 0)
2471 		return -ENOSPC;
2472 
2473 	/*
2474 	 * The bitmap that covers offset won't be in the list unless offset
2475 	 * is just its start offset.
2476 	 */
2477 	entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2478 	if (entry->offset != bitmap_offset) {
2479 		entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2480 		if (entry && list_empty(&entry->list))
2481 			list_add(&entry->list, bitmaps);
2482 	}
2483 
2484 	list_for_each_entry(entry, bitmaps, list) {
2485 		if (entry->bytes < min_bytes)
2486 			continue;
2487 		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2488 					   bytes, min_bytes);
2489 		if (!ret)
2490 			return 0;
2491 	}
2492 
2493 	/*
2494 	 * The bitmaps list has all the bitmaps that record free space
2495 	 * starting after offset, so no more search is required.
2496 	 */
2497 	return -ENOSPC;
2498 }
2499 
2500 /*
2501  * here we try to find a cluster of blocks in a block group.  The goal
2502  * is to find at least bytes free and up to empty_size + bytes free.
2503  * We might not find them all in one contiguous area.
2504  *
2505  * returns zero and sets up cluster if things worked out, otherwise
2506  * it returns -enospc
2507  */
2508 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2509 			     struct btrfs_root *root,
2510 			     struct btrfs_block_group_cache *block_group,
2511 			     struct btrfs_free_cluster *cluster,
2512 			     u64 offset, u64 bytes, u64 empty_size)
2513 {
2514 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2515 	struct btrfs_free_space *entry, *tmp;
2516 	LIST_HEAD(bitmaps);
2517 	u64 min_bytes;
2518 	int ret;
2519 
2520 	/* for metadata, allow allocates with more holes */
2521 	if (btrfs_test_opt(root, SSD_SPREAD)) {
2522 		min_bytes = bytes + empty_size;
2523 	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2524 		/*
2525 		 * we want to do larger allocations when we are
2526 		 * flushing out the delayed refs, it helps prevent
2527 		 * making more work as we go along.
2528 		 */
2529 		if (trans->transaction->delayed_refs.flushing)
2530 			min_bytes = max(bytes, (bytes + empty_size) >> 1);
2531 		else
2532 			min_bytes = max(bytes, (bytes + empty_size) >> 4);
2533 	} else
2534 		min_bytes = max(bytes, (bytes + empty_size) >> 2);
2535 
2536 	spin_lock(&ctl->tree_lock);
2537 
2538 	/*
2539 	 * If we know we don't have enough space to make a cluster don't even
2540 	 * bother doing all the work to try and find one.
2541 	 */
2542 	if (ctl->free_space < min_bytes) {
2543 		spin_unlock(&ctl->tree_lock);
2544 		return -ENOSPC;
2545 	}
2546 
2547 	spin_lock(&cluster->lock);
2548 
2549 	/* someone already found a cluster, hooray */
2550 	if (cluster->block_group) {
2551 		ret = 0;
2552 		goto out;
2553 	}
2554 
2555 	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2556 				      bytes, min_bytes);
2557 	if (ret)
2558 		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2559 					   offset, bytes, min_bytes);
2560 
2561 	/* Clear our temporary list */
2562 	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2563 		list_del_init(&entry->list);
2564 
2565 	if (!ret) {
2566 		atomic_inc(&block_group->count);
2567 		list_add_tail(&cluster->block_group_list,
2568 			      &block_group->cluster_list);
2569 		cluster->block_group = block_group;
2570 	}
2571 out:
2572 	spin_unlock(&cluster->lock);
2573 	spin_unlock(&ctl->tree_lock);
2574 
2575 	return ret;
2576 }
2577 
2578 /*
2579  * simple code to zero out a cluster
2580  */
2581 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2582 {
2583 	spin_lock_init(&cluster->lock);
2584 	spin_lock_init(&cluster->refill_lock);
2585 	cluster->root = RB_ROOT;
2586 	cluster->max_size = 0;
2587 	INIT_LIST_HEAD(&cluster->block_group_list);
2588 	cluster->block_group = NULL;
2589 }
2590 
2591 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2592 			   u64 *trimmed, u64 start, u64 end, u64 minlen)
2593 {
2594 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2595 	struct btrfs_free_space *entry = NULL;
2596 	struct btrfs_fs_info *fs_info = block_group->fs_info;
2597 	u64 bytes = 0;
2598 	u64 actually_trimmed;
2599 	int ret = 0;
2600 
2601 	*trimmed = 0;
2602 
2603 	while (start < end) {
2604 		spin_lock(&ctl->tree_lock);
2605 
2606 		if (ctl->free_space < minlen) {
2607 			spin_unlock(&ctl->tree_lock);
2608 			break;
2609 		}
2610 
2611 		entry = tree_search_offset(ctl, start, 0, 1);
2612 		if (!entry)
2613 			entry = tree_search_offset(ctl,
2614 						   offset_to_bitmap(ctl, start),
2615 						   1, 1);
2616 
2617 		if (!entry || entry->offset >= end) {
2618 			spin_unlock(&ctl->tree_lock);
2619 			break;
2620 		}
2621 
2622 		if (entry->bitmap) {
2623 			ret = search_bitmap(ctl, entry, &start, &bytes);
2624 			if (!ret) {
2625 				if (start >= end) {
2626 					spin_unlock(&ctl->tree_lock);
2627 					break;
2628 				}
2629 				bytes = min(bytes, end - start);
2630 				bitmap_clear_bits(ctl, entry, start, bytes);
2631 				if (entry->bytes == 0)
2632 					free_bitmap(ctl, entry);
2633 			} else {
2634 				start = entry->offset + BITS_PER_BITMAP *
2635 					block_group->sectorsize;
2636 				spin_unlock(&ctl->tree_lock);
2637 				ret = 0;
2638 				continue;
2639 			}
2640 		} else {
2641 			start = entry->offset;
2642 			bytes = min(entry->bytes, end - start);
2643 			unlink_free_space(ctl, entry);
2644 			kmem_cache_free(btrfs_free_space_cachep, entry);
2645 		}
2646 
2647 		spin_unlock(&ctl->tree_lock);
2648 
2649 		if (bytes >= minlen) {
2650 			struct btrfs_space_info *space_info;
2651 			int update = 0;
2652 
2653 			space_info = block_group->space_info;
2654 			spin_lock(&space_info->lock);
2655 			spin_lock(&block_group->lock);
2656 			if (!block_group->ro) {
2657 				block_group->reserved += bytes;
2658 				space_info->bytes_reserved += bytes;
2659 				update = 1;
2660 			}
2661 			spin_unlock(&block_group->lock);
2662 			spin_unlock(&space_info->lock);
2663 
2664 			ret = btrfs_error_discard_extent(fs_info->extent_root,
2665 							 start,
2666 							 bytes,
2667 							 &actually_trimmed);
2668 
2669 			btrfs_add_free_space(block_group, start, bytes);
2670 			if (update) {
2671 				spin_lock(&space_info->lock);
2672 				spin_lock(&block_group->lock);
2673 				if (block_group->ro)
2674 					space_info->bytes_readonly += bytes;
2675 				block_group->reserved -= bytes;
2676 				space_info->bytes_reserved -= bytes;
2677 				spin_unlock(&space_info->lock);
2678 				spin_unlock(&block_group->lock);
2679 			}
2680 
2681 			if (ret)
2682 				break;
2683 			*trimmed += actually_trimmed;
2684 		}
2685 		start += bytes;
2686 		bytes = 0;
2687 
2688 		if (fatal_signal_pending(current)) {
2689 			ret = -ERESTARTSYS;
2690 			break;
2691 		}
2692 
2693 		cond_resched();
2694 	}
2695 
2696 	return ret;
2697 }
2698 
2699 /*
2700  * Find the left-most item in the cache tree, and then return the
2701  * smallest inode number in the item.
2702  *
2703  * Note: the returned inode number may not be the smallest one in
2704  * the tree, if the left-most item is a bitmap.
2705  */
2706 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2707 {
2708 	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2709 	struct btrfs_free_space *entry = NULL;
2710 	u64 ino = 0;
2711 
2712 	spin_lock(&ctl->tree_lock);
2713 
2714 	if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2715 		goto out;
2716 
2717 	entry = rb_entry(rb_first(&ctl->free_space_offset),
2718 			 struct btrfs_free_space, offset_index);
2719 
2720 	if (!entry->bitmap) {
2721 		ino = entry->offset;
2722 
2723 		unlink_free_space(ctl, entry);
2724 		entry->offset++;
2725 		entry->bytes--;
2726 		if (!entry->bytes)
2727 			kmem_cache_free(btrfs_free_space_cachep, entry);
2728 		else
2729 			link_free_space(ctl, entry);
2730 	} else {
2731 		u64 offset = 0;
2732 		u64 count = 1;
2733 		int ret;
2734 
2735 		ret = search_bitmap(ctl, entry, &offset, &count);
2736 		BUG_ON(ret);
2737 
2738 		ino = offset;
2739 		bitmap_clear_bits(ctl, entry, offset, 1);
2740 		if (entry->bytes == 0)
2741 			free_bitmap(ctl, entry);
2742 	}
2743 out:
2744 	spin_unlock(&ctl->tree_lock);
2745 
2746 	return ino;
2747 }
2748 
2749 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2750 				    struct btrfs_path *path)
2751 {
2752 	struct inode *inode = NULL;
2753 
2754 	spin_lock(&root->cache_lock);
2755 	if (root->cache_inode)
2756 		inode = igrab(root->cache_inode);
2757 	spin_unlock(&root->cache_lock);
2758 	if (inode)
2759 		return inode;
2760 
2761 	inode = __lookup_free_space_inode(root, path, 0);
2762 	if (IS_ERR(inode))
2763 		return inode;
2764 
2765 	spin_lock(&root->cache_lock);
2766 	if (!btrfs_fs_closing(root->fs_info))
2767 		root->cache_inode = igrab(inode);
2768 	spin_unlock(&root->cache_lock);
2769 
2770 	return inode;
2771 }
2772 
2773 int create_free_ino_inode(struct btrfs_root *root,
2774 			  struct btrfs_trans_handle *trans,
2775 			  struct btrfs_path *path)
2776 {
2777 	return __create_free_space_inode(root, trans, path,
2778 					 BTRFS_FREE_INO_OBJECTID, 0);
2779 }
2780 
2781 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2782 {
2783 	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2784 	struct btrfs_path *path;
2785 	struct inode *inode;
2786 	int ret = 0;
2787 	u64 root_gen = btrfs_root_generation(&root->root_item);
2788 
2789 	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2790 		return 0;
2791 
2792 	/*
2793 	 * If we're unmounting then just return, since this does a search on the
2794 	 * normal root and not the commit root and we could deadlock.
2795 	 */
2796 	if (btrfs_fs_closing(fs_info))
2797 		return 0;
2798 
2799 	path = btrfs_alloc_path();
2800 	if (!path)
2801 		return 0;
2802 
2803 	inode = lookup_free_ino_inode(root, path);
2804 	if (IS_ERR(inode))
2805 		goto out;
2806 
2807 	if (root_gen != BTRFS_I(inode)->generation)
2808 		goto out_put;
2809 
2810 	ret = __load_free_space_cache(root, inode, ctl, path, 0);
2811 
2812 	if (ret < 0)
2813 		printk(KERN_ERR "btrfs: failed to load free ino cache for "
2814 		       "root %llu\n", root->root_key.objectid);
2815 out_put:
2816 	iput(inode);
2817 out:
2818 	btrfs_free_path(path);
2819 	return ret;
2820 }
2821 
2822 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2823 			      struct btrfs_trans_handle *trans,
2824 			      struct btrfs_path *path)
2825 {
2826 	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2827 	struct inode *inode;
2828 	int ret;
2829 
2830 	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2831 		return 0;
2832 
2833 	inode = lookup_free_ino_inode(root, path);
2834 	if (IS_ERR(inode))
2835 		return 0;
2836 
2837 	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2838 	if (ret) {
2839 		btrfs_delalloc_release_metadata(inode, inode->i_size);
2840 #ifdef DEBUG
2841 		printk(KERN_ERR "btrfs: failed to write free ino cache "
2842 		       "for root %llu\n", root->root_key.objectid);
2843 #endif
2844 	}
2845 
2846 	iput(inode);
2847 	return ret;
2848 }
2849