xref: /linux/fs/btrfs/extent-tree.c (revision e27ecdd94d81e5bc3d1f68591701db5adb342f0d)
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "free-space-cache.h"

static int update_reserved_extents(struct btrfs_root *root,
				   u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				u64 bytenr, u64 num_bytes, u64 parent,
				u64 root_objectid, u64 owner_objectid,
				u64 owner_offset, int refs_to_drop,
				struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force);

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);
	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret)
		atomic_inc(&ret->count);
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

/*
 * this is only called by cache_block_group.  since we could have freed
 * extents, we need to check pinned_extents for any extents that can't be
 * used yet, because their free space will only be released once the
 * transaction commits.
 */
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(&info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		if (extent_start == start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return 0;
}
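
/*
 * Worked example with made-up offsets: for a block group covering
 * [0, 100) with a single pinned (EXTENT_DIRTY) range [30, 39], the
 * loop above adds [0, 30) to the free space cache and advances start
 * past the pinned range to 40; the trailing check then adds [40, 100).
 */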

static int remove_sb_from_cache(struct btrfs_root *root,
				struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr, 0,
				       &logical, &nr, &stripe_len);
		BUG_ON(ret);
		while (nr--) {
			btrfs_remove_free_space(cache, logical[nr],
						stripe_len);
		}
		kfree(logical);
	}
	return 0;
}

static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last;

	if (!block_group)
		return 0;

	root = root->fs_info->extent_root;

	if (block_group->cached)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	/*
	 * we get into deadlocks with paths held by callers of this function.
	 * since the alloc_mutex is protecting things right now, just
	 * skip the locking here
	 */
	path->skip_locking = 1;
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
	key.objectid = last;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			if (ret == 0)
				continue;
			else
				break;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			add_new_free_space(block_group, root->fs_info, last,
					   key.objectid);

			last = key.objectid + key.offset;
		}
next:
		path->slots[0]++;
	}

	add_new_free_space(block_group, root->fs_info, last,
			   block_group->key.objectid +
			   block_group->key.offset);

	block_group->cached = 1;
	remove_sb_from_cache(root, block_group);
	ret = 0;
err:
	btrfs_free_path(path);
	return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count))
		kfree(cache);
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags == flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}
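
/*
 * e.g. div_factor(1000, 9) == 900: "factor" counts tenths of the
 * total, so factor 9 means 90% of num.
 */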

u64 btrfs_find_block_group(struct btrfs_root *root,
			   u64 search_start, u64 search_hint, int owner)
{
	struct btrfs_block_group_cache *cache;
	u64 used;
	u64 last = max(search_hint, search_start);
	u64 group_start = 0;
	int full_search = 0;
	int factor = 9;
	int wrapped = 0;
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if ((full_search || !cache->ro) &&
		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
			if (used + cache->pinned + cache->reserved <
			    div_factor(cache->key.offset, factor)) {
				group_start = cache->key.objectid;
				spin_unlock(&cache->lock);
				btrfs_put_block_group(cache);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		btrfs_put_block_group(cache);
		cond_resched();
	}
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return group_start;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	key.objectid = start;
	key.offset = len;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs. Implicit back refs are optimized
 * for pointers in non-shared tree blocks. For a given pointer in a block,
 * back refs of this kind provide information about the block's owner tree
 * and the pointer's key. This information allows us to find the block by
 * b-tree search. Full back refs are for pointers in tree blocks not
 * referenced by their owner trees. The location of the tree block is
 * recorded in the back ref. Full back refs are actually generic and can
 * be used in all cases where implicit back refs are used. The major
 * shortcoming of full back refs is their overhead: every time a tree
 * block gets COWed, we have to update the back ref entries for all
 * pointers in it.
 *
 * For a newly allocated tree block, we use implicit back refs for
 * pointers in it. This means most tree related operations only involve
 * implicit back refs. For a tree block created in an old transaction, the
 * only way to drop a reference to it is to COW it. So we can detect the
 * event that a tree block loses its owner tree's reference and do the
 * back ref conversion.
 *
 * When a tree block is COWed through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 * owner tree. Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 * block's owner tree. In this case, full back refs are used for pointers
 * in the block. Remove these full back refs and add implicit back refs
 * for every pointer in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * the block's owner tree. In this case, implicit back refs are used for
 * pointers in the block. Add full back refs for every pointer in the
 * block and increase the lower level extents' reference counts. The
 * original implicit back refs are inherited by the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * not the block's owner tree. Add implicit back refs for every pointer in
 * the new block and increase the lower level extents' reference counts.
 *
 * Back reference key composition:
 *
 * The key objectid corresponds to the first byte in the extent,
 * the key type is used to differentiate between types of back refs,
 * and the key offset has a different meaning for each type of back ref.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is a hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has a field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf.
 *
 * When a file extent is allocated, implicit back refs are used.
 * The fields are filled in as:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed during file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * only consist of a key. The key offset for the implicit back refs is
 * the objectid of the block's owner tree. The key offset for the full
 * back refs is the first byte of the parent block.
 *
 * When implicit back refs are used, information about the lowest key and
 * the level of the tree block is required. This information is stored in
 * the tree block info structure.
 */
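
/*
 * A hypothetical worked example of the key composition described above
 * (all numbers made up for illustration): a file extent at bytenr
 * 136708096, referenced by inode 257 at file offset 0 in subvolume 5,
 * gets an implicit back ref keyed
 *
 *     (136708096, BTRFS_EXTENT_DATA_REF_KEY,
 *      hash_extent_data_ref(5, 257, 0))
 *
 * while the same extent referenced through a shared leaf at bytenr
 * 30507008 would instead carry
 *
 *     (136708096, BTRFS_SHARED_DATA_REF_KEY, 30507008)
 *
 * matching the key construction in lookup_extent_data_ref() below.
 */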

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(root, path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	ret = btrfs_extend_item(trans, root, path, new_size);
	BUG_ON(ret);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(root, path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(root, path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(root, path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_root *root,
					  struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(root, path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(root, path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
{
	int level;
	BUG_ON(!path->keep_locks);
	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		btrfs_assert_tree_locked(path->nodes[level]);
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	BUG_ON(ret);

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add a new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add a new inline back
		 * ref if there is any kind of item for this block.
		 */
		if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}
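
/*
 * A minimal sketch of the calling pattern implied by the return codes
 * documented above (insert_inline_extent_backref() below is the real
 * caller):
 *
 *	ret = lookup_inline_extent_backref(..., &iref, ..., 1);
 *	if (ret == 0)            update the existing inline ref at iref
 *	else if (ret == -ENOENT) insert a new inline ref at iref
 *	else if (ret == -EAGAIN) fall back to a keyed back ref item
 */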

/*
 * helper to add new inline back ref
 */
static noinline_for_stack
int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_extent_inline_ref *iref,
				u64 parent, u64 root_objectid,
				u64 owner, u64 offset, int refs_to_add,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;
	int ret;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	ret = btrfs_extend_item(trans, root, path, size);
	BUG_ON(ret);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
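
/*
 * Schematic view of what the btrfs_extend_item/memmove pair above does
 * (sizes not to scale): inserting a new inline ref of "size" bytes at
 * item_offset turns
 *
 *	[ extent item header | ref A | ref B ]
 * into
 *	[ extent item header | ref A | new ref | ref B ]
 *
 * so the inline refs stay in the same sort order as keyed back ref
 * items in the extent tree.
 */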

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(root, path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}

/*
 * helper to update/remove inline back ref
 */
static noinline_for_stack
int update_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_mod,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	int ret;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		ret = btrfs_truncate_item(trans, root, path, item_size, 1);
		BUG_ON(ret);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		ret = update_inline_extent_backref(trans, root, path, iref,
						   refs_to_add, extent_op);
	} else if (ret == -ENOENT) {
		ret = setup_inline_extent_backref(trans, root, path, iref,
						  parent, root_objectid,
						  owner, offset, refs_to_add,
						  extent_op);
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, root, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, root, path, bytenr,
					     parent, root_objectid,
					     owner, offset, refs_to_add);
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data)
{
	int ret;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		ret = update_inline_extent_backref(trans, root, path, iref,
						   -refs_to_drop, NULL);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
	} else {
		ret = btrfs_del_item(trans, root, path);
	}
	return ret;
}

#ifdef BIO_RW_DISCARD
static void btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
{
	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
}
#endif

static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
				u64 num_bytes)
{
#ifdef BIO_RW_DISCARD
	int ret;
	u64 map_length = num_bytes;
	struct btrfs_multi_bio *multi = NULL;

	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
			      bytenr, &map_length, &multi, 0);
	if (!ret) {
		struct btrfs_bio_stripe *stripe = multi->stripes;
		int i;

		if (map_length > num_bytes)
			map_length = num_bytes;

		for (i = 0; i < multi->num_stripes; i++, stripe++) {
			btrfs_issue_discard(stripe->dev->bdev,
					    stripe->physical,
					    map_length);
		}
		kfree(multi);
	}

	return ret;
#else
	return 0;
#endif
}

int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;
	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_ADD_DELAYED_REF, NULL);
	} else {
		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
					parent, root_objectid, owner, offset,
					BTRFS_ADD_DELAYED_REF, NULL);
	}
	return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  u64 bytenr, u64 num_bytes,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	u64 refs;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	path->leave_spinning = 1;
	/* this will set up the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
					   path, bytenr, num_bytes, parent,
					   root_objectid, owner, offset,
					   refs_to_add, extent_op);
	if (ret == 0)
		goto out;

	if (ret != -EAGAIN) {
		err = ret;
		goto out;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root->fs_info->extent_root, path);

	path->reada = 1;
	path->leave_spinning = 1;

	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	BUG_ON(ret);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	else
		ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op) {
			BUG_ON(extent_op->update_key);
			flags |= extent_op->flags_to_set;
		}
		ret = alloc_reserved_file_extent(trans, root,
						 parent, ref_root, flags,
						 ref->objectid, ref->offset,
						 &ins, node->ref_mod);
		update_reserved_extents(root, ins.objectid, ins.offset, 0);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent,
					     ref_root, ref->objectid,
					     ref->offset, node->ref_mod,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op);
	} else {
		BUG();
	}
	return ret;
}

static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}

static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = node->bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = node->num_bytes;

	path->reada = 1;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
				path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_tree_ref(node);
	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	else
		ref_root = ref->root;

	BUG_ON(node->ref_mod != 1);
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		BUG_ON(!extent_op || !extent_op->update_flags ||
		       !extent_op->update_key);
		ret = alloc_reserved_tree_block(trans, root,
						parent, ref_root,
						extent_op->flags_to_set,
						&extent_op->key,
						ref->level, &ins);
		update_reserved_extents(root, ins.objectid, ins.offset, 0);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent, ref_root,
					     ref->level, 0, 1, extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent, ref_root,
					  ref->level, 0, 1, extent_op);
	} else {
		BUG();
	}
	return ret;
}

/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_delayed_ref_node *node,
			       struct btrfs_delayed_extent_op *extent_op,
			       int insert_reserved)
{
	int ret;
	if (btrfs_delayed_ref_is_head(node)) {
		struct btrfs_delayed_ref_head *head;
		/*
		 * we've hit the end of the chain and we were supposed
		 * to insert this extent into the tree.  But, it got
		 * deleted before we ever needed to insert it, so all
		 * we have to do is clean up the accounting
		 */
		BUG_ON(extent_op);
		head = btrfs_delayed_node_to_head(node);
		if (insert_reserved) {
			if (head->is_data) {
				ret = btrfs_del_csums(trans, root,
						      node->bytenr,
						      node->num_bytes);
				BUG_ON(ret);
			}
			btrfs_update_pinned_extents(root, node->bytenr,
						    node->num_bytes, 1);
			update_reserved_extents(root, node->bytenr,
						node->num_bytes, 0);
		}
		mutex_unlock(&head->mutex);
		return 0;
	}

	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = run_delayed_tree_ref(trans, root, node, extent_op,
					   insert_reserved);
	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		 node->type == BTRFS_SHARED_DATA_REF_KEY)
		ret = run_delayed_data_ref(trans, root, node, extent_op,
					   insert_reserved);
	else
		BUG();
	return ret;
}

static noinline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
	struct rb_node *node;
	struct btrfs_delayed_ref_node *ref;
	int action = BTRFS_ADD_DELAYED_REF;
again:
	/*
	 * select delayed refs of type BTRFS_ADD_DELAYED_REF first.
	 * this prevents the ref count from going down to zero while
	 * there are still pending delayed refs.
	 */
	node = rb_prev(&head->node.rb_node);
	while (1) {
		if (!node)
			break;
		ref = rb_entry(node, struct btrfs_delayed_ref_node,
				rb_node);
		if (ref->bytenr != head->node.bytenr)
			break;
		if (ref->action == action)
			return ref;
		node = rb_prev(node);
	}
	if (action == BTRFS_ADD_DELAYED_REF) {
		action = BTRFS_DROP_DELAYED_REF;
		goto again;
	}
	return NULL;
}
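
/*
 * Hypothetical ordering example: if a head has queued DROP, ADD, DROP
 * actions for the same bytenr, the two passes above hand back the ADD
 * first, so the on-disk ref count never transiently drops to zero
 * while references are still pending.
 */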
1776 
1777 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
1778 				       struct btrfs_root *root,
1779 				       struct list_head *cluster)
1780 {
1781 	struct btrfs_delayed_ref_root *delayed_refs;
1782 	struct btrfs_delayed_ref_node *ref;
1783 	struct btrfs_delayed_ref_head *locked_ref = NULL;
1784 	struct btrfs_delayed_extent_op *extent_op;
1785 	int ret;
1786 	int count = 0;
1787 	int must_insert_reserved = 0;
1788 
1789 	delayed_refs = &trans->transaction->delayed_refs;
1790 	while (1) {
1791 		if (!locked_ref) {
1792 			/* pick a new head ref from the cluster list */
1793 			if (list_empty(cluster))
1794 				break;
1795 
1796 			locked_ref = list_entry(cluster->next,
1797 				     struct btrfs_delayed_ref_head, cluster);
1798 
1799 			/* grab the lock that says we are going to process
1800 			 * all the refs for this head */
1801 			ret = btrfs_delayed_ref_lock(trans, locked_ref);
1802 
1803 			/*
1804 			 * we may have dropped the spin lock to get the head
1805 			 * mutex lock, and that might have given someone else
1806 			 * time to free the head.  If that's true, it has been
1807 			 * removed from our list and we can move on.
1808 			 */
1809 			if (ret == -EAGAIN) {
1810 				locked_ref = NULL;
1811 				count++;
1812 				continue;
1813 			}
1814 		}
1815 
1816 		/*
1817 		 * record the must insert reserved flag before we
1818 		 * drop the spin lock.
1819 		 */
1820 		must_insert_reserved = locked_ref->must_insert_reserved;
1821 		locked_ref->must_insert_reserved = 0;
1822 
1823 		extent_op = locked_ref->extent_op;
1824 		locked_ref->extent_op = NULL;
1825 
1826 		/*
1827 		 * locked_ref is the head node, so we have to go one
1828 		 * node back for any delayed ref updates
1829 		 */
1830 		ref = select_delayed_ref(locked_ref);
1831 		if (!ref) {
1832 			/* All delayed refs have been processed.  Go ahead
1833 			 * and send the head node to run_one_delayed_ref,
1834 			 * so that any accounting fixes can happen
1835 			 */
1836 			ref = &locked_ref->node;
1837 
1838 			if (extent_op && must_insert_reserved) {
1839 				kfree(extent_op);
1840 				extent_op = NULL;
1841 			}
1842 
1843 			if (extent_op) {
1844 				spin_unlock(&delayed_refs->lock);
1845 
1846 				ret = run_delayed_extent_op(trans, root,
1847 							    ref, extent_op);
1848 				BUG_ON(ret);
1849 				kfree(extent_op);
1850 
1851 				cond_resched();
1852 				spin_lock(&delayed_refs->lock);
1853 				continue;
1854 			}
1855 
1856 			list_del_init(&locked_ref->cluster);
1857 			locked_ref = NULL;
1858 		}
1859 
1860 		ref->in_tree = 0;
1861 		rb_erase(&ref->rb_node, &delayed_refs->root);
1862 		delayed_refs->num_entries--;
1863 
1864 		spin_unlock(&delayed_refs->lock);
1865 
1866 		ret = run_one_delayed_ref(trans, root, ref, extent_op,
1867 					  must_insert_reserved);
1868 		BUG_ON(ret);
1869 
1870 		btrfs_put_delayed_ref(ref);
1871 		kfree(extent_op);
1872 		count++;
1873 
1874 		cond_resched();
1875 		spin_lock(&delayed_refs->lock);
1876 	}
1877 	return count;
1878 }
1879 
1880 /*
1881  * this starts processing the delayed reference count updates and
1882  * extent insertions we have queued up so far.  count can be
1883  * 0, which means to process everything in the tree at the start
1884  * of the run (but not newly added entries), or it can be some target
1885  * number you'd like to process.
1886  */
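/*
 * illustrative only: a caller that wants to drain the entire backlog
 * (the run_all behavior below) would do
 *
 *	btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 *
 * while a caller that just wants to keep the backlog bounded can pass
 * a small target count instead.
 */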
1887 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1888 			   struct btrfs_root *root, unsigned long count)
1889 {
1890 	struct rb_node *node;
1891 	struct btrfs_delayed_ref_root *delayed_refs;
1892 	struct btrfs_delayed_ref_node *ref;
1893 	struct list_head cluster;
1894 	int ret;
1895 	int run_all = count == (unsigned long)-1;
1896 	int run_most = 0;
1897 
1898 	if (root == root->fs_info->extent_root)
1899 		root = root->fs_info->tree_root;
1900 
1901 	delayed_refs = &trans->transaction->delayed_refs;
1902 	INIT_LIST_HEAD(&cluster);
1903 again:
1904 	spin_lock(&delayed_refs->lock);
1905 	if (count == 0) {
1906 		count = delayed_refs->num_entries * 2;
1907 		run_most = 1;
1908 	}
1909 	while (1) {
1910 		if (!(run_all || run_most) &&
1911 		    delayed_refs->num_heads_ready < 64)
1912 			break;
1913 
1914 		/*
1915 		 * go find something we can process in the rbtree.  We start at
1916 		 * the beginning of the tree, and then build a cluster
1917 		 * of refs to process starting at the first one we are able to
1918 		 * lock
1919 		 */
1920 		ret = btrfs_find_ref_cluster(trans, &cluster,
1921 					     delayed_refs->run_delayed_start);
1922 		if (ret)
1923 			break;
1924 
1925 		ret = run_clustered_refs(trans, root, &cluster);
1926 		BUG_ON(ret < 0);
1927 
1928 		count -= min_t(unsigned long, ret, count);
1929 
1930 		if (count == 0)
1931 			break;
1932 	}
1933 
1934 	if (run_all) {
1935 		node = rb_first(&delayed_refs->root);
1936 		if (!node)
1937 			goto out;
1938 		count = (unsigned long)-1;
1939 
1940 		while (node) {
1941 			ref = rb_entry(node, struct btrfs_delayed_ref_node,
1942 				       rb_node);
1943 			if (btrfs_delayed_ref_is_head(ref)) {
1944 				struct btrfs_delayed_ref_head *head;
1945 
1946 				head = btrfs_delayed_node_to_head(ref);
1947 				atomic_inc(&ref->refs);
1948 
1949 				spin_unlock(&delayed_refs->lock);
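				/*
				 * taking and dropping the head mutex
				 * just waits for whoever is currently
				 * processing this head to finish
				 * before we retry the walk
				 */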
1950 				mutex_lock(&head->mutex);
1951 				mutex_unlock(&head->mutex);
1952 
1953 				btrfs_put_delayed_ref(ref);
1954 				cond_resched();
1955 				goto again;
1956 			}
1957 			node = rb_next(node);
1958 		}
1959 		spin_unlock(&delayed_refs->lock);
1960 		schedule_timeout(1);
1961 		goto again;
1962 	}
1963 out:
1964 	spin_unlock(&delayed_refs->lock);
1965 	return 0;
1966 }
1967 
1968 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
1969 				struct btrfs_root *root,
1970 				u64 bytenr, u64 num_bytes, u64 flags,
1971 				int is_data)
1972 {
1973 	struct btrfs_delayed_extent_op *extent_op;
1974 	int ret;
1975 
1976 	extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
1977 	if (!extent_op)
1978 		return -ENOMEM;
1979 
1980 	extent_op->flags_to_set = flags;
1981 	extent_op->update_flags = 1;
1982 	extent_op->update_key = 0;
1983 	extent_op->is_data = is_data ? 1 : 0;
1984 
1985 	ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
1986 	if (ret)
1987 		kfree(extent_op);
1988 	return ret;
1989 }
1990 
1991 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
1992 				      struct btrfs_root *root,
1993 				      struct btrfs_path *path,
1994 				      u64 objectid, u64 offset, u64 bytenr)
1995 {
1996 	struct btrfs_delayed_ref_head *head;
1997 	struct btrfs_delayed_ref_node *ref;
1998 	struct btrfs_delayed_data_ref *data_ref;
1999 	struct btrfs_delayed_ref_root *delayed_refs;
2000 	struct rb_node *node;
2001 	int ret = 0;
2002 
2003 	ret = -ENOENT;
2004 	delayed_refs = &trans->transaction->delayed_refs;
2005 	spin_lock(&delayed_refs->lock);
2006 	head = btrfs_find_delayed_ref_head(trans, bytenr);
2007 	if (!head)
2008 		goto out;
2009 
2010 	if (!mutex_trylock(&head->mutex)) {
2011 		atomic_inc(&head->node.refs);
2012 		spin_unlock(&delayed_refs->lock);
2013 
2014 		btrfs_release_path(root->fs_info->extent_root, path);
2015 
2016 		mutex_lock(&head->mutex);
2017 		mutex_unlock(&head->mutex);
2018 		btrfs_put_delayed_ref(&head->node);
2019 		return -EAGAIN;
2020 	}
2021 
2022 	node = rb_prev(&head->node.rb_node);
2023 	if (!node)
2024 		goto out_unlock;
2025 
2026 	ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2027 
2028 	if (ref->bytenr != bytenr)
2029 		goto out_unlock;
2030 
2031 	ret = 1;
2032 	if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2033 		goto out_unlock;
2034 
2035 	data_ref = btrfs_delayed_node_to_data_ref(ref);
2036 
2037 	node = rb_prev(node);
2038 	if (node) {
2039 		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2040 		if (ref->bytenr == bytenr)
2041 			goto out_unlock;
2042 	}
2043 
2044 	if (data_ref->root != root->root_key.objectid ||
2045 	    data_ref->objectid != objectid || data_ref->offset != offset)
2046 		goto out_unlock;
2047 
2048 	ret = 0;
2049 out_unlock:
2050 	mutex_unlock(&head->mutex);
2051 out:
2052 	spin_unlock(&delayed_refs->lock);
2053 	return ret;
2054 }
2055 
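/*
 * look in the commit root for other references to this extent.  As a
 * rough sketch of the return values: 0 means the only committed
 * reference is our own (objectid, offset) in this root, 1 means the
 * extent may be shared, and -ENOENT means no committed extent item was
 * found at all.
 */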
2056 static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2057 					struct btrfs_root *root,
2058 					struct btrfs_path *path,
2059 					u64 objectid, u64 offset, u64 bytenr)
2060 {
2061 	struct btrfs_root *extent_root = root->fs_info->extent_root;
2062 	struct extent_buffer *leaf;
2063 	struct btrfs_extent_data_ref *ref;
2064 	struct btrfs_extent_inline_ref *iref;
2065 	struct btrfs_extent_item *ei;
2066 	struct btrfs_key key;
2067 	u32 item_size;
2068 	int ret;
2069 
2070 	key.objectid = bytenr;
2071 	key.offset = (u64)-1;
2072 	key.type = BTRFS_EXTENT_ITEM_KEY;
2073 
2074 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2075 	if (ret < 0)
2076 		goto out;
2077 	BUG_ON(ret == 0);
2078 
2079 	ret = -ENOENT;
2080 	if (path->slots[0] == 0)
2081 		goto out;
2082 
2083 	path->slots[0]--;
2084 	leaf = path->nodes[0];
2085 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2086 
2087 	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2088 		goto out;
2089 
2090 	ret = 1;
2091 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2092 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2093 	if (item_size < sizeof(*ei)) {
2094 		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
2095 		goto out;
2096 	}
2097 #endif
2098 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2099 
2100 	if (item_size != sizeof(*ei) +
2101 	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2102 		goto out;
2103 
2104 	if (btrfs_extent_generation(leaf, ei) <=
2105 	    btrfs_root_last_snapshot(&root->root_item))
2106 		goto out;
2107 
2108 	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2109 	if (btrfs_extent_inline_ref_type(leaf, iref) !=
2110 	    BTRFS_EXTENT_DATA_REF_KEY)
2111 		goto out;
2112 
2113 	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2114 	if (btrfs_extent_refs(leaf, ei) !=
2115 	    btrfs_extent_data_ref_count(leaf, ref) ||
2116 	    btrfs_extent_data_ref_root(leaf, ref) !=
2117 	    root->root_key.objectid ||
2118 	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2119 	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
2120 		goto out;
2121 
2122 	ret = 0;
2123 out:
2124 	return ret;
2125 }
2126 
2127 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2128 			  struct btrfs_root *root,
2129 			  u64 objectid, u64 offset, u64 bytenr)
2130 {
2131 	struct btrfs_path *path;
2132 	int ret;
2133 	int ret2;
2134 
2135 	path = btrfs_alloc_path();
2136 	if (!path)
2137 		return -ENOMEM;
2138 
2139 	do {
2140 		ret = check_committed_ref(trans, root, path, objectid,
2141 					  offset, bytenr);
2142 		if (ret && ret != -ENOENT)
2143 			goto out;
2144 
2145 		ret2 = check_delayed_ref(trans, root, path, objectid,
2146 					 offset, bytenr);
2147 	} while (ret2 == -EAGAIN);
2148 
2149 	if (ret2 && ret2 != -ENOENT) {
2150 		ret = ret2;
2151 		goto out;
2152 	}
2153 
2154 	if (ret != -ENOENT || ret2 != -ENOENT)
2155 		ret = 0;
2156 out:
2157 	btrfs_free_path(path);
2158 	return ret;
2159 }
2160 
2161 #if 0
2162 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2163 		    struct extent_buffer *buf, u32 nr_extents)
2164 {
2165 	struct btrfs_key key;
2166 	struct btrfs_file_extent_item *fi;
2167 	u64 root_gen;
2168 	u32 nritems;
2169 	int i;
2170 	int level;
2171 	int ret = 0;
2172 	int shared = 0;
2173 
2174 	if (!root->ref_cows)
2175 		return 0;
2176 
2177 	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2178 		shared = 0;
2179 		root_gen = root->root_key.offset;
2180 	} else {
2181 		shared = 1;
2182 		root_gen = trans->transid - 1;
2183 	}
2184 
2185 	level = btrfs_header_level(buf);
2186 	nritems = btrfs_header_nritems(buf);
2187 
2188 	if (level == 0) {
2189 		struct btrfs_leaf_ref *ref;
2190 		struct btrfs_extent_info *info;
2191 
2192 		ref = btrfs_alloc_leaf_ref(root, nr_extents);
2193 		if (!ref) {
2194 			ret = -ENOMEM;
2195 			goto out;
2196 		}
2197 
2198 		ref->root_gen = root_gen;
2199 		ref->bytenr = buf->start;
2200 		ref->owner = btrfs_header_owner(buf);
2201 		ref->generation = btrfs_header_generation(buf);
2202 		ref->nritems = nr_extents;
2203 		info = ref->extents;
2204 
2205 		for (i = 0; nr_extents > 0 && i < nritems; i++) {
2206 			u64 disk_bytenr;
2207 			btrfs_item_key_to_cpu(buf, &key, i);
2208 			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2209 				continue;
2210 			fi = btrfs_item_ptr(buf, i,
2211 					    struct btrfs_file_extent_item);
2212 			if (btrfs_file_extent_type(buf, fi) ==
2213 			    BTRFS_FILE_EXTENT_INLINE)
2214 				continue;
2215 			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2216 			if (disk_bytenr == 0)
2217 				continue;
2218 
2219 			info->bytenr = disk_bytenr;
2220 			info->num_bytes =
2221 				btrfs_file_extent_disk_num_bytes(buf, fi);
2222 			info->objectid = key.objectid;
2223 			info->offset = key.offset;
2224 			info++;
2225 		}
2226 
2227 		ret = btrfs_add_leaf_ref(root, ref, shared);
2228 		if (ret == -EEXIST && shared) {
2229 			struct btrfs_leaf_ref *old;
2230 			old = btrfs_lookup_leaf_ref(root, ref->bytenr);
2231 			BUG_ON(!old);
2232 			btrfs_remove_leaf_ref(root, old);
2233 			btrfs_free_leaf_ref(root, old);
2234 			ret = btrfs_add_leaf_ref(root, ref, shared);
2235 		}
2236 		WARN_ON(ret);
2237 		btrfs_free_leaf_ref(root, ref);
2238 	}
2239 out:
2240 	return ret;
2241 }
2242 
2243 /* when a block goes through cow, we update the reference counts of
2244  * everything that block points to.  The internal pointers of the block
2245  * can be in just about any order, and it is likely to have clusters of
2246  * things that are close together and clusters of things that are not.
2247  *
2248  * To help reduce the seeks that come with updating all of these reference
2249  * counts, sort them by byte number before actual updates are done.
2250  *
2251  * struct refsort is used to match byte number to slot in the btree block.
2252  * we sort based on the byte number and then use the slot to actually
2253  * find the item.
2254  *
2255  * struct refsort is smaller than struct btrfs_item and smaller than
2256  * struct btrfs_key_ptr.  Since we're currently limited to the page size
2257  * for a btree block, there's no way for a kmalloc of refsorts for a
2258  * single node to be bigger than a page.
2259  */
2260 struct refsort {
2261 	u64 bytenr;
2262 	u32 slot;
2263 };
2264 
2265 /*
2266  * for passing into sort()
2267  */
2268 static int refsort_cmp(const void *a_void, const void *b_void)
2269 {
2270 	const struct refsort *a = a_void;
2271 	const struct refsort *b = b_void;
2272 
2273 	if (a->bytenr < b->bytenr)
2274 		return -1;
2275 	if (a->bytenr > b->bytenr)
2276 		return 1;
2277 	return 0;
2278 }
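/*
 * illustrative sketch (not part of the original flow): the comparator
 * above is meant for the kernel's sort() helper from linux/sort.h,
 * roughly
 *
 *	sort(sorted, nritems, sizeof(struct refsort), refsort_cmp, NULL);
 *
 * where 'sorted' is a kmalloc'd array of struct refsort filled in from
 * the btree block, after which the slots can be visited in byte order.
 */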
2279 #endif
2280 
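/*
 * increment or decrement the refs on every extent pointer in the given
 * tree block: disk bytenrs of the file extent items if 'buf' is a leaf,
 * or child block pointers if it is a node.  'inc' selects whether
 * btrfs_inc_extent_ref or btrfs_free_extent is applied to each pointer.
 */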
2281 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2282 			   struct btrfs_root *root,
2283 			   struct extent_buffer *buf,
2284 			   int full_backref, int inc)
2285 {
2286 	u64 bytenr;
2287 	u64 num_bytes;
2288 	u64 parent;
2289 	u64 ref_root;
2290 	u32 nritems;
2291 	struct btrfs_key key;
2292 	struct btrfs_file_extent_item *fi;
2293 	int i;
2294 	int level;
2295 	int ret = 0;
2296 	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
2297 			    u64, u64, u64, u64, u64, u64);
2298 
2299 	ref_root = btrfs_header_owner(buf);
2300 	nritems = btrfs_header_nritems(buf);
2301 	level = btrfs_header_level(buf);
2302 
2303 	if (!root->ref_cows && level == 0)
2304 		return 0;
2305 
2306 	if (inc)
2307 		process_func = btrfs_inc_extent_ref;
2308 	else
2309 		process_func = btrfs_free_extent;
2310 
2311 	if (full_backref)
2312 		parent = buf->start;
2313 	else
2314 		parent = 0;
2315 
2316 	for (i = 0; i < nritems; i++) {
2317 		if (level == 0) {
2318 			btrfs_item_key_to_cpu(buf, &key, i);
2319 			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2320 				continue;
2321 			fi = btrfs_item_ptr(buf, i,
2322 					    struct btrfs_file_extent_item);
2323 			if (btrfs_file_extent_type(buf, fi) ==
2324 			    BTRFS_FILE_EXTENT_INLINE)
2325 				continue;
2326 			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2327 			if (bytenr == 0)
2328 				continue;
2329 
2330 			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2331 			key.offset -= btrfs_file_extent_offset(buf, fi);
2332 			ret = process_func(trans, root, bytenr, num_bytes,
2333 					   parent, ref_root, key.objectid,
2334 					   key.offset);
2335 			if (ret)
2336 				goto fail;
2337 		} else {
2338 			bytenr = btrfs_node_blockptr(buf, i);
2339 			num_bytes = btrfs_level_size(root, level - 1);
2340 			ret = process_func(trans, root, bytenr, num_bytes,
2341 					   parent, ref_root, level - 1, 0);
2342 			if (ret)
2343 				goto fail;
2344 		}
2345 	}
2346 	return 0;
2347 fail:
2348 	BUG();
2349 	return ret;
2350 }
2351 
2352 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2353 		  struct extent_buffer *buf, int full_backref)
2354 {
2355 	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2356 }
2357 
2358 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2359 		  struct extent_buffer *buf, int full_backref)
2360 {
2361 	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2362 }
2363 
2364 static int write_one_cache_group(struct btrfs_trans_handle *trans,
2365 				 struct btrfs_root *root,
2366 				 struct btrfs_path *path,
2367 				 struct btrfs_block_group_cache *cache)
2368 {
2369 	int ret;
2370 	struct btrfs_root *extent_root = root->fs_info->extent_root;
2371 	unsigned long bi;
2372 	struct extent_buffer *leaf;
2373 
2374 	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
2375 	if (ret < 0)
2376 		goto fail;
2377 	BUG_ON(ret);
2378 
2379 	leaf = path->nodes[0];
2380 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
2381 	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
2382 	btrfs_mark_buffer_dirty(leaf);
2383 	btrfs_release_path(extent_root, path);
2384 fail:
2385 	if (ret)
2386 		return ret;
2387 	return 0;
2389 }
2390 
2391 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2392 				   struct btrfs_root *root)
2393 {
2394 	struct btrfs_block_group_cache *cache, *entry;
2395 	struct rb_node *n;
2396 	int err = 0;
2397 	int werr = 0;
2398 	struct btrfs_path *path;
2399 	u64 last = 0;
2400 
2401 	path = btrfs_alloc_path();
2402 	if (!path)
2403 		return -ENOMEM;
2404 
2405 	while (1) {
2406 		cache = NULL;
2407 		spin_lock(&root->fs_info->block_group_cache_lock);
2408 		for (n = rb_first(&root->fs_info->block_group_cache_tree);
2409 		     n; n = rb_next(n)) {
2410 			entry = rb_entry(n, struct btrfs_block_group_cache,
2411 					 cache_node);
2412 			if (entry->dirty) {
2413 				cache = entry;
2414 				break;
2415 			}
2416 		}
2417 		spin_unlock(&root->fs_info->block_group_cache_lock);
2418 
2419 		if (!cache)
2420 			break;
2421 
2422 		cache->dirty = 0;
2423 		last += cache->key.offset;
2424 
2425 		err = write_one_cache_group(trans, root,
2426 					    path, cache);
2427 		/*
2428 		 * if we fail to write the cache group, remember the
2429 		 * error so it can be returned, but keep going so
2430 		 * the rest of the dirty block groups are written
2431 		 */
2432 		if (err) {
2433 			werr = err;
2434 			continue;
2435 		}
2436 	}
2437 	btrfs_free_path(path);
2438 	return werr;
2439 }
2440 
2441 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
2442 {
2443 	struct btrfs_block_group_cache *block_group;
2444 	int readonly = 0;
2445 
2446 	block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
2447 	if (!block_group || block_group->ro)
2448 		readonly = 1;
2449 	if (block_group)
2450 		btrfs_put_block_group(block_group);
2451 	return readonly;
2452 }
2453 
2454 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2455 			     u64 total_bytes, u64 bytes_used,
2456 			     struct btrfs_space_info **space_info)
2457 {
2458 	struct btrfs_space_info *found;
2459 
2460 	found = __find_space_info(info, flags);
2461 	if (found) {
2462 		spin_lock(&found->lock);
2463 		found->total_bytes += total_bytes;
2464 		found->bytes_used += bytes_used;
2465 		found->full = 0;
2466 		spin_unlock(&found->lock);
2467 		*space_info = found;
2468 		return 0;
2469 	}
2470 	found = kzalloc(sizeof(*found), GFP_NOFS);
2471 	if (!found)
2472 		return -ENOMEM;
2473 
2474 	INIT_LIST_HEAD(&found->block_groups);
2475 	init_rwsem(&found->groups_sem);
2476 	spin_lock_init(&found->lock);
2477 	found->flags = flags;
2478 	found->total_bytes = total_bytes;
2479 	found->bytes_used = bytes_used;
2480 	found->bytes_pinned = 0;
2481 	found->bytes_reserved = 0;
2482 	found->bytes_readonly = 0;
2483 	found->bytes_delalloc = 0;
2484 	found->full = 0;
2485 	found->force_alloc = 0;
2486 	*space_info = found;
2487 	list_add_rcu(&found->list, &info->space_info);
2488 	return 0;
2489 }
2490 
2491 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
2492 {
2493 	u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
2494 				   BTRFS_BLOCK_GROUP_RAID1 |
2495 				   BTRFS_BLOCK_GROUP_RAID10 |
2496 				   BTRFS_BLOCK_GROUP_DUP);
2497 	if (extra_flags) {
2498 		if (flags & BTRFS_BLOCK_GROUP_DATA)
2499 			fs_info->avail_data_alloc_bits |= extra_flags;
2500 		if (flags & BTRFS_BLOCK_GROUP_METADATA)
2501 			fs_info->avail_metadata_alloc_bits |= extra_flags;
2502 		if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
2503 			fs_info->avail_system_alloc_bits |= extra_flags;
2504 	}
2505 }
2506 
2507 static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
2508 {
2509 	spin_lock(&cache->space_info->lock);
2510 	spin_lock(&cache->lock);
2511 	if (!cache->ro) {
2512 		cache->space_info->bytes_readonly += cache->key.offset -
2513 					btrfs_block_group_used(&cache->item);
2514 		cache->ro = 1;
2515 	}
2516 	spin_unlock(&cache->lock);
2517 	spin_unlock(&cache->space_info->lock);
2518 }
2519 
2520 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
2521 {
2522 	u64 num_devices = root->fs_info->fs_devices->rw_devices;
2523 
2524 	if (num_devices == 1)
2525 		flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
2526 	if (num_devices < 4)
2527 		flags &= ~BTRFS_BLOCK_GROUP_RAID10;
2528 
2529 	if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
2530 	    (flags & (BTRFS_BLOCK_GROUP_RAID1 |
2531 		      BTRFS_BLOCK_GROUP_RAID10))) {
2532 		flags &= ~BTRFS_BLOCK_GROUP_DUP;
2533 	}
2534 
2535 	if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
2536 	    (flags & BTRFS_BLOCK_GROUP_RAID10)) {
2537 		flags &= ~BTRFS_BLOCK_GROUP_RAID1;
2538 	}
2539 
2540 	if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
2541 	    ((flags & BTRFS_BLOCK_GROUP_RAID1) |
2542 	     (flags & BTRFS_BLOCK_GROUP_RAID10) |
2543 	     (flags & BTRFS_BLOCK_GROUP_DUP)))
2544 		flags &= ~BTRFS_BLOCK_GROUP_RAID0;
2545 	return flags;
2546 }
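/*
 * example (illustrative): with a single rw device, a requested profile
 * of (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_RAID1) reduces to
 * plain BTRFS_BLOCK_GROUP_DATA, since RAID1 needs a second device;
 * RAID10 is likewise dropped when fewer than four devices are present.
 */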
2547 
2548 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
2549 {
2550 	struct btrfs_fs_info *info = root->fs_info;
2551 	u64 alloc_profile;
2552 
2553 	if (data) {
2554 		alloc_profile = info->avail_data_alloc_bits &
2555 			info->data_alloc_profile;
2556 		data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2557 	} else if (root == root->fs_info->chunk_root) {
2558 		alloc_profile = info->avail_system_alloc_bits &
2559 			info->system_alloc_profile;
2560 		data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2561 	} else {
2562 		alloc_profile = info->avail_metadata_alloc_bits &
2563 			info->metadata_alloc_profile;
2564 		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2565 	}
2566 
2567 	return btrfs_reduce_alloc_profile(root, data);
2568 }
2569 
2570 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2571 {
2572 	u64 alloc_target;
2573 
2574 	alloc_target = btrfs_get_alloc_profile(root, 1);
2575 	BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
2576 						       alloc_target);
2577 }
2578 
2579 /*
2580  * for now this just makes sure we have at least 5% of our metadata space
2581  * free for use (20% while new chunks can still be allocated).
2582  */
2583 int btrfs_check_metadata_free_space(struct btrfs_root *root)
2584 {
2585 	struct btrfs_fs_info *info = root->fs_info;
2586 	struct btrfs_space_info *meta_sinfo;
2587 	u64 alloc_target, thresh;
2588 	int committed = 0, ret;
2589 
2590 	/* get the space info for where the metadata will live */
2591 	alloc_target = btrfs_get_alloc_profile(root, 0);
2592 	meta_sinfo = __find_space_info(info, alloc_target);
2593 
2594 again:
2595 	spin_lock(&meta_sinfo->lock);
2596 	if (!meta_sinfo->full)
2597 		thresh = meta_sinfo->total_bytes * 80;
2598 	else
2599 		thresh = meta_sinfo->total_bytes * 95;
2600 
2601 	do_div(thresh, 100);
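	/*
	 * e.g. (illustrative numbers) with 1GiB of metadata space the
	 * threshold is ~819MiB while more chunks can still be allocated,
	 * and ~972MiB once the space info is full.
	 */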
2602 
2603 	if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2604 	    meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
2605 		struct btrfs_trans_handle *trans;
2606 		if (!meta_sinfo->full) {
2607 			meta_sinfo->force_alloc = 1;
2608 			spin_unlock(&meta_sinfo->lock);
2609 
2610 			trans = btrfs_start_transaction(root, 1);
2611 			if (!trans)
2612 				return -ENOMEM;
2613 
2614 			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2615 					     2 * 1024 * 1024, alloc_target, 0);
2616 			btrfs_end_transaction(trans, root);
2617 			goto again;
2618 		}
2619 		spin_unlock(&meta_sinfo->lock);
2620 
2621 		if (!committed) {
2622 			committed = 1;
2623 			trans = btrfs_join_transaction(root, 1);
2624 			if (!trans)
2625 				return -ENOMEM;
2626 			ret = btrfs_commit_transaction(trans, root);
2627 			if (ret)
2628 				return ret;
2629 			goto again;
2630 		}
2631 		return -ENOSPC;
2632 	}
2633 	spin_unlock(&meta_sinfo->lock);
2634 
2635 	return 0;
2636 }
2637 
2638 /*
2639  * This will check the space that the inode allocates from to make sure we have
2640  * enough space for bytes.
2641  */
2642 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
2643 				u64 bytes)
2644 {
2645 	struct btrfs_space_info *data_sinfo;
2646 	int ret = 0, committed = 0;
2647 
2648 	/* make sure bytes are sectorsize aligned */
2649 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
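	/* e.g. with a 4K sectorsize, a 5000 byte request rounds up to 8192 */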
2650 
2651 	data_sinfo = BTRFS_I(inode)->space_info;
2652 again:
2653 	/* make sure we have enough space to handle the data first */
2654 	spin_lock(&data_sinfo->lock);
2655 	if (data_sinfo->total_bytes - data_sinfo->bytes_used -
2656 	    data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
2657 	    data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
2658 	    data_sinfo->bytes_may_use < bytes) {
2659 		struct btrfs_trans_handle *trans;
2660 
2661 		/*
2662 		 * if we don't have enough free bytes in this space then we need
2663 		 * to alloc a new chunk.
2664 		 */
2665 		if (!data_sinfo->full) {
2666 			u64 alloc_target;
2667 
2668 			data_sinfo->force_alloc = 1;
2669 			spin_unlock(&data_sinfo->lock);
2670 
2671 			alloc_target = btrfs_get_alloc_profile(root, 1);
2672 			trans = btrfs_start_transaction(root, 1);
2673 			if (!trans)
2674 				return -ENOMEM;
2675 
2676 			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2677 					     bytes + 2 * 1024 * 1024,
2678 					     alloc_target, 0);
2679 			btrfs_end_transaction(trans, root);
2680 			if (ret)
2681 				return ret;
2682 			goto again;
2683 		}
2684 		spin_unlock(&data_sinfo->lock);
2685 
2686 		/* commit the current transaction and try again */
2687 		if (!committed) {
2688 			committed = 1;
2689 			trans = btrfs_join_transaction(root, 1);
2690 			if (!trans)
2691 				return -ENOMEM;
2692 			ret = btrfs_commit_transaction(trans, root);
2693 			if (ret)
2694 				return ret;
2695 			goto again;
2696 		}
2697 
2698 		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
2699 		       ", %llu bytes_used, %llu bytes_reserved, "
2700 		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use, "
2701 		       "%llu total\n", (unsigned long long)bytes,
2702 		       (unsigned long long)data_sinfo->bytes_delalloc,
2703 		       (unsigned long long)data_sinfo->bytes_used,
2704 		       (unsigned long long)data_sinfo->bytes_reserved,
2705 		       (unsigned long long)data_sinfo->bytes_pinned,
2706 		       (unsigned long long)data_sinfo->bytes_readonly,
2707 		       (unsigned long long)data_sinfo->bytes_may_use,
2708 		       (unsigned long long)data_sinfo->total_bytes);
2709 		return -ENOSPC;
2710 	}
2711 	data_sinfo->bytes_may_use += bytes;
2712 	BTRFS_I(inode)->reserved_bytes += bytes;
2713 	spin_unlock(&data_sinfo->lock);
2714 
2715 	return btrfs_check_metadata_free_space(root);
2716 }
2717 
2718 /*
2719  * if there was an error for whatever reason after calling
2720  * btrfs_check_data_free_space, call this so we can cleanup the counters.
2721  */
2722 void btrfs_free_reserved_data_space(struct btrfs_root *root,
2723 				    struct inode *inode, u64 bytes)
2724 {
2725 	struct btrfs_space_info *data_sinfo;
2726 
2727 	/* make sure bytes are sectorsize aligned */
2728 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2729 
2730 	data_sinfo = BTRFS_I(inode)->space_info;
2731 	spin_lock(&data_sinfo->lock);
2732 	data_sinfo->bytes_may_use -= bytes;
2733 	BTRFS_I(inode)->reserved_bytes -= bytes;
2734 	spin_unlock(&data_sinfo->lock);
2735 }
2736 
2737 /* called when we are adding a delalloc extent to the inode's io_tree */
2738 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
2739 				  u64 bytes)
2740 {
2741 	struct btrfs_space_info *data_sinfo;
2742 
2743 	/* get the space info for where this inode will be storing its data */
2744 	data_sinfo = BTRFS_I(inode)->space_info;
2745 
2746 	/* make sure we have enough space to handle the data first */
2747 	spin_lock(&data_sinfo->lock);
2748 	data_sinfo->bytes_delalloc += bytes;
2749 
2750 	/*
2751 	 * we are adding a delalloc extent without calling
2752 	 * btrfs_check_data_free_space first.  This happens on a weird
2753 	 * writepage condition, but shouldn't hurt our accounting
2754 	 */
2755 	if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
2756 		data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
2757 		BTRFS_I(inode)->reserved_bytes = 0;
2758 	} else {
2759 		data_sinfo->bytes_may_use -= bytes;
2760 		BTRFS_I(inode)->reserved_bytes -= bytes;
2761 	}
2762 
2763 	spin_unlock(&data_sinfo->lock);
2764 }
2765 
2766 /* called when we are clearing a delalloc extent from the inode's io_tree */
2767 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
2768 			      u64 bytes)
2769 {
2770 	struct btrfs_space_info *info;
2771 
2772 	info = BTRFS_I(inode)->space_info;
2773 
2774 	spin_lock(&info->lock);
2775 	info->bytes_delalloc -= bytes;
2776 	spin_unlock(&info->lock);
2777 }
2778 
2779 static void force_metadata_allocation(struct btrfs_fs_info *info)
2780 {
2781 	struct list_head *head = &info->space_info;
2782 	struct btrfs_space_info *found;
2783 
2784 	rcu_read_lock();
2785 	list_for_each_entry_rcu(found, head, list) {
2786 		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
2787 			found->force_alloc = 1;
2788 	}
2789 	rcu_read_unlock();
2790 }
2791 
2792 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
2793 			  struct btrfs_root *extent_root, u64 alloc_bytes,
2794 			  u64 flags, int force)
2795 {
2796 	struct btrfs_space_info *space_info;
2797 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
2798 	u64 thresh;
2799 	int ret = 0;
2800 
2801 	mutex_lock(&fs_info->chunk_mutex);
2802 
2803 	flags = btrfs_reduce_alloc_profile(extent_root, flags);
2804 
2805 	space_info = __find_space_info(extent_root->fs_info, flags);
2806 	if (!space_info) {
2807 		ret = update_space_info(extent_root->fs_info, flags,
2808 					0, 0, &space_info);
2809 		BUG_ON(ret);
2810 	}
2811 	BUG_ON(!space_info);
2812 
2813 	spin_lock(&space_info->lock);
2814 	if (space_info->force_alloc) {
2815 		force = 1;
2816 		space_info->force_alloc = 0;
2817 	}
2818 	if (space_info->full) {
2819 		spin_unlock(&space_info->lock);
2820 		goto out;
2821 	}
2822 
2823 	thresh = space_info->total_bytes - space_info->bytes_readonly;
2824 	thresh = div_factor(thresh, 6);
2825 	if (!force &&
2826 	   (space_info->bytes_used + space_info->bytes_pinned +
2827 	    space_info->bytes_reserved + alloc_bytes) < thresh) {
2828 		spin_unlock(&space_info->lock);
2829 		goto out;
2830 	}
2831 	spin_unlock(&space_info->lock);
2832 
2833 	/*
2834 	 * if we're doing a data chunk, go ahead and make sure that
2835 	 * we keep a reasonable number of metadata chunks allocated in the
2836 	 * FS as well.
2837 	 */
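	/*
	 * e.g. a metadata_ratio of 8 flags metadata space for forced
	 * allocation after every 8th data chunk allocation
	 */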
2838 	if (flags & BTRFS_BLOCK_GROUP_DATA) {
2839 		fs_info->data_chunk_allocations++;
2840 		if (!(fs_info->data_chunk_allocations %
2841 		      fs_info->metadata_ratio))
2842 			force_metadata_allocation(fs_info);
2843 	}
2844 
2845 	ret = btrfs_alloc_chunk(trans, extent_root, flags);
2846 	if (ret)
2847 		space_info->full = 1;
2848 out:
2849 	mutex_unlock(&extent_root->fs_info->chunk_mutex);
2850 	return ret;
2851 }
2852 
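/*
 * adjust the used-byte accounting for [bytenr, bytenr + num_bytes),
 * walking every block group the range covers.  'alloc' picks the
 * direction; with 'mark_free' set, freed ranges are also discarded and
 * returned to the free space cache.
 */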
2853 static int update_block_group(struct btrfs_trans_handle *trans,
2854 			      struct btrfs_root *root,
2855 			      u64 bytenr, u64 num_bytes, int alloc,
2856 			      int mark_free)
2857 {
2858 	struct btrfs_block_group_cache *cache;
2859 	struct btrfs_fs_info *info = root->fs_info;
2860 	u64 total = num_bytes;
2861 	u64 old_val;
2862 	u64 byte_in_group;
2863 
2864 	/* block accounting for super block */
2865 	spin_lock(&info->delalloc_lock);
2866 	old_val = btrfs_super_bytes_used(&info->super_copy);
2867 	if (alloc)
2868 		old_val += num_bytes;
2869 	else
2870 		old_val -= num_bytes;
2871 	btrfs_set_super_bytes_used(&info->super_copy, old_val);
2872 
2873 	/* block accounting for root item */
2874 	old_val = btrfs_root_used(&root->root_item);
2875 	if (alloc)
2876 		old_val += num_bytes;
2877 	else
2878 		old_val -= num_bytes;
2879 	btrfs_set_root_used(&root->root_item, old_val);
2880 	spin_unlock(&info->delalloc_lock);
2881 
2882 	while (total) {
2883 		cache = btrfs_lookup_block_group(info, bytenr);
2884 		if (!cache)
2885 			return -1;
2886 		byte_in_group = bytenr - cache->key.objectid;
2887 		WARN_ON(byte_in_group > cache->key.offset);
2888 
2889 		spin_lock(&cache->space_info->lock);
2890 		spin_lock(&cache->lock);
2891 		cache->dirty = 1;
2892 		old_val = btrfs_block_group_used(&cache->item);
2893 		num_bytes = min(total, cache->key.offset - byte_in_group);
2894 		if (alloc) {
2895 			old_val += num_bytes;
2896 			cache->space_info->bytes_used += num_bytes;
2897 			if (cache->ro)
2898 				cache->space_info->bytes_readonly -= num_bytes;
2899 			btrfs_set_block_group_used(&cache->item, old_val);
2900 			spin_unlock(&cache->lock);
2901 			spin_unlock(&cache->space_info->lock);
2902 		} else {
2903 			old_val -= num_bytes;
2904 			cache->space_info->bytes_used -= num_bytes;
2905 			if (cache->ro)
2906 				cache->space_info->bytes_readonly += num_bytes;
2907 			btrfs_set_block_group_used(&cache->item, old_val);
2908 			spin_unlock(&cache->lock);
2909 			spin_unlock(&cache->space_info->lock);
2910 			if (mark_free) {
2911 				int ret;
2912 
2913 				ret = btrfs_discard_extent(root, bytenr,
2914 							   num_bytes);
2915 				WARN_ON(ret);
2916 
2917 				ret = btrfs_add_free_space(cache, bytenr,
2918 							   num_bytes);
2919 				WARN_ON(ret);
2920 			}
2921 		}
2922 		btrfs_put_block_group(cache);
2923 		total -= num_bytes;
2924 		bytenr += num_bytes;
2925 	}
2926 	return 0;
2927 }
2928 
2929 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
2930 {
2931 	struct btrfs_block_group_cache *cache;
2932 	u64 bytenr;
2933 
2934 	cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
2935 	if (!cache)
2936 		return 0;
2937 
2938 	bytenr = cache->key.objectid;
2939 	btrfs_put_block_group(cache);
2940 
2941 	return bytenr;
2942 }
2943 
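/*
 * pin or unpin [bytenr, bytenr + num) by marking the range in the
 * fs-wide pinned_extents tree and updating the pinned counters of every
 * block group it covers; unpinning also gives the space back to the
 * free space cache once the group has been cached.
 */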
2944 int btrfs_update_pinned_extents(struct btrfs_root *root,
2945 				u64 bytenr, u64 num, int pin)
2946 {
2947 	u64 len;
2948 	struct btrfs_block_group_cache *cache;
2949 	struct btrfs_fs_info *fs_info = root->fs_info;
2950 
2951 	if (pin) {
2952 		set_extent_dirty(&fs_info->pinned_extents,
2953 				bytenr, bytenr + num - 1, GFP_NOFS);
2954 	} else {
2955 		clear_extent_dirty(&fs_info->pinned_extents,
2956 				bytenr, bytenr + num - 1, GFP_NOFS);
2957 	}
2958 
2959 	while (num > 0) {
2960 		cache = btrfs_lookup_block_group(fs_info, bytenr);
2961 		BUG_ON(!cache);
2962 		len = min(num, cache->key.offset -
2963 			  (bytenr - cache->key.objectid));
2964 		if (pin) {
2965 			spin_lock(&cache->space_info->lock);
2966 			spin_lock(&cache->lock);
2967 			cache->pinned += len;
2968 			cache->space_info->bytes_pinned += len;
2969 			spin_unlock(&cache->lock);
2970 			spin_unlock(&cache->space_info->lock);
2971 			fs_info->total_pinned += len;
2972 		} else {
2973 			spin_lock(&cache->space_info->lock);
2974 			spin_lock(&cache->lock);
2975 			cache->pinned -= len;
2976 			cache->space_info->bytes_pinned -= len;
2977 			spin_unlock(&cache->lock);
2978 			spin_unlock(&cache->space_info->lock);
2979 			fs_info->total_pinned -= len;
2980 			if (cache->cached)
2981 				btrfs_add_free_space(cache, bytenr, len);
2982 		}
2983 		btrfs_put_block_group(cache);
2984 		bytenr += len;
2985 		num -= len;
2986 	}
2987 	return 0;
2988 }
2989 
2990 static int update_reserved_extents(struct btrfs_root *root,
2991 				   u64 bytenr, u64 num, int reserve)
2992 {
2993 	u64 len;
2994 	struct btrfs_block_group_cache *cache;
2995 	struct btrfs_fs_info *fs_info = root->fs_info;
2996 
2997 	while (num > 0) {
2998 		cache = btrfs_lookup_block_group(fs_info, bytenr);
2999 		BUG_ON(!cache);
3000 		len = min(num, cache->key.offset -
3001 			  (bytenr - cache->key.objectid));
3002 
3003 		spin_lock(&cache->space_info->lock);
3004 		spin_lock(&cache->lock);
3005 		if (reserve) {
3006 			cache->reserved += len;
3007 			cache->space_info->bytes_reserved += len;
3008 		} else {
3009 			cache->reserved -= len;
3010 			cache->space_info->bytes_reserved -= len;
3011 		}
3012 		spin_unlock(&cache->lock);
3013 		spin_unlock(&cache->space_info->lock);
3014 		btrfs_put_block_group(cache);
3015 		bytenr += len;
3016 		num -= len;
3017 	}
3018 	return 0;
3019 }
3020 
3021 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
3022 {
3023 	u64 last = 0;
3024 	u64 start;
3025 	u64 end;
3026 	struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
3027 	int ret;
3028 
3029 	while (1) {
3030 		ret = find_first_extent_bit(pinned_extents, last,
3031 					    &start, &end, EXTENT_DIRTY);
3032 		if (ret)
3033 			break;
3034 		set_extent_dirty(copy, start, end, GFP_NOFS);
3035 		last = end + 1;
3036 	}
3037 	return 0;
3038 }
3039 
3040 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3041 			       struct btrfs_root *root,
3042 			       struct extent_io_tree *unpin)
3043 {
3044 	u64 start;
3045 	u64 end;
3046 	int ret;
3047 
3048 	while (1) {
3049 		ret = find_first_extent_bit(unpin, 0, &start, &end,
3050 					    EXTENT_DIRTY);
3051 		if (ret)
3052 			break;
3053 
3054 		ret = btrfs_discard_extent(root, start, end + 1 - start);
3055 
3057 		btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
3058 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
3059 
3060 		cond_resched();
3061 	}
3062 	return ret;
3063 }
3064 
3065 static int pin_down_bytes(struct btrfs_trans_handle *trans,
3066 			  struct btrfs_root *root,
3067 			  struct btrfs_path *path,
3068 			  u64 bytenr, u64 num_bytes, int is_data,
3069 			  struct extent_buffer **must_clean)
3070 {
3071 	int err = 0;
3072 	struct extent_buffer *buf;
3073 
3074 	if (is_data)
3075 		goto pinit;
3076 
3077 	buf = btrfs_find_tree_block(root, bytenr, num_bytes);
3078 	if (!buf)
3079 		goto pinit;
3080 
3081 	/* we can reuse a block if it hasn't been written
3082 	 * and it is from this transaction.  We can't
3083 	 * reuse anything from the tree log root because
3084 	 * it has tiny sub-transactions.
3085 	 */
3086 	if (btrfs_buffer_uptodate(buf, 0) &&
3087 	    btrfs_try_tree_lock(buf)) {
3088 		u64 header_owner = btrfs_header_owner(buf);
3089 		u64 header_transid = btrfs_header_generation(buf);
3090 		if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
3091 		    header_transid == trans->transid &&
3092 		    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3093 			*must_clean = buf;
3094 			return 1;
3095 		}
3096 		btrfs_tree_unlock(buf);
3097 	}
3098 	free_extent_buffer(buf);
3099 pinit:
3100 	btrfs_set_path_blocking(path);
3102 	btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
3103 
3104 	BUG_ON(err < 0);
3105 	return 0;
3106 }
3107 
3108 
3109 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3110 				struct btrfs_root *root,
3111 				u64 bytenr, u64 num_bytes, u64 parent,
3112 				u64 root_objectid, u64 owner_objectid,
3113 				u64 owner_offset, int refs_to_drop,
3114 				struct btrfs_delayed_extent_op *extent_op)
3115 {
3116 	struct btrfs_key key;
3117 	struct btrfs_path *path;
3118 	struct btrfs_fs_info *info = root->fs_info;
3119 	struct btrfs_root *extent_root = info->extent_root;
3120 	struct extent_buffer *leaf;
3121 	struct btrfs_extent_item *ei;
3122 	struct btrfs_extent_inline_ref *iref;
3123 	int ret;
3124 	int is_data;
3125 	int extent_slot = 0;
3126 	int found_extent = 0;
3127 	int num_to_del = 1;
3128 	u32 item_size;
3129 	u64 refs;
3130 
3131 	path = btrfs_alloc_path();
3132 	if (!path)
3133 		return -ENOMEM;
3134 
3135 	path->reada = 1;
3136 	path->leave_spinning = 1;
3137 
3138 	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
3139 	BUG_ON(!is_data && refs_to_drop != 1);
3140 
3141 	ret = lookup_extent_backref(trans, extent_root, path, &iref,
3142 				    bytenr, num_bytes, parent,
3143 				    root_objectid, owner_objectid,
3144 				    owner_offset);
3145 	if (ret == 0) {
3146 		extent_slot = path->slots[0];
3147 		while (extent_slot >= 0) {
3148 			btrfs_item_key_to_cpu(path->nodes[0], &key,
3149 					      extent_slot);
3150 			if (key.objectid != bytenr)
3151 				break;
3152 			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
3153 			    key.offset == num_bytes) {
3154 				found_extent = 1;
3155 				break;
3156 			}
3157 			if (path->slots[0] - extent_slot > 5)
3158 				break;
3159 			extent_slot--;
3160 		}
3161 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3162 		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
3163 		if (found_extent && item_size < sizeof(*ei))
3164 			found_extent = 0;
3165 #endif
3166 		if (!found_extent) {
3167 			BUG_ON(iref);
3168 			ret = remove_extent_backref(trans, extent_root, path,
3169 						    NULL, refs_to_drop,
3170 						    is_data);
3171 			BUG_ON(ret);
3172 			btrfs_release_path(extent_root, path);
3173 			path->leave_spinning = 1;
3174 
3175 			key.objectid = bytenr;
3176 			key.type = BTRFS_EXTENT_ITEM_KEY;
3177 			key.offset = num_bytes;
3178 
3179 			ret = btrfs_search_slot(trans, extent_root,
3180 						&key, path, -1, 1);
3181 			if (ret) {
3182 				printk(KERN_ERR "umm, got %d back from search"
3183 				       ", was looking for %llu\n", ret,
3184 				       (unsigned long long)bytenr);
3185 				btrfs_print_leaf(extent_root, path->nodes[0]);
3186 			}
3187 			BUG_ON(ret);
3188 			extent_slot = path->slots[0];
3189 		}
3190 	} else {
3191 		btrfs_print_leaf(extent_root, path->nodes[0]);
3192 		WARN_ON(1);
3193 		printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
3194 		       "parent %llu root %llu owner %llu offset %llu\n",
3195 		       (unsigned long long)bytenr,
3196 		       (unsigned long long)parent,
3197 		       (unsigned long long)root_objectid,
3198 		       (unsigned long long)owner_objectid,
3199 		       (unsigned long long)owner_offset);
3200 	}
3201 
3202 	leaf = path->nodes[0];
3203 	item_size = btrfs_item_size_nr(leaf, extent_slot);
3204 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3205 	if (item_size < sizeof(*ei)) {
3206 		BUG_ON(found_extent || extent_slot != path->slots[0]);
3207 		ret = convert_extent_item_v0(trans, extent_root, path,
3208 					     owner_objectid, 0);
3209 		BUG_ON(ret < 0);
3210 
3211 		btrfs_release_path(extent_root, path);
3212 		path->leave_spinning = 1;
3213 
3214 		key.objectid = bytenr;
3215 		key.type = BTRFS_EXTENT_ITEM_KEY;
3216 		key.offset = num_bytes;
3217 
3218 		ret = btrfs_search_slot(trans, extent_root, &key, path,
3219 					-1, 1);
3220 		if (ret) {
3221 			printk(KERN_ERR "umm, got %d back from search"
3222 			       ", was looking for %llu\n", ret,
3223 			       (unsigned long long)bytenr);
3224 			btrfs_print_leaf(extent_root, path->nodes[0]);
3225 		}
3226 		BUG_ON(ret);
3227 		extent_slot = path->slots[0];
3228 		leaf = path->nodes[0];
3229 		item_size = btrfs_item_size_nr(leaf, extent_slot);
3230 	}
3231 #endif
3232 	BUG_ON(item_size < sizeof(*ei));
3233 	ei = btrfs_item_ptr(leaf, extent_slot,
3234 			    struct btrfs_extent_item);
3235 	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
3236 		struct btrfs_tree_block_info *bi;
3237 		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
3238 		bi = (struct btrfs_tree_block_info *)(ei + 1);
3239 		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
3240 	}
3241 
3242 	refs = btrfs_extent_refs(leaf, ei);
3243 	BUG_ON(refs < refs_to_drop);
3244 	refs -= refs_to_drop;
3245 
3246 	if (refs > 0) {
3247 		if (extent_op)
3248 			__run_delayed_extent_op(extent_op, leaf, ei);
3249 		/*
3250 		 * In the case of inline back ref, reference count will
3251 		 * be updated by remove_extent_backref
3252 		 */
3253 		if (iref) {
3254 			BUG_ON(!found_extent);
3255 		} else {
3256 			btrfs_set_extent_refs(leaf, ei, refs);
3257 			btrfs_mark_buffer_dirty(leaf);
3258 		}
3259 		if (found_extent) {
3260 			ret = remove_extent_backref(trans, extent_root, path,
3261 						    iref, refs_to_drop,
3262 						    is_data);
3263 			BUG_ON(ret);
3264 		}
3265 	} else {
3266 		int mark_free = 0;
3267 		struct extent_buffer *must_clean = NULL;
3268 
3269 		if (found_extent) {
3270 			BUG_ON(is_data && refs_to_drop !=
3271 			       extent_data_ref_count(root, path, iref));
3272 			if (iref) {
3273 				BUG_ON(path->slots[0] != extent_slot);
3274 			} else {
3275 				BUG_ON(path->slots[0] != extent_slot + 1);
3276 				path->slots[0] = extent_slot;
3277 				num_to_del = 2;
3278 			}
3279 		}
3280 
3281 		ret = pin_down_bytes(trans, root, path, bytenr,
3282 				     num_bytes, is_data, &must_clean);
3283 		if (ret > 0)
3284 			mark_free = 1;
3285 		BUG_ON(ret < 0);
3286 		/*
3287 		 * it is going to be very rare for someone to be waiting
3288 		 * on the block we're freeing.  del_items might need to
3289 		 * schedule, so rather than get fancy, just force it
3290 		 * to blocking here
3291 		 */
3292 		if (must_clean)
3293 			btrfs_set_lock_blocking(must_clean);
3294 
3295 		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3296 				      num_to_del);
3297 		BUG_ON(ret);
3298 		btrfs_release_path(extent_root, path);
3299 
3300 		if (must_clean) {
3301 			clean_tree_block(NULL, root, must_clean);
3302 			btrfs_tree_unlock(must_clean);
3303 			free_extent_buffer(must_clean);
3304 		}
3305 
3306 		if (is_data) {
3307 			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
3308 			BUG_ON(ret);
3309 		} else {
3310 			invalidate_mapping_pages(info->btree_inode->i_mapping,
3311 			     bytenr >> PAGE_CACHE_SHIFT,
3312 			     (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
3313 		}
3314 
3315 		ret = update_block_group(trans, root, bytenr, num_bytes, 0,
3316 					 mark_free);
3317 		BUG_ON(ret);
3318 	}
3319 	btrfs_free_path(path);
3320 	return ret;
3321 }
3322 
3323 /*
3324  * when we free an extent, it is possible (and likely) that we free the last
3325  * delayed ref for that extent as well.  This searches the delayed ref tree for
3326  * a given extent, and if there are no other delayed refs to be processed, it
3327  * removes it from the tree.
3328  */
3329 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3330 				      struct btrfs_root *root, u64 bytenr)
3331 {
3332 	struct btrfs_delayed_ref_head *head;
3333 	struct btrfs_delayed_ref_root *delayed_refs;
3334 	struct btrfs_delayed_ref_node *ref;
3335 	struct rb_node *node;
3336 	int ret;
3337 
3338 	delayed_refs = &trans->transaction->delayed_refs;
3339 	spin_lock(&delayed_refs->lock);
3340 	head = btrfs_find_delayed_ref_head(trans, bytenr);
3341 	if (!head)
3342 		goto out;
3343 
3344 	node = rb_prev(&head->node.rb_node);
3345 	if (!node)
3346 		goto out;
3347 
3348 	ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
3349 
3350 	/* there are still entries for this ref, we can't drop it */
3351 	if (ref->bytenr == bytenr)
3352 		goto out;
3353 
3354 	if (head->extent_op) {
3355 		if (!head->must_insert_reserved)
3356 			goto out;
3357 		kfree(head->extent_op);
3358 		head->extent_op = NULL;
3359 	}
3360 
3361 	/*
3362 	 * waiting for the lock here would deadlock.  If someone else has it
3363 	 * locked they are already in the process of dropping it anyway
3364 	 */
3365 	if (!mutex_trylock(&head->mutex))
3366 		goto out;
3367 
3368 	/*
3369 	 * at this point we have a head with no other entries.  Go
3370 	 * ahead and process it.
3371 	 */
3372 	head->node.in_tree = 0;
3373 	rb_erase(&head->node.rb_node, &delayed_refs->root);
3374 
3375 	delayed_refs->num_entries--;
3376 
3377 	/*
3378 	 * we don't take a ref on the node because we're removing it from the
3379 	 * tree, so we just steal the ref the tree was holding.
3380 	 */
3381 	delayed_refs->num_heads--;
3382 	if (list_empty(&head->cluster))
3383 		delayed_refs->num_heads_ready--;
3384 
3385 	list_del_init(&head->cluster);
3386 	spin_unlock(&delayed_refs->lock);
3387 
3388 	ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
3389 				  &head->node, head->extent_op,
3390 				  head->must_insert_reserved);
3391 	BUG_ON(ret);
3392 	btrfs_put_delayed_ref(&head->node);
3393 	return 0;
3394 out:
3395 	spin_unlock(&delayed_refs->lock);
3396 	return 0;
3397 }
3398 
3399 int btrfs_free_extent(struct btrfs_trans_handle *trans,
3400 		      struct btrfs_root *root,
3401 		      u64 bytenr, u64 num_bytes, u64 parent,
3402 		      u64 root_objectid, u64 owner, u64 offset)
3403 {
3404 	int ret;
3405 
3406 	/*
3407 	 * tree log blocks never actually go into the extent allocation
3408 	 * tree, just update pinning info and exit early.
3409 	 */
3410 	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
3411 		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
3413 		btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
3414 		update_reserved_extents(root, bytenr, num_bytes, 0);
3415 		ret = 0;
3416 	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
3417 		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
3418 					parent, root_objectid, (int)owner,
3419 					BTRFS_DROP_DELAYED_REF, NULL);
3420 		BUG_ON(ret);
3421 		ret = check_ref_cleanup(trans, root, bytenr);
3422 		BUG_ON(ret);
3423 	} else {
3424 		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
3425 					parent, root_objectid, owner,
3426 					offset, BTRFS_DROP_DELAYED_REF, NULL);
3427 		BUG_ON(ret);
3428 	}
3429 	return ret;
3430 }
3431 
3432 static u64 stripe_align(struct btrfs_root *root, u64 val)
3433 {
3434 	u64 mask = ((u64)root->stripesize - 1);
3435 	u64 ret = (val + mask) & ~mask;
3436 	return ret;
3437 }
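/*
 * e.g. with a 64K stripesize, stripe_align(root, 65537) returns 131072,
 * the next stripe boundary at or above val.
 */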
3438 
3439 /*
3440  * walks the btree of allocated extents and finds a hole of a given size.
3441  * The key ins is changed to record the hole:
3442  * ins->objectid == start byte of the hole
3443  * ins->type == BTRFS_EXTENT_ITEM_KEY
3444  * ins->offset == number of bytes
3445  * Any available space before search_start is skipped.
3446  */
3447 static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3448 				     struct btrfs_root *orig_root,
3449 				     u64 num_bytes, u64 empty_size,
3450 				     u64 search_start, u64 search_end,
3451 				     u64 hint_byte, struct btrfs_key *ins,
3452 				     u64 exclude_start, u64 exclude_nr,
3453 				     int data)
3454 {
3455 	int ret = 0;
3456 	struct btrfs_root *root = orig_root->fs_info->extent_root;
3457 	struct btrfs_free_cluster *last_ptr = NULL;
3458 	struct btrfs_block_group_cache *block_group = NULL;
3459 	int empty_cluster = 2 * 1024 * 1024;
3460 	int allowed_chunk_alloc = 0;
3461 	struct btrfs_space_info *space_info;
3462 	int last_ptr_loop = 0;
3463 	int loop = 0;
3464 
3465 	WARN_ON(num_bytes < root->sectorsize);
3466 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
3467 	ins->objectid = 0;
3468 	ins->offset = 0;
3469 
3470 	space_info = __find_space_info(root->fs_info, data);
3471 
3472 	if (orig_root->ref_cows || empty_size)
3473 		allowed_chunk_alloc = 1;
3474 
3475 	if (data & BTRFS_BLOCK_GROUP_METADATA) {
3476 		last_ptr = &root->fs_info->meta_alloc_cluster;
3477 		if (!btrfs_test_opt(root, SSD))
3478 			empty_cluster = 64 * 1024;
3479 	}
3480 
3481 	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
3482 		last_ptr = &root->fs_info->data_alloc_cluster;
3483 	}
3484 
3485 	if (last_ptr) {
3486 		spin_lock(&last_ptr->lock);
3487 		if (last_ptr->block_group)
3488 			hint_byte = last_ptr->window_start;
3489 		spin_unlock(&last_ptr->lock);
3490 	}
3491 
3492 	search_start = max(search_start, first_logical_byte(root, 0));
3493 	search_start = max(search_start, hint_byte);
3494 
3495 	if (!last_ptr) {
3496 		empty_cluster = 0;
3497 		loop = 1;
3498 	}
3499 
3500 	if (search_start == hint_byte) {
3501 		block_group = btrfs_lookup_block_group(root->fs_info,
3502 						       search_start);
3503 		if (block_group && block_group_bits(block_group, data)) {
3504 			down_read(&space_info->groups_sem);
3505 			if (list_empty(&block_group->list) ||
3506 			    block_group->ro) {
3507 				/*
3508 				 * someone is removing this block group,
3509 				 * we can't jump into the have_block_group
3510 				 * target because our list pointers are not
3511 				 * valid
3512 				 */
3513 				btrfs_put_block_group(block_group);
3514 				up_read(&space_info->groups_sem);
3515 			} else
3516 				goto have_block_group;
3517 		} else if (block_group) {
3518 			btrfs_put_block_group(block_group);
3519 		}
3520 	}
3521 
3522 search:
3523 	down_read(&space_info->groups_sem);
3524 	list_for_each_entry(block_group, &space_info->block_groups, list) {
3525 		u64 offset;
3526 
3527 		atomic_inc(&block_group->count);
3528 		search_start = block_group->key.objectid;
3529 
3530 have_block_group:
3531 		if (unlikely(!block_group->cached)) {
3532 			mutex_lock(&block_group->cache_mutex);
3533 			ret = cache_block_group(root, block_group);
3534 			mutex_unlock(&block_group->cache_mutex);
3535 			if (ret) {
3536 				btrfs_put_block_group(block_group);
3537 				break;
3538 			}
3539 		}
3540 
3541 		if (unlikely(block_group->ro))
3542 			goto loop;
3543 
3544 		if (last_ptr) {
3545 			/*
3546 			 * the refill lock keeps out other
3547 			 * people trying to start a new cluster
3548 			 */
3549 			spin_lock(&last_ptr->refill_lock);
3550 			if (last_ptr->block_group &&
3551 			    (last_ptr->block_group->ro ||
3552 			    !block_group_bits(last_ptr->block_group, data))) {
3553 				offset = 0;
3554 				goto refill_cluster;
3555 			}
3556 
3557 			offset = btrfs_alloc_from_cluster(block_group, last_ptr,
3558 						 num_bytes, search_start);
3559 			if (offset) {
3560 				/* we have a block, we're done */
3561 				spin_unlock(&last_ptr->refill_lock);
3562 				goto checks;
3563 			}
3564 
3565 			spin_lock(&last_ptr->lock);
3566 			/*
3567 			 * whoops, this cluster doesn't actually point to
3568 			 * this block group.  Get a ref on the block
3569 			 * group it does point to and try again
3570 			 */
3571 			if (!last_ptr_loop && last_ptr->block_group &&
3572 			    last_ptr->block_group != block_group) {
3573 
3574 				btrfs_put_block_group(block_group);
3575 				block_group = last_ptr->block_group;
3576 				atomic_inc(&block_group->count);
3577 				spin_unlock(&last_ptr->lock);
3578 				spin_unlock(&last_ptr->refill_lock);
3579 
3580 				last_ptr_loop = 1;
3581 				search_start = block_group->key.objectid;
3582 				/*
3583 				 * we know this block group is properly
3584 				 * in the list because
3585 				 * btrfs_remove_block_group drops the
3586 				 * cluster before it removes the block
3587 				 * group from the list
3588 				 */
3589 				goto have_block_group;
3590 			}
3591 			spin_unlock(&last_ptr->lock);
3592 refill_cluster:
3593 			/*
3594 			 * this cluster didn't work out, free it and
3595 			 * start over
3596 			 */
3597 			btrfs_return_cluster_to_free_space(NULL, last_ptr);
3598 
3599 			last_ptr_loop = 0;
3600 
3601 			/* allocate a cluster in this block group */
3602 			ret = btrfs_find_space_cluster(trans, root,
3603 					       block_group, last_ptr,
3604 					       offset, num_bytes,
3605 					       empty_cluster + empty_size);
3606 			if (ret == 0) {
3607 				/*
3608 				 * now pull our allocation out of this
3609 				 * cluster
3610 				 */
3611 				offset = btrfs_alloc_from_cluster(block_group,
3612 						  last_ptr, num_bytes,
3613 						  search_start);
3614 				if (offset) {
3615 					/* we found one, proceed */
3616 					spin_unlock(&last_ptr->refill_lock);
3617 					goto checks;
3618 				}
3619 			}
3620 			/*
3621 			 * at this point we either didn't find a cluster
3622 			 * or we weren't able to allocate a block from our
3623 			 * cluster.  Free the cluster we've been trying
3624 			 * to use, and go to the next block group
3625 			 */
3626 			if (loop < 2) {
3627 				btrfs_return_cluster_to_free_space(NULL,
3628 								   last_ptr);
3629 				spin_unlock(&last_ptr->refill_lock);
3630 				goto loop;
3631 			}
3632 			spin_unlock(&last_ptr->refill_lock);
3633 		}
3634 
3635 		offset = btrfs_find_space_for_alloc(block_group, search_start,
3636 						    num_bytes, empty_size);
3637 		if (!offset)
3638 			goto loop;
3639 checks:
3640 		search_start = stripe_align(root, offset);
3641 
3642 		/* the allocation would run past search_end, move on to the next group */
3643 		if (search_start + num_bytes >= search_end) {
3644 			btrfs_add_free_space(block_group, offset, num_bytes);
3645 			goto loop;
3646 		}
3647 
3648 		/* the allocation doesn't fit in this block group, move on to the next */
3649 		if (search_start + num_bytes >
3650 		    block_group->key.objectid + block_group->key.offset) {
3651 			btrfs_add_free_space(block_group, offset, num_bytes);
3652 			goto loop;
3653 		}
3654 
3655 		if (exclude_nr > 0 &&
3656 		    (search_start + num_bytes > exclude_start &&
3657 		     search_start < exclude_start + exclude_nr)) {
3658 			search_start = exclude_start + exclude_nr;
3659 
3660 			btrfs_add_free_space(block_group, offset, num_bytes);
3661 			/*
3662 			 * if search_start is still in this block group
3663 			 * then we just re-search this block group
3664 			 */
3665 			if (search_start >= block_group->key.objectid &&
3666 			    search_start < (block_group->key.objectid +
3667 					    block_group->key.offset))
3668 				goto have_block_group;
3669 			goto loop;
3670 		}
3671 
3672 		ins->objectid = search_start;
3673 		ins->offset = num_bytes;
3674 
3675 		if (offset < search_start)
3676 			btrfs_add_free_space(block_group, offset,
3677 					     search_start - offset);
3678 		BUG_ON(offset > search_start);
3679 
3680 		/* we are all good, lets return */
3681 		break;
3682 loop:
3683 		btrfs_put_block_group(block_group);
3684 	}
3685 	up_read(&space_info->groups_sem);
3686 
3687 	/* loop == 0, try to find a clustered alloc in every block group
3688 	 * loop == 1, try again after forcing a chunk allocation
3689 	 * loop == 2, set empty_size and empty_cluster to 0 and try again
3690 	 */
3691 	if (!ins->objectid && loop < 3 &&
3692 	    (empty_size || empty_cluster || allowed_chunk_alloc)) {
3693 		if (loop >= 2) {
3694 			empty_size = 0;
3695 			empty_cluster = 0;
3696 		}
3697 
3698 		if (allowed_chunk_alloc) {
3699 			ret = do_chunk_alloc(trans, root, num_bytes +
3700 					     2 * 1024 * 1024, data, 1);
3701 			allowed_chunk_alloc = 0;
3702 		} else {
3703 			space_info->force_alloc = 1;
3704 		}
3705 
3706 		if (loop < 3) {
3707 			loop++;
3708 			goto search;
3709 		}
3710 		ret = -ENOSPC;
3711 	} else if (!ins->objectid) {
3712 		ret = -ENOSPC;
3713 	}
3714 
3715 	/* we found what we needed */
3716 	if (ins->objectid) {
3717 		if (!(data & BTRFS_BLOCK_GROUP_DATA))
3718 			trans->block_group = block_group->key.objectid;
3719 
3720 		btrfs_put_block_group(block_group);
3721 		ret = 0;
3722 	}
3723 
3724 	return ret;
3725 }
3726 
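/*
 * dump the free space and usage counters for a space_info and each of
 * its block groups, used to help debug ENOSPC problems
 */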
3727 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
3728 {
3729 	struct btrfs_block_group_cache *cache;
3730 
3731 	printk(KERN_INFO "space_info has %llu free, is %sfull\n",
3732 	       (unsigned long long)(info->total_bytes - info->bytes_used -
3733 				    info->bytes_pinned - info->bytes_reserved),
3734 	       (info->full) ? "" : "not ");
3735 	printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
3736 	       " may_use=%llu, used=%llu\n",
3737 	       (unsigned long long)info->total_bytes,
3738 	       (unsigned long long)info->bytes_pinned,
3739 	       (unsigned long long)info->bytes_delalloc,
3740 	       (unsigned long long)info->bytes_may_use,
3741 	       (unsigned long long)info->bytes_used);
3742 
3743 	down_read(&info->groups_sem);
3744 	list_for_each_entry(cache, &info->block_groups, list) {
3745 		spin_lock(&cache->lock);
3746 		printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
3747 		       "%llu pinned %llu reserved\n",
3748 		       (unsigned long long)cache->key.objectid,
3749 		       (unsigned long long)cache->key.offset,
3750 		       (unsigned long long)btrfs_block_group_used(&cache->item),
3751 		       (unsigned long long)cache->pinned,
3752 		       (unsigned long long)cache->reserved);
3753 		btrfs_dump_free_space(cache, bytes);
3754 		spin_unlock(&cache->lock);
3755 	}
3756 	up_read(&info->groups_sem);
3757 }
3758 
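/*
 * try to reserve an extent of num_bytes.  Chunks are allocated as
 * needed, and on -ENOSPC the request is halved (but kept >=
 * min_alloc_size) and retried before dumping the space_info and
 * BUG()ing
 */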
3759 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3760 				  struct btrfs_root *root,
3761 				  u64 num_bytes, u64 min_alloc_size,
3762 				  u64 empty_size, u64 hint_byte,
3763 				  u64 search_end, struct btrfs_key *ins,
3764 				  u64 data)
3765 {
3766 	int ret;
3767 	u64 search_start = 0;
3768 	struct btrfs_fs_info *info = root->fs_info;
3769 
3770 	data = btrfs_get_alloc_profile(root, data);
3771 again:
3772 	/*
3773 	 * the only place that sets empty_size is btrfs_realloc_node, which
3774 	 * is not called recursively on allocations
3775 	 */
3776 	if (empty_size || root->ref_cows) {
3777 		if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
3778 			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3779 				     2 * 1024 * 1024,
3780 				     BTRFS_BLOCK_GROUP_METADATA |
3781 				     (info->metadata_alloc_profile &
3782 				      info->avail_metadata_alloc_bits), 0);
3783 		}
3784 		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3785 				     num_bytes + 2 * 1024 * 1024, data, 0);
3786 	}
3787 
3788 	WARN_ON(num_bytes < root->sectorsize);
3789 	ret = find_free_extent(trans, root, num_bytes, empty_size,
3790 			       search_start, search_end, hint_byte, ins,
3791 			       trans->alloc_exclude_start,
3792 			       trans->alloc_exclude_nr, data);
3793 
3794 	if (ret == -ENOSPC && num_bytes > min_alloc_size) {
3795 		num_bytes = num_bytes >> 1;
3796 		num_bytes = num_bytes & ~(root->sectorsize - 1);
3797 		num_bytes = max(num_bytes, min_alloc_size);
3798 		do_chunk_alloc(trans, root->fs_info->extent_root,
3799 			       num_bytes, data, 1);
3800 		goto again;
3801 	}
3802 	if (ret) {
3803 		struct btrfs_space_info *sinfo;
3804 
3805 		sinfo = __find_space_info(root->fs_info, data);
3806 		printk(KERN_ERR "btrfs allocation failed flags %llu, "
3807 		       "wanted %llu\n", (unsigned long long)data,
3808 		       (unsigned long long)num_bytes);
3809 		dump_space_info(sinfo, num_bytes);
3810 		BUG();
3811 	}
3812 
3813 	return ret;
3814 }
3815 
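/*
 * give a reserved extent back: discard it, return the space to the
 * block group's free space cache and drop the reservation
 */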
3816 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
3817 {
3818 	struct btrfs_block_group_cache *cache;
3819 	int ret = 0;
3820 
3821 	cache = btrfs_lookup_block_group(root->fs_info, start);
3822 	if (!cache) {
3823 		printk(KERN_ERR "Unable to find block group for %llu\n",
3824 		       (unsigned long long)start);
3825 		return -ENOSPC;
3826 	}
3827 
3828 	ret = btrfs_discard_extent(root, start, len);
3829 
3830 	btrfs_add_free_space(cache, start, len);
3831 	btrfs_put_block_group(cache);
3832 	update_reserved_extents(root, start, len, 0);
3833 
3834 	return ret;
3835 }
3836 
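/*
 * wrapper around __btrfs_reserve_extent that also marks the extent we
 * got back as reserved in its block group
 */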
3837 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3838 				  struct btrfs_root *root,
3839 				  u64 num_bytes, u64 min_alloc_size,
3840 				  u64 empty_size, u64 hint_byte,
3841 				  u64 search_end, struct btrfs_key *ins,
3842 				  u64 data)
3843 {
3844 	int ret;
3845 	ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
3846 				     empty_size, hint_byte, search_end, ins,
3847 				     data);
3848 	update_reserved_extents(root, ins->objectid, ins->offset, 1);
3849 	return ret;
3850 }
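
/*
 * Example (sketch, not from this file): a data allocation in the file
 * write path looks roughly like
 *
 *	struct btrfs_key ins;
 *	ret = btrfs_reserve_extent(trans, root, len, root->sectorsize,
 *				   0, alloc_hint, (u64)-1, &ins,
 *				   BTRFS_BLOCK_GROUP_DATA);
 *
 * with the start and length of the reserved extent coming back in
 * ins.objectid and ins.offset ('len' and 'alloc_hint' stand in for the
 * caller's values)
 */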
3851 
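/*
 * insert the extent item and inline data backref for a reserved extent
 * into the extent tree and update the block group accounting
 */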
3852 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
3853 				      struct btrfs_root *root,
3854 				      u64 parent, u64 root_objectid,
3855 				      u64 flags, u64 owner, u64 offset,
3856 				      struct btrfs_key *ins, int ref_mod)
3857 {
3858 	int ret;
3859 	struct btrfs_fs_info *fs_info = root->fs_info;
3860 	struct btrfs_extent_item *extent_item;
3861 	struct btrfs_extent_inline_ref *iref;
3862 	struct btrfs_path *path;
3863 	struct extent_buffer *leaf;
3864 	int type;
3865 	u32 size;
3866 
3867 	if (parent > 0)
3868 		type = BTRFS_SHARED_DATA_REF_KEY;
3869 	else
3870 		type = BTRFS_EXTENT_DATA_REF_KEY;
3871 
3872 	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
3873 
3874 	path = btrfs_alloc_path();
3875 	BUG_ON(!path);
3876 
3877 	path->leave_spinning = 1;
3878 	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
3879 				      ins, size);
3880 	BUG_ON(ret);
3881 
3882 	leaf = path->nodes[0];
3883 	extent_item = btrfs_item_ptr(leaf, path->slots[0],
3884 				     struct btrfs_extent_item);
3885 	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
3886 	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
3887 	btrfs_set_extent_flags(leaf, extent_item,
3888 			       flags | BTRFS_EXTENT_FLAG_DATA);
3889 
3890 	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
3891 	btrfs_set_extent_inline_ref_type(leaf, iref, type);
3892 	if (parent > 0) {
3893 		struct btrfs_shared_data_ref *ref;
3894 		ref = (struct btrfs_shared_data_ref *)(iref + 1);
3895 		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
3896 		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
3897 	} else {
3898 		struct btrfs_extent_data_ref *ref;
3899 		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3900 		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
3901 		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
3902 		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
3903 		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
3904 	}
3905 
3906 	btrfs_mark_buffer_dirty(leaf);
3907 	btrfs_free_path(path);
3908 
3909 	ret = update_block_group(trans, root, ins->objectid, ins->offset,
3910 				 1, 0);
3911 	if (ret) {
3912 		printk(KERN_ERR "btrfs update block group failed for %llu "
3913 		       "%llu\n", (unsigned long long)ins->objectid,
3914 		       (unsigned long long)ins->offset);
3915 		BUG();
3916 	}
3917 	return ret;
3918 }
3919 
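/*
 * tree block flavor of alloc_reserved_file_extent: the extent item
 * carries a btrfs_tree_block_info (key and level) ahead of the inline
 * backref
 */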
3920 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
3921 				     struct btrfs_root *root,
3922 				     u64 parent, u64 root_objectid,
3923 				     u64 flags, struct btrfs_disk_key *key,
3924 				     int level, struct btrfs_key *ins)
3925 {
3926 	int ret;
3927 	struct btrfs_fs_info *fs_info = root->fs_info;
3928 	struct btrfs_extent_item *extent_item;
3929 	struct btrfs_tree_block_info *block_info;
3930 	struct btrfs_extent_inline_ref *iref;
3931 	struct btrfs_path *path;
3932 	struct extent_buffer *leaf;
3933 	u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
3934 
3935 	path = btrfs_alloc_path();
3936 	BUG_ON(!path);
3937 
3938 	path->leave_spinning = 1;
3939 	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
3940 				      ins, size);
3941 	BUG_ON(ret);
3942 
3943 	leaf = path->nodes[0];
3944 	extent_item = btrfs_item_ptr(leaf, path->slots[0],
3945 				     struct btrfs_extent_item);
3946 	btrfs_set_extent_refs(leaf, extent_item, 1);
3947 	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
3948 	btrfs_set_extent_flags(leaf, extent_item,
3949 			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
3950 	block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
3951 
3952 	btrfs_set_tree_block_key(leaf, block_info, key);
3953 	btrfs_set_tree_block_level(leaf, block_info, level);
3954 
3955 	iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
3956 	if (parent > 0) {
3957 		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
3958 		btrfs_set_extent_inline_ref_type(leaf, iref,
3959 						 BTRFS_SHARED_BLOCK_REF_KEY);
3960 		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
3961 	} else {
3962 		btrfs_set_extent_inline_ref_type(leaf, iref,
3963 						 BTRFS_TREE_BLOCK_REF_KEY);
3964 		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
3965 	}
3966 
3967 	btrfs_mark_buffer_dirty(leaf);
3968 	btrfs_free_path(path);
3969 
3970 	ret = update_block_group(trans, root, ins->objectid, ins->offset,
3971 				 1, 0);
3972 	if (ret) {
3973 		printk(KERN_ERR "btrfs update block group failed for %llu "
3974 		       "%llu\n", (unsigned long long)ins->objectid,
3975 		       (unsigned long long)ins->offset);
3976 		BUG();
3977 	}
3978 	return ret;
3979 }
3980 
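/*
 * queue a delayed ref that inserts the extent item for a reserved data
 * extent when the delayed refs are run
 */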
3981 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
3982 				     struct btrfs_root *root,
3983 				     u64 root_objectid, u64 owner,
3984 				     u64 offset, struct btrfs_key *ins)
3985 {
3986 	int ret;
3987 
3988 	BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
3989 
3990 	ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
3991 					 0, root_objectid, owner, offset,
3992 					 BTRFS_ADD_DELAYED_EXTENT, NULL);
3993 	return ret;
3994 }
3995 
3996 /*
3997  * this is used by the tree logging recovery code.  It records that
3998  * an extent has been allocated and makes sure to clear the free
3999  * space cache bits as well
4000  */
4001 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4002 				   struct btrfs_root *root,
4003 				   u64 root_objectid, u64 owner, u64 offset,
4004 				   struct btrfs_key *ins)
4005 {
4006 	int ret;
4007 	struct btrfs_block_group_cache *block_group;
4008 
4009 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
4010 	mutex_lock(&block_group->cache_mutex);
4011 	cache_block_group(root, block_group);
4012 	mutex_unlock(&block_group->cache_mutex);
4013 
4014 	ret = btrfs_remove_free_space(block_group, ins->objectid,
4015 				      ins->offset);
4016 	BUG_ON(ret);
4017 	btrfs_put_block_group(block_group);
4018 	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
4019 					 0, owner, offset, ins, 1);
4020 	return ret;
4021 }
4022 
4023 /*
4024  * finds a free extent and does all the dirty work required for allocation.
4025  * The key for the allocated extent is returned through ins; no tree
4026  * buffer is created here, callers use btrfs_init_new_buffer for that.
4027  *
4028  * returns 0 if everything worked, non-zero otherwise.
4029  */
4030 static int alloc_tree_block(struct btrfs_trans_handle *trans,
4031 			    struct btrfs_root *root,
4032 			    u64 num_bytes, u64 parent, u64 root_objectid,
4033 			    struct btrfs_disk_key *key, int level,
4034 			    u64 empty_size, u64 hint_byte, u64 search_end,
4035 			    struct btrfs_key *ins)
4036 {
4037 	int ret;
4038 	u64 flags = 0;
4039 
4040 	ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4041 				     empty_size, hint_byte, search_end,
4042 				     ins, 0);
4043 	BUG_ON(ret);
4044 
4045 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4046 		if (parent == 0)
4047 			parent = ins->objectid;
4048 		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4049 	} else
4050 		BUG_ON(parent > 0);
4051 
4052 	update_reserved_extents(root, ins->objectid, ins->offset, 1);
4053 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4054 		struct btrfs_delayed_extent_op *extent_op;
4055 		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
4056 		BUG_ON(!extent_op);
4057 		if (key)
4058 			memcpy(&extent_op->key, key, sizeof(extent_op->key));
4059 		else
4060 			memset(&extent_op->key, 0, sizeof(extent_op->key));
4061 		extent_op->flags_to_set = flags;
4062 		extent_op->update_key = 1;
4063 		extent_op->update_flags = 1;
4064 		extent_op->is_data = 0;
4065 
4066 		ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
4067 					ins->offset, parent, root_objectid,
4068 					level, BTRFS_ADD_DELAYED_EXTENT,
4069 					extent_op);
4070 		BUG_ON(ret);
4071 	}
4072 	return ret;
4073 }
4074 
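/*
 * set up a newly allocated tree block: create the extent buffer, stamp
 * it with the current transid, lock it and mark it dirty in either the
 * log tree's dirty pages or the transaction's
 */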
4075 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4076 					    struct btrfs_root *root,
4077 					    u64 bytenr, u32 blocksize,
4078 					    int level)
4079 {
4080 	struct extent_buffer *buf;
4081 
4082 	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
4083 	if (!buf)
4084 		return ERR_PTR(-ENOMEM);
4085 	btrfs_set_header_generation(buf, trans->transid);
4086 	btrfs_set_buffer_lockdep_class(buf, level);
4087 	btrfs_tree_lock(buf);
4088 	clean_tree_block(trans, root, buf);
4089 
4090 	btrfs_set_lock_blocking(buf);
4091 	btrfs_set_buffer_uptodate(buf);
4092 
4093 	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4094 		set_extent_dirty(&root->dirty_log_pages, buf->start,
4095 			 buf->start + buf->len - 1, GFP_NOFS);
4096 	} else {
4097 		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4098 			 buf->start + buf->len - 1, GFP_NOFS);
4099 	}
4100 	trans->blocks_used++;
4101 	/* this returns a buffer locked for blocking */
4102 	return buf;
4103 }
4104 
4105 /*
4106  * helper function to allocate a block for a given tree
4107  * returns the tree buffer on success, or an ERR_PTR on failure.
4108  */
4109 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
4110 					struct btrfs_root *root, u32 blocksize,
4111 					u64 parent, u64 root_objectid,
4112 					struct btrfs_disk_key *key, int level,
4113 					u64 hint, u64 empty_size)
4114 {
4115 	struct btrfs_key ins;
4116 	int ret;
4117 	struct extent_buffer *buf;
4118 
4119 	ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid,
4120 			       key, level, empty_size, hint, (u64)-1, &ins);
4121 	if (ret) {
4122 		BUG_ON(ret > 0);
4123 		return ERR_PTR(ret);
4124 	}
4125 
4126 	buf = btrfs_init_new_buffer(trans, root, ins.objectid,
4127 				    blocksize, level);
4128 	return buf;
4129 }
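
/*
 * Example (sketch, not from this file): the COW code allocates a
 * replacement block with something like
 *
 *	cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
 *				     root->root_key.objectid, &disk_key,
 *				     level, buf->start, 0);
 *
 * and gets back a locked buffer ready to be filled in ('buf',
 * 'parent_start', 'disk_key' and 'level' stand in for the caller's
 * values)
 */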
4130 
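/*
 * drop the reference 'leaf' holds on each regular file extent it points
 * to.  Inline extents and holes carry no references and are skipped.
 */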
4131 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
4132 			struct btrfs_root *root, struct extent_buffer *leaf)
4133 {
4134 	u64 disk_bytenr;
4135 	u64 num_bytes;
4136 	struct btrfs_key key;
4137 	struct btrfs_file_extent_item *fi;
4138 	u32 nritems;
4139 	int i;
4140 	int ret;
4141 
4142 	BUG_ON(!btrfs_is_leaf(leaf));
4143 	nritems = btrfs_header_nritems(leaf);
4144 
4145 	for (i = 0; i < nritems; i++) {
4146 		cond_resched();
4147 		btrfs_item_key_to_cpu(leaf, &key, i);
4148 
4149 		/* only extents have references, skip everything else */
4150 		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
4151 			continue;
4152 
4153 		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4154 
4155 		/* inline extents live in the btree, they don't have refs */
4156 		if (btrfs_file_extent_type(leaf, fi) ==
4157 		    BTRFS_FILE_EXTENT_INLINE)
4158 			continue;
4159 
4160 		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4161 
4162 		/* holes don't have refs */
4163 		if (disk_bytenr == 0)
4164 			continue;
4165 
4166 		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4167 		ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
4168 					leaf->start, 0, key.objectid, 0);
4169 		BUG_ON(ret);
4170 	}
4171 	return 0;
4172 }
4173 
4174 #if 0
4175 
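/*
 * drop the refs recorded in a cached leaf ref, in sorted bytenr order,
 * without having to read the leaf itself
 */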
4176 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
4177 					struct btrfs_root *root,
4178 					struct btrfs_leaf_ref *ref)
4179 {
4180 	int i;
4181 	int ret;
4182 	struct btrfs_extent_info *info;
4183 	struct refsort *sorted;
4184 
4185 	if (ref->nritems == 0)
4186 		return 0;
4187 
4188 	sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
4189 	for (i = 0; i < ref->nritems; i++) {
4190 		sorted[i].bytenr = ref->extents[i].bytenr;
4191 		sorted[i].slot = i;
4192 	}
4193 	sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
4194 
4195 	/*
4196 	 * free the extents in bytenr order so we don't scribble
4197 	 * randomly into the extent allocation tree
4198 	 */
4199 	for (i = 0; i < ref->nritems; i++) {
4200 		info = ref->extents + sorted[i].slot;
4201 		ret = btrfs_free_extent(trans, root, info->bytenr,
4202 					  info->num_bytes, ref->bytenr,
4203 					  ref->owner, ref->generation,
4204 					  info->objectid, 0);
4205 
4206 		atomic_inc(&root->fs_info->throttle_gen);
4207 		wake_up(&root->fs_info->transaction_throttle);
4208 		cond_resched();
4209 
4210 		BUG_ON(ret);
4211 		info++;
4212 	}
4213 
4214 	kfree(sorted);
4215 	return 0;
4216 }
4217 
4218 
4219 static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
4220 				     struct btrfs_root *root, u64 start,
4221 				     u64 len, u32 *refs)
4222 {
4223 	int ret;
4224 
4225 	ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
4226 	BUG_ON(ret);
4227 
4228 #if 0 /* some debugging code in case we see problems here */
4229 	/* if the refs count is one, it won't get increased again.  But
4230 	 * if the ref count is > 1, someone may be decreasing it at
4231 	 * the same time we are.
4232 	 */
4233 	if (*refs != 1) {
4234 		struct extent_buffer *eb = NULL;
4235 		eb = btrfs_find_create_tree_block(root, start, len);
4236 		if (eb)
4237 			btrfs_tree_lock(eb);
4238 
4239 		mutex_lock(&root->fs_info->alloc_mutex);
4240 		ret = lookup_extent_ref(NULL, root, start, len, refs);
4241 		BUG_ON(ret);
4242 		mutex_unlock(&root->fs_info->alloc_mutex);
4243 
4244 		if (eb) {
4245 			btrfs_tree_unlock(eb);
4246 			free_extent_buffer(eb);
4247 		}
4248 		if (*refs == 1) {
4249 			printk(KERN_ERR "btrfs block %llu went down to one "
4250 			       "during drop_snap\n", (unsigned long long)start);
4251 		}
4252 
4253 	}
4254 #endif
4255 
4256 	cond_resched();
4257 	return ret;
4258 }
4259 
4260 
4261 /*
4262  * this is used while deleting old snapshots, and it drops the refs
4263  * on a whole subtree starting from a level 1 node.
4264  *
4265  * The idea is to sort all the leaf pointers, and then drop the
4266  * ref on all the leaves in order.  Most of the time the leaves
4267  * will have ref cache entries, so no leaf IOs will be required to
4268  * find the extents they have references on.
4269  *
4270  * For each leaf, any references it has are also dropped in order
4271  *
4272  * This ends up dropping the references in something close to optimal
4273  * order for reading and modifying the extent allocation tree.
4274  */
4275 static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
4276 					struct btrfs_root *root,
4277 					struct btrfs_path *path)
4278 {
4279 	u64 bytenr;
4280 	u64 root_owner;
4281 	u64 root_gen;
4282 	struct extent_buffer *eb = path->nodes[1];
4283 	struct extent_buffer *leaf;
4284 	struct btrfs_leaf_ref *ref;
4285 	struct refsort *sorted = NULL;
4286 	int nritems = btrfs_header_nritems(eb);
4287 	int ret;
4288 	int i;
4289 	int refi = 0;
4290 	int slot = path->slots[1];
4291 	u32 blocksize = btrfs_level_size(root, 0);
4292 	u32 refs;
4293 
4294 	if (nritems == 0)
4295 		goto out;
4296 
4297 	root_owner = btrfs_header_owner(eb);
4298 	root_gen = btrfs_header_generation(eb);
4299 	sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
4300 
4301 	/*
4302 	 * step one, sort all the leaf pointers so we don't scribble
4303 	 * randomly into the extent allocation tree
4304 	 */
4305 	for (i = slot; i < nritems; i++) {
4306 		sorted[refi].bytenr = btrfs_node_blockptr(eb, i);
4307 		sorted[refi].slot = i;
4308 		refi++;
4309 	}
4310 
4311 	/*
4312 	 * nritems won't be zero, but if we're picking up drop_snapshot
4313 	 * after a crash, slot might be > 0, so double check things
4314 	 * just in case.
4315 	 */
4316 	if (refi == 0)
4317 		goto out;
4318 
4319 	sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
4320 
4321 	/*
4322 	 * the first loop frees everything the leaves point to
4323 	 */
4324 	for (i = 0; i < refi; i++) {
4325 		u64 ptr_gen;
4326 
4327 		bytenr = sorted[i].bytenr;
4328 
4329 		/*
4330 		 * check the reference count on this leaf.  If it is > 1
4331 		 * we just decrement it below and don't update any
4332 		 * of the refs the leaf points to.
4333 		 */
4334 		ret = drop_snap_lookup_refcount(trans, root, bytenr,
4335 						blocksize, &refs);
4336 		BUG_ON(ret);
4337 		if (refs != 1)
4338 			continue;
4339 
4340 		ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot);
4341 
4342 		/*
4343 		 * the leaf only had one reference, which means the
4344 		 * only thing pointing to this leaf is the snapshot
4345 		 * we're deleting.  It isn't possible for the reference
4346 		 * count to increase again later
4347 		 *
4348 		 * The reference cache is checked for the leaf,
4349 		 * and if found we'll be able to drop any refs held by
4350 		 * the leaf without needing to read it in.
4351 		 */
4352 		ref = btrfs_lookup_leaf_ref(root, bytenr);
4353 		if (ref && ref->generation != ptr_gen) {
4354 			btrfs_free_leaf_ref(root, ref);
4355 			ref = NULL;
4356 		}
4357 		if (ref) {
4358 			ret = cache_drop_leaf_ref(trans, root, ref);
4359 			BUG_ON(ret);
4360 			btrfs_remove_leaf_ref(root, ref);
4361 			btrfs_free_leaf_ref(root, ref);
4362 		} else {
4363 			/*
4364 			 * the leaf wasn't in the reference cache, so
4365 			 * we have to read it.
4366 			 */
4367 			leaf = read_tree_block(root, bytenr, blocksize,
4368 					       ptr_gen);
4369 			ret = btrfs_drop_leaf_ref(trans, root, leaf);
4370 			BUG_ON(ret);
4371 			free_extent_buffer(leaf);
4372 		}
4373 		atomic_inc(&root->fs_info->throttle_gen);
4374 		wake_up(&root->fs_info->transaction_throttle);
4375 		cond_resched();
4376 	}
4377 
4378 	/*
4379 	 * run through the loop again to free the refs on the leaves.
4380 	 * This is faster than doing it in the loop above because
4381 	 * the leaves are likely to be clustered together.  We end up
4382 	 * working in nice chunks on the extent allocation tree.
4383 	 */
4384 	for (i = 0; i < refi; i++) {
4385 		bytenr = sorted[i].bytenr;
4386 		ret = btrfs_free_extent(trans, root, bytenr,
4387 					blocksize, eb->start,
4388 					root_owner, root_gen, 0, 1);
4389 		BUG_ON(ret);
4390 
4391 		atomic_inc(&root->fs_info->throttle_gen);
4392 		wake_up(&root->fs_info->transaction_throttle);
4393 		cond_resched();
4394 	}
4395 out:
4396 	kfree(sorted);
4397 
4398 	/*
4399 	 * update the path to show we've processed the entire level 1
4400  * node.  This will get saved into the root's drop_progress
4401 	 * field so these drops are not repeated again if this transaction
4402 	 * commits.
4403 	 */
4404 	path->slots[1] = nritems;
4405 	return 0;
4406 }
4407 
4408 /*
4409  * helper function for drop_snapshot, this walks down the tree dropping ref
4410  * counts as it goes.
4411  */
4412 static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4413 				   struct btrfs_root *root,
4414 				   struct btrfs_path *path, int *level)
4415 {
4416 	u64 root_owner;
4417 	u64 root_gen;
4418 	u64 bytenr;
4419 	u64 ptr_gen;
4420 	struct extent_buffer *next;
4421 	struct extent_buffer *cur;
4422 	struct extent_buffer *parent;
4423 	u32 blocksize;
4424 	int ret;
4425 	u32 refs;
4426 
4427 	WARN_ON(*level < 0);
4428 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
4429 	ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
4430 				path->nodes[*level]->len, &refs);
4431 	BUG_ON(ret);
4432 	if (refs > 1)
4433 		goto out;
4434 
4435 	/*
4436 	 * walk down to the last node level and free all the leaves
4437 	 */
4438 	while (*level >= 0) {
4439 		WARN_ON(*level < 0);
4440 		WARN_ON(*level >= BTRFS_MAX_LEVEL);
4441 		cur = path->nodes[*level];
4442 
4443 		if (btrfs_header_level(cur) != *level)
4444 			WARN_ON(1);
4445 
4446 		if (path->slots[*level] >=
4447 		    btrfs_header_nritems(cur))
4448 			break;
4449 
4450 		/* the new code goes down to level 1 and does all the
4451 		 * leaves pointed to by that node in bulk.  So, this check
4452 		 * for level 0 will always be false.
4453 		 *
4454 		 * But, the disk format allows the drop_progress
4455 		 * field in the root to leave things in a state where
4456 		 * a leaf will need cleaning up here.  If someone crashes
4457 		 * with the old code and then boots with the new code,
4458 		 * we might find a leaf here.
4459 		 */
4460 		if (*level == 0) {
4461 			ret = btrfs_drop_leaf_ref(trans, root, cur);
4462 			BUG_ON(ret);
4463 			break;
4464 		}
4465 
4466 		/*
4467 		 * once we get to level one, process the whole node
4468 		 * at once, including everything below it.
4469 		 */
4470 		if (*level == 1) {
4471 			ret = drop_level_one_refs(trans, root, path);
4472 			BUG_ON(ret);
4473 			break;
4474 		}
4475 
4476 		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
4477 		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
4478 		blocksize = btrfs_level_size(root, *level - 1);
4479 
4480 		ret = drop_snap_lookup_refcount(trans, root, bytenr,
4481 						blocksize, &refs);
4482 		BUG_ON(ret);
4483 
4484 		/*
4485 		 * if there is more than one reference, we don't need
4486 		 * to read that node to drop any references it has.  We
4487 		 * just drop the ref we hold on that node and move on to the
4488 		 * next slot in this level.
4489 		 */
4490 		if (refs != 1) {
4491 			parent = path->nodes[*level];
4492 			root_owner = btrfs_header_owner(parent);
4493 			root_gen = btrfs_header_generation(parent);
4494 			path->slots[*level]++;
4495 
4496 			ret = btrfs_free_extent(trans, root, bytenr,
4497 						blocksize, parent->start,
4498 						root_owner, root_gen,
4499 						*level - 1, 1);
4500 			BUG_ON(ret);
4501 
4502 			atomic_inc(&root->fs_info->throttle_gen);
4503 			wake_up(&root->fs_info->transaction_throttle);
4504 			cond_resched();
4505 
4506 			continue;
4507 		}
4508 
4509 		/*
4510 		 * we need to keep freeing things in the next level down.
4511 		 * read the block and loop around to process it
4512 		 */
4513 		next = read_tree_block(root, bytenr, blocksize, ptr_gen);
4514 		WARN_ON(*level <= 0);
4515 		if (path->nodes[*level-1])
4516 			free_extent_buffer(path->nodes[*level-1]);
4517 		path->nodes[*level-1] = next;
4518 		*level = btrfs_header_level(next);
4519 		path->slots[*level] = 0;
4520 		cond_resched();
4521 	}
4522 out:
4523 	WARN_ON(*level < 0);
4524 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
4525 
4526 	if (path->nodes[*level] == root->node) {
4527 		parent = path->nodes[*level];
4528 		bytenr = path->nodes[*level]->start;
4529 	} else {
4530 		parent = path->nodes[*level + 1];
4531 		bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
4532 	}
4533 
4534 	blocksize = btrfs_level_size(root, *level);
4535 	root_owner = btrfs_header_owner(parent);
4536 	root_gen = btrfs_header_generation(parent);
4537 
4538 	/*
4539 	 * cleanup and free the reference on the last node
4540 	 * we processed
4541 	 */
4542 	ret = btrfs_free_extent(trans, root, bytenr, blocksize,
4543 				  parent->start, root_owner, root_gen,
4544 				  *level, 1);
4545 	free_extent_buffer(path->nodes[*level]);
4546 	path->nodes[*level] = NULL;
4547 
4548 	*level += 1;
4549 	BUG_ON(ret);
4550 
4551 	cond_resched();
4552 	return 0;
4553 }
4554 #endif
4555 
4556 /*
4557  * helper function for drop_subtree.  It is similar to the old
4558  * walk_down_tree above (now under #if 0); the main difference is that
4559  * it checks reference counts while tree blocks are locked.
4560  */
4561 static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4562 				   struct btrfs_root *root,
4563 				   struct btrfs_path *path, int *level)
4564 {
4565 	struct extent_buffer *next;
4566 	struct extent_buffer *cur;
4567 	struct extent_buffer *parent;
4568 	u64 bytenr;
4569 	u64 ptr_gen;
4570 	u64 refs;
4571 	u64 flags;
4572 	u32 blocksize;
4573 	int ret;
4574 
4575 	cur = path->nodes[*level];
4576 	ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
4577 				       &refs, &flags);
4578 	BUG_ON(ret);
4579 	if (refs > 1)
4580 		goto out;
4581 
4582 	BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4583 
4584 	while (*level >= 0) {
4585 		cur = path->nodes[*level];
4586 		if (*level == 0) {
4587 			ret = btrfs_drop_leaf_ref(trans, root, cur);
4588 			BUG_ON(ret);
4589 			clean_tree_block(trans, root, cur);
4590 			break;
4591 		}
4592 		if (path->slots[*level] >= btrfs_header_nritems(cur)) {
4593 			clean_tree_block(trans, root, cur);
4594 			break;
4595 		}
4596 
4597 		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
4598 		blocksize = btrfs_level_size(root, *level - 1);
4599 		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
4600 
4601 		next = read_tree_block(root, bytenr, blocksize, ptr_gen);
4602 		btrfs_tree_lock(next);
4603 		btrfs_set_lock_blocking(next);
4604 
4605 		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
4606 					       &refs, &flags);
4607 		BUG_ON(ret);
4608 		if (refs > 1) {
4609 			parent = path->nodes[*level];
4610 			ret = btrfs_free_extent(trans, root, bytenr,
4611 						blocksize, parent->start,
4612 						btrfs_header_owner(parent),
4613 						*level - 1, 0);
4614 			BUG_ON(ret);
4615 			path->slots[*level]++;
4616 			btrfs_tree_unlock(next);
4617 			free_extent_buffer(next);
4618 			continue;
4619 		}
4620 
4621 		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4622 
4623 		*level = btrfs_header_level(next);
4624 		path->nodes[*level] = next;
4625 		path->slots[*level] = 0;
4626 		path->locks[*level] = 1;
4627 		cond_resched();
4628 	}
4629 out:
4630 	if (path->nodes[*level] == root->node)
4631 		parent = path->nodes[*level];
4632 	else
4633 		parent = path->nodes[*level + 1];
4634 	bytenr = path->nodes[*level]->start;
4635 	blocksize = path->nodes[*level]->len;
4636 
4637 	ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
4638 				btrfs_header_owner(parent), *level, 0);
4639 	BUG_ON(ret);
4640 
4641 	if (path->locks[*level]) {
4642 		btrfs_tree_unlock(path->nodes[*level]);
4643 		path->locks[*level] = 0;
4644 	}
4645 	free_extent_buffer(path->nodes[*level]);
4646 	path->nodes[*level] = NULL;
4647 	*level += 1;
4648 	cond_resched();
4649 	return 0;
4650 }
4651 
4652 /*
4653  * helper for dropping snapshots.  This walks back up the tree in the path
4654  * to find the first node higher up where we haven't yet gone through
4655  * all the slots
4656  */
4657 static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
4658 				 struct btrfs_root *root,
4659 				 struct btrfs_path *path,
4660 				 int *level, int max_level)
4661 {
4662 	struct btrfs_root_item *root_item = &root->root_item;
4663 	int i;
4664 	int slot;
4665 	int ret;
4666 
4667 	for (i = *level; i < max_level && path->nodes[i]; i++) {
4668 		slot = path->slots[i];
4669 		if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
4670 			/*
4671 			 * there is more work to do in this level.
4672 			 * Update the drop_progress marker to reflect
4673 			 * the work we've done so far, and then bump
4674 			 * the slot number
4675 			 */
4676 			path->slots[i]++;
4677 			WARN_ON(*level == 0);
4678 			if (max_level == BTRFS_MAX_LEVEL) {
4679 				btrfs_node_key(path->nodes[i],
4680 					       &root_item->drop_progress,
4681 					       path->slots[i]);
4682 				root_item->drop_level = i;
4683 			}
4684 			*level = i;
4685 			return 0;
4686 		} else {
4687 			struct extent_buffer *parent;
4688 
4689 			/*
4690 			 * this whole node is done, free our reference
4691 			 * on it and go up one level
4692 			 */
4693 			if (path->nodes[*level] == root->node)
4694 				parent = path->nodes[*level];
4695 			else
4696 				parent = path->nodes[*level + 1];
4697 
4698 			clean_tree_block(trans, root, path->nodes[i]);
4699 			ret = btrfs_free_extent(trans, root,
4700 						path->nodes[i]->start,
4701 						path->nodes[i]->len,
4702 						parent->start,
4703 						btrfs_header_owner(parent),
4704 						*level, 0);
4705 			BUG_ON(ret);
4706 			if (path->locks[i]) {
4707 				btrfs_tree_unlock(path->nodes[i]);
4708 				path->locks[i] = 0;
4709 			}
4710 			free_extent_buffer(path->nodes[i]);
4711 			path->nodes[i] = NULL;
4712 			*level = i + 1;
4713 		}
4714 	}
4715 	return 1;
4716 }
4717 
4718 /*
4719  * drop the reference count on the tree rooted at 'root'.  This traverses
4720  * the tree freeing any blocks that have a ref count of zero after being
4721  * decremented.
4722  */
4723 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
4724 			*root)
4725 {
4726 	int ret = 0;
4727 	int wret;
4728 	int level;
4729 	struct btrfs_path *path;
4730 	int update_count;
4731 	struct btrfs_root_item *root_item = &root->root_item;
4732 
4733 	path = btrfs_alloc_path();
4734 	BUG_ON(!path);
4735 
4736 	level = btrfs_header_level(root->node);
4737 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4738 		path->nodes[level] = btrfs_lock_root_node(root);
4739 		btrfs_set_lock_blocking(path->nodes[level]);
4740 		path->slots[level] = 0;
4741 		path->locks[level] = 1;
4742 	} else {
4743 		struct btrfs_key key;
4744 		struct btrfs_disk_key found_key;
4745 		struct extent_buffer *node;
4746 
4747 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4748 		level = root_item->drop_level;
4749 		path->lowest_level = level;
4750 		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4751 		if (wret < 0) {
4752 			ret = wret;
4753 			goto out;
4754 		}
4755 		node = path->nodes[level];
4756 		btrfs_node_key(node, &found_key, path->slots[level]);
4757 		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4758 			       sizeof(found_key)));
4759 		/*
4760 		 * unlock our path, this is safe because only this
4761 		 * function is allowed to delete this snapshot
4762 		 */
4763 		btrfs_unlock_up_safe(path, 0);
4764 	}
4765 	while (1) {
4766 		unsigned long update;
4767 		wret = walk_down_tree(trans, root, path, &level);
4768 		if (wret > 0)
4769 			break;
4770 		if (wret < 0)
4771 			ret = wret;
4772 
4773 		wret = walk_up_tree(trans, root, path, &level,
4774 				    BTRFS_MAX_LEVEL);
4775 		if (wret > 0)
4776 			break;
4777 		if (wret < 0)
4778 			ret = wret;
4779 		if (trans->transaction->in_commit ||
4780 		    trans->transaction->delayed_refs.flushing) {
4781 			ret = -EAGAIN;
4782 			break;
4783 		}
4784 		for (update_count = 0; update_count < 16; update_count++) {
4785 			update = trans->delayed_ref_updates;
4786 			trans->delayed_ref_updates = 0;
4787 			if (update)
4788 				btrfs_run_delayed_refs(trans, root, update);
4789 			else
4790 				break;
4791 		}
4792 	}
4793 out:
4794 	btrfs_free_path(path);
4795 	return ret;
4796 }
4797 
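/*
 * like btrfs_drop_snapshot, but only drops the subtree rooted at 'node'.
 * 'parent' bounds how far the upward walk is allowed to go.
 */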
4798 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
4799 			struct btrfs_root *root,
4800 			struct extent_buffer *node,
4801 			struct extent_buffer *parent)
4802 {
4803 	struct btrfs_path *path;
4804 	int level;
4805 	int parent_level;
4806 	int ret = 0;
4807 	int wret;
4808 
4809 	path = btrfs_alloc_path();
4810 	BUG_ON(!path);
4811 
4812 	btrfs_assert_tree_locked(parent);
4813 	parent_level = btrfs_header_level(parent);
4814 	extent_buffer_get(parent);
4815 	path->nodes[parent_level] = parent;
4816 	path->slots[parent_level] = btrfs_header_nritems(parent);
4817 
4818 	btrfs_assert_tree_locked(node);
4819 	level = btrfs_header_level(node);
4820 	extent_buffer_get(node);
4821 	path->nodes[level] = node;
4822 	path->slots[level] = 0;
4823 
4824 	while (1) {
4825 		wret = walk_down_tree(trans, root, path, &level);
4826 		if (wret < 0)
4827 			ret = wret;
4828 		if (wret != 0)
4829 			break;
4830 
4831 		wret = walk_up_tree(trans, root, path, &level, parent_level);
4832 		if (wret < 0)
4833 			ret = wret;
4834 		if (wret != 0)
4835 			break;
4836 	}
4837 
4838 	btrfs_free_path(path);
4839 	return ret;
4840 }
4841 
4842 #if 0
4843 static unsigned long calc_ra(unsigned long start, unsigned long last,
4844 			     unsigned long nr)
4845 {
4846 	return min(last, start + nr - 1);
4847 }
4848 
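/*
 * read and dirty every page in [start, start + len) of the inode so
 * that writeback rewrites the data at its new location
 */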
4849 static noinline int relocate_inode_pages(struct inode *inode, u64 start,
4850 					 u64 len)
4851 {
4852 	u64 page_start;
4853 	u64 page_end;
4854 	unsigned long first_index;
4855 	unsigned long last_index;
4856 	unsigned long i;
4857 	struct page *page;
4858 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4859 	struct file_ra_state *ra;
4860 	struct btrfs_ordered_extent *ordered;
4861 	unsigned int total_read = 0;
4862 	unsigned int total_dirty = 0;
4863 	int ret = 0;
4864 
4865 	ra = kzalloc(sizeof(*ra), GFP_NOFS);
4866 
4867 	mutex_lock(&inode->i_mutex);
4868 	first_index = start >> PAGE_CACHE_SHIFT;
4869 	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
4870 
4871 	/* make sure the dirty trick played by the caller works */
4872 	ret = invalidate_inode_pages2_range(inode->i_mapping,
4873 					    first_index, last_index);
4874 	if (ret)
4875 		goto out_unlock;
4876 
4877 	file_ra_state_init(ra, inode->i_mapping);
4878 
4879 	for (i = first_index ; i <= last_index; i++) {
4880 		if (total_read % ra->ra_pages == 0) {
4881 			btrfs_force_ra(inode->i_mapping, ra, NULL, i,
4882 				       calc_ra(i, last_index, ra->ra_pages));
4883 		}
4884 		total_read++;
4885 again:
4886 		if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
4887 			BUG_ON(1);
4888 		page = grab_cache_page(inode->i_mapping, i);
4889 		if (!page) {
4890 			ret = -ENOMEM;
4891 			goto out_unlock;
4892 		}
4893 		if (!PageUptodate(page)) {
4894 			btrfs_readpage(NULL, page);
4895 			lock_page(page);
4896 			if (!PageUptodate(page)) {
4897 				unlock_page(page);
4898 				page_cache_release(page);
4899 				ret = -EIO;
4900 				goto out_unlock;
4901 			}
4902 		}
4903 		wait_on_page_writeback(page);
4904 
4905 		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
4906 		page_end = page_start + PAGE_CACHE_SIZE - 1;
4907 		lock_extent(io_tree, page_start, page_end, GFP_NOFS);
4908 
4909 		ordered = btrfs_lookup_ordered_extent(inode, page_start);
4910 		if (ordered) {
4911 			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4912 			unlock_page(page);
4913 			page_cache_release(page);
4914 			btrfs_start_ordered_extent(inode, ordered, 1);
4915 			btrfs_put_ordered_extent(ordered);
4916 			goto again;
4917 		}
4918 		set_page_extent_mapped(page);
4919 
4920 		if (i == first_index)
4921 			set_extent_bits(io_tree, page_start, page_end,
4922 					EXTENT_BOUNDARY, GFP_NOFS);
4923 		btrfs_set_extent_delalloc(inode, page_start, page_end);
4924 
4925 		set_page_dirty(page);
4926 		total_dirty++;
4927 
4928 		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4929 		unlock_page(page);
4930 		page_cache_release(page);
4931 	}
4932 
4933 out_unlock:
4934 	kfree(ra);
4935 	mutex_unlock(&inode->i_mutex);
4936 	balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
4937 	return ret;
4938 }
4939 
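/*
 * insert a pinned extent mapping so btrfs_readpage pulls the data from
 * its old location, then dirty the pages to force it to be written out
 * at the new one
 */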
4940 static noinline int relocate_data_extent(struct inode *reloc_inode,
4941 					 struct btrfs_key *extent_key,
4942 					 u64 offset)
4943 {
4944 	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
4945 	struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
4946 	struct extent_map *em;
4947 	u64 start = extent_key->objectid - offset;
4948 	u64 end = start + extent_key->offset - 1;
4949 
4950 	em = alloc_extent_map(GFP_NOFS);
4951 	BUG_ON(!em || IS_ERR(em));
4952 
4953 	em->start = start;
4954 	em->len = extent_key->offset;
4955 	em->block_len = extent_key->offset;
4956 	em->block_start = extent_key->objectid;
4957 	em->bdev = root->fs_info->fs_devices->latest_bdev;
4958 	set_bit(EXTENT_FLAG_PINNED, &em->flags);
4959 
4960 	/* setup extent map to cheat btrfs_readpage */
4961 	lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
4962 	while (1) {
4963 		int ret;
4964 		spin_lock(&em_tree->lock);
4965 		ret = add_extent_mapping(em_tree, em);
4966 		spin_unlock(&em_tree->lock);
4967 		if (ret != -EEXIST) {
4968 			free_extent_map(em);
4969 			break;
4970 		}
4971 		btrfs_drop_extent_cache(reloc_inode, start, end, 0);
4972 	}
4973 	unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
4974 
4975 	return relocate_inode_pages(reloc_inode, start, extent_key->offset);
4976 }
4977 
4978 struct btrfs_ref_path {
4979 	u64 extent_start;
4980 	u64 nodes[BTRFS_MAX_LEVEL];
4981 	u64 root_objectid;
4982 	u64 root_generation;
4983 	u64 owner_objectid;
4984 	u32 num_refs;
4985 	int lowest_level;
4986 	int current_level;
4987 	int shared_level;
4988 
4989 	struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
4990 	u64 new_nodes[BTRFS_MAX_LEVEL];
4991 };
4992 
4993 struct disk_extent {
4994 	u64 ram_bytes;
4995 	u64 disk_bytenr;
4996 	u64 disk_num_bytes;
4997 	u64 offset;
4998 	u64 num_bytes;
4999 	u8 compression;
5000 	u8 encryption;
5001 	u16 other_encoding;
5002 };
5003 
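/*
 * true for tree roots whose blocks are only COW'd, never reference
 * counted across snapshots
 */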
5004 static int is_cowonly_root(u64 root_objectid)
5005 {
5006 	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
5007 	    root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
5008 	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
5009 	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||
5010 	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
5011 	    root_objectid == BTRFS_CSUM_TREE_OBJECTID)
5012 		return 1;
5013 	return 0;
5014 }
5015 
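/*
 * step the old-style backref walk for ref_path->extent_start: walk_up
 * follows references toward a tree root, walk_down backtracks to the
 * next unvisited branch.  Returns 0 when a root is reached, 1 when the
 * search is exhausted.
 */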
5016 static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
5017 				    struct btrfs_root *extent_root,
5018 				    struct btrfs_ref_path *ref_path,
5019 				    int first_time)
5020 {
5021 	struct extent_buffer *leaf;
5022 	struct btrfs_path *path;
5023 	struct btrfs_extent_ref *ref;
5024 	struct btrfs_key key;
5025 	struct btrfs_key found_key;
5026 	u64 bytenr;
5027 	u32 nritems;
5028 	int level;
5029 	int ret = 1;
5030 
5031 	path = btrfs_alloc_path();
5032 	if (!path)
5033 		return -ENOMEM;
5034 
5035 	if (first_time) {
5036 		ref_path->lowest_level = -1;
5037 		ref_path->current_level = -1;
5038 		ref_path->shared_level = -1;
5039 		goto walk_up;
5040 	}
5041 walk_down:
5042 	level = ref_path->current_level - 1;
5043 	while (level >= -1) {
5044 		u64 parent;
5045 		if (level < ref_path->lowest_level)
5046 			break;
5047 
5048 		if (level >= 0)
5049 			bytenr = ref_path->nodes[level];
5050 		else
5051 			bytenr = ref_path->extent_start;
5052 		BUG_ON(bytenr == 0);
5053 
5054 		parent = ref_path->nodes[level + 1];
5055 		ref_path->nodes[level + 1] = 0;
5056 		ref_path->current_level = level;
5057 		BUG_ON(parent == 0);
5058 
5059 		key.objectid = bytenr;
5060 		key.offset = parent + 1;
5061 		key.type = BTRFS_EXTENT_REF_KEY;
5062 
5063 		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
5064 		if (ret < 0)
5065 			goto out;
5066 		BUG_ON(ret == 0);
5067 
5068 		leaf = path->nodes[0];
5069 		nritems = btrfs_header_nritems(leaf);
5070 		if (path->slots[0] >= nritems) {
5071 			ret = btrfs_next_leaf(extent_root, path);
5072 			if (ret < 0)
5073 				goto out;
5074 			if (ret > 0)
5075 				goto next;
5076 			leaf = path->nodes[0];
5077 		}
5078 
5079 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5080 		if (found_key.objectid == bytenr &&
5081 		    found_key.type == BTRFS_EXTENT_REF_KEY) {
5082 			if (level < ref_path->shared_level)
5083 				ref_path->shared_level = level;
5084 			goto found;
5085 		}
5086 next:
5087 		level--;
5088 		btrfs_release_path(extent_root, path);
5089 		cond_resched();
5090 	}
5091 	/* reached lowest level */
5092 	ret = 1;
5093 	goto out;
5094 walk_up:
5095 	level = ref_path->current_level;
5096 	while (level < BTRFS_MAX_LEVEL - 1) {
5097 		u64 ref_objectid;
5098 
5099 		if (level >= 0)
5100 			bytenr = ref_path->nodes[level];
5101 		else
5102 			bytenr = ref_path->extent_start;
5103 
5104 		BUG_ON(bytenr == 0);
5105 
5106 		key.objectid = bytenr;
5107 		key.offset = 0;
5108 		key.type = BTRFS_EXTENT_REF_KEY;
5109 
5110 		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
5111 		if (ret < 0)
5112 			goto out;
5113 
5114 		leaf = path->nodes[0];
5115 		nritems = btrfs_header_nritems(leaf);
5116 		if (path->slots[0] >= nritems) {
5117 			ret = btrfs_next_leaf(extent_root, path);
5118 			if (ret < 0)
5119 				goto out;
5120 			if (ret > 0) {
5121 				/* the extent was freed by someone */
5122 				if (ref_path->lowest_level == level)
5123 					goto out;
5124 				btrfs_release_path(extent_root, path);
5125 				goto walk_down;
5126 			}
5127 			leaf = path->nodes[0];
5128 		}
5129 
5130 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5131 		if (found_key.objectid != bytenr ||
5132 				found_key.type != BTRFS_EXTENT_REF_KEY) {
5133 			/* the extent was freed by someone */
5134 			if (ref_path->lowest_level == level) {
5135 				ret = 1;
5136 				goto out;
5137 			}
5138 			btrfs_release_path(extent_root, path);
5139 			goto walk_down;
5140 		}
5141 found:
5142 		ref = btrfs_item_ptr(leaf, path->slots[0],
5143 				struct btrfs_extent_ref);
5144 		ref_objectid = btrfs_ref_objectid(leaf, ref);
5145 		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
5146 			if (first_time) {
5147 				level = (int)ref_objectid;
5148 				BUG_ON(level >= BTRFS_MAX_LEVEL);
5149 				ref_path->lowest_level = level;
5150 				ref_path->current_level = level;
5151 				ref_path->nodes[level] = bytenr;
5152 			} else {
5153 				WARN_ON(ref_objectid != level);
5154 			}
5155 		} else {
5156 			WARN_ON(level != -1);
5157 		}
5158 		first_time = 0;
5159 
5160 		if (ref_path->lowest_level == level) {
5161 			ref_path->owner_objectid = ref_objectid;
5162 			ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
5163 		}
5164 
5165 		/*
5166 		 * the block is a tree root or the block isn't in a
5167 		 * reference counted tree.
5168 		 */
5169 		if (found_key.objectid == found_key.offset ||
5170 		    is_cowonly_root(btrfs_ref_root(leaf, ref))) {
5171 			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
5172 			ref_path->root_generation =
5173 				btrfs_ref_generation(leaf, ref);
5174 			if (level < 0) {
5175 				/* special reference from the tree log */
5176 				ref_path->nodes[0] = found_key.offset;
5177 				ref_path->current_level = 0;
5178 			}
5179 			ret = 0;
5180 			goto out;
5181 		}
5182 
5183 		level++;
5184 		BUG_ON(ref_path->nodes[level] != 0);
5185 		ref_path->nodes[level] = found_key.offset;
5186 		ref_path->current_level = level;
5187 
5188 		/*
5189 		 * the reference was created in the running transaction,
5190 		 * no need to continue walking up.
5191 		 */
5192 		if (btrfs_ref_generation(leaf, ref) == trans->transid) {
5193 			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
5194 			ref_path->root_generation =
5195 				btrfs_ref_generation(leaf, ref);
5196 			ret = 0;
5197 			goto out;
5198 		}
5199 
5200 		btrfs_release_path(extent_root, path);
5201 		cond_resched();
5202 	}
5203 	/* reached max tree level, but no tree root found. */
5204 	BUG();
5205 out:
5206 	btrfs_free_path(path);
5207 	return ret;
5208 }
5209 
5210 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
5211 				struct btrfs_root *extent_root,
5212 				struct btrfs_ref_path *ref_path,
5213 				u64 extent_start)
5214 {
5215 	memset(ref_path, 0, sizeof(*ref_path));
5216 	ref_path->extent_start = extent_start;
5217 
5218 	return __next_ref_path(trans, extent_root, ref_path, 1);
5219 }
5220 
5221 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
5222 			       struct btrfs_root *extent_root,
5223 			       struct btrfs_ref_path *ref_path)
5224 {
5225 	return __next_ref_path(trans, extent_root, ref_path, 0);
5226 }
5227 
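/*
 * collect the file extents of reloc_inode that cover the data range of
 * extent_key, growing the disk_extent array as needed.  With
 * no_fragment set, needing more than one extent is treated as failure.
 */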
5228 static noinline int get_new_locations(struct inode *reloc_inode,
5229 				      struct btrfs_key *extent_key,
5230 				      u64 offset, int no_fragment,
5231 				      struct disk_extent **extents,
5232 				      int *nr_extents)
5233 {
5234 	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
5235 	struct btrfs_path *path;
5236 	struct btrfs_file_extent_item *fi;
5237 	struct extent_buffer *leaf;
5238 	struct disk_extent *exts = *extents;
5239 	struct btrfs_key found_key;
5240 	u64 cur_pos;
5241 	u64 last_byte;
5242 	u32 nritems;
5243 	int nr = 0;
5244 	int max = *nr_extents;
5245 	int ret;
5246 
5247 	WARN_ON(!no_fragment && *extents);
5248 	if (!exts) {
5249 		max = 1;
5250 		exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
5251 		if (!exts)
5252 			return -ENOMEM;
5253 	}
5254 
5255 	path = btrfs_alloc_path();
5256 	BUG_ON(!path);
5257 
5258 	cur_pos = extent_key->objectid - offset;
5259 	last_byte = extent_key->objectid + extent_key->offset;
5260 	ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
5261 				       cur_pos, 0);
5262 	if (ret < 0)
5263 		goto out;
5264 	if (ret > 0) {
5265 		ret = -ENOENT;
5266 		goto out;
5267 	}
5268 
5269 	while (1) {
5270 		leaf = path->nodes[0];
5271 		nritems = btrfs_header_nritems(leaf);
5272 		if (path->slots[0] >= nritems) {
5273 			ret = btrfs_next_leaf(root, path);
5274 			if (ret < 0)
5275 				goto out;
5276 			if (ret > 0)
5277 				break;
5278 			leaf = path->nodes[0];
5279 		}
5280 
5281 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5282 		if (found_key.offset != cur_pos ||
5283 		    found_key.type != BTRFS_EXTENT_DATA_KEY ||
5284 		    found_key.objectid != reloc_inode->i_ino)
5285 			break;
5286 
5287 		fi = btrfs_item_ptr(leaf, path->slots[0],
5288 				    struct btrfs_file_extent_item);
5289 		if (btrfs_file_extent_type(leaf, fi) !=
5290 		    BTRFS_FILE_EXTENT_REG ||
5291 		    btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
5292 			break;
5293 
5294 		if (nr == max) {
5295 			struct disk_extent *old = exts;
5296 			max *= 2;
5297 			exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
			/* don't dereference a failed allocation */
			if (!exts) {
				exts = old;
				ret = -ENOMEM;
				goto out;
			}
5298 			memcpy(exts, old, sizeof(*exts) * nr);
5299 			if (old != *extents)
5300 				kfree(old);
5301 		}
5302 
5303 		exts[nr].disk_bytenr =
5304 			btrfs_file_extent_disk_bytenr(leaf, fi);
5305 		exts[nr].disk_num_bytes =
5306 			btrfs_file_extent_disk_num_bytes(leaf, fi);
5307 		exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
5308 		exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
5309 		exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
5310 		exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
5311 		exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
5312 		exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
5313 									   fi);
5314 		BUG_ON(exts[nr].offset > 0);
5315 		BUG_ON(exts[nr].compression || exts[nr].encryption);
5316 		BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
5317 
5318 		cur_pos += exts[nr].num_bytes;
5319 		nr++;
5320 
5321 		if (cur_pos + offset >= last_byte)
5322 			break;
5323 
5324 		if (no_fragment) {
5325 			ret = 1;
5326 			goto out;
5327 		}
5328 		path->slots[0]++;
5329 	}
5330 
5331 	BUG_ON(cur_pos + offset > last_byte);
5332 	if (cur_pos + offset < last_byte) {
5333 		ret = -ENOENT;
5334 		goto out;
5335 	}
5336 	ret = 0;
5337 out:
5338 	btrfs_free_path(path);
5339 	if (ret) {
5340 		if (exts != *extents)
5341 			kfree(exts);
5342 	} else {
5343 		*extents = exts;
5344 		*nr_extents = nr;
5345 	}
5346 	return ret;
5347 }
5348 
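/*
 * rewrite the file extent items that point at extent_key->objectid so
 * they point at the corresponding new_extents location, locking each
 * file range while its extent pointer is updated
 */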
5349 static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
5350 					struct btrfs_root *root,
5351 					struct btrfs_path *path,
5352 					struct btrfs_key *extent_key,
5353 					struct btrfs_key *leaf_key,
5354 					struct btrfs_ref_path *ref_path,
5355 					struct disk_extent *new_extents,
5356 					int nr_extents)
5357 {
5358 	struct extent_buffer *leaf;
5359 	struct btrfs_file_extent_item *fi;
5360 	struct inode *inode = NULL;
5361 	struct btrfs_key key;
5362 	u64 lock_start = 0;
5363 	u64 lock_end = 0;
5364 	u64 num_bytes;
5365 	u64 ext_offset;
5366 	u64 search_end = (u64)-1;
5367 	u32 nritems;
5368 	int nr_scaned = 0;
5369 	int extent_locked = 0;
5370 	int extent_type;
5371 	int ret;
5372 
5373 	memcpy(&key, leaf_key, sizeof(key));
5374 	if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
5375 		if (key.objectid < ref_path->owner_objectid ||
5376 		    (key.objectid == ref_path->owner_objectid &&
5377 		     key.type < BTRFS_EXTENT_DATA_KEY)) {
5378 			key.objectid = ref_path->owner_objectid;
5379 			key.type = BTRFS_EXTENT_DATA_KEY;
5380 			key.offset = 0;
5381 		}
5382 	}
5383 
5384 	while (1) {
5385 		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5386 		if (ret < 0)
5387 			goto out;
5388 
5389 		leaf = path->nodes[0];
5390 		nritems = btrfs_header_nritems(leaf);
5391 next:
5392 		if (extent_locked && ret > 0) {
5393 			/*
5394 			 * the file extent item was modified by someone
5395 			 * before the extent got locked.
5396 			 */
5397 			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
5398 				      lock_end, GFP_NOFS);
5399 			extent_locked = 0;
5400 		}
5401 
5402 		if (path->slots[0] >= nritems) {
5403 			if (++nr_scaned > 2)
5404 				break;
5405 
5406 			BUG_ON(extent_locked);
5407 			ret = btrfs_next_leaf(root, path);
5408 			if (ret < 0)
5409 				goto out;
5410 			if (ret > 0)
5411 				break;
5412 			leaf = path->nodes[0];
5413 			nritems = btrfs_header_nritems(leaf);
5414 		}
5415 
5416 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5417 
5418 		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
5419 			if ((key.objectid > ref_path->owner_objectid) ||
5420 			    (key.objectid == ref_path->owner_objectid &&
5421 			     key.type > BTRFS_EXTENT_DATA_KEY) ||
5422 			    key.offset >= search_end)
5423 				break;
5424 		}
5425 
5426 		if (inode && key.objectid != inode->i_ino) {
5427 			BUG_ON(extent_locked);
5428 			btrfs_release_path(root, path);
5429 			mutex_unlock(&inode->i_mutex);
5430 			iput(inode);
5431 			inode = NULL;
5432 			continue;
5433 		}
5434 
5435 		if (key.type != BTRFS_EXTENT_DATA_KEY) {
5436 			path->slots[0]++;
5437 			ret = 1;
5438 			goto next;
5439 		}
5440 		fi = btrfs_item_ptr(leaf, path->slots[0],
5441 				    struct btrfs_file_extent_item);
5442 		extent_type = btrfs_file_extent_type(leaf, fi);
5443 		if ((extent_type != BTRFS_FILE_EXTENT_REG &&
5444 		     extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
5445 		    (btrfs_file_extent_disk_bytenr(leaf, fi) !=
5446 		     extent_key->objectid)) {
5447 			path->slots[0]++;
5448 			ret = 1;
5449 			goto next;
5450 		}
5451 
5452 		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
5453 		ext_offset = btrfs_file_extent_offset(leaf, fi);
5454 
5455 		if (search_end == (u64)-1) {
5456 			search_end = key.offset - ext_offset +
5457 				btrfs_file_extent_ram_bytes(leaf, fi);
5458 		}
5459 
5460 		if (!extent_locked) {
5461 			lock_start = key.offset;
5462 			lock_end = lock_start + num_bytes - 1;
5463 		} else {
5464 			if (lock_start > key.offset ||
5465 			    lock_end + 1 < key.offset + num_bytes) {
5466 				unlock_extent(&BTRFS_I(inode)->io_tree,
5467 					      lock_start, lock_end, GFP_NOFS);
5468 				extent_locked = 0;
5469 			}
5470 		}
5471 
5472 		if (!inode) {
5473 			btrfs_release_path(root, path);
5474 
5475 			inode = btrfs_iget_locked(root->fs_info->sb,
5476 						  key.objectid, root);
5477 			if (inode->i_state & I_NEW) {
5478 				BTRFS_I(inode)->root = root;
5479 				BTRFS_I(inode)->location.objectid =
5480 					key.objectid;
5481 				BTRFS_I(inode)->location.type =
5482 					BTRFS_INODE_ITEM_KEY;
5483 				BTRFS_I(inode)->location.offset = 0;
5484 				btrfs_read_locked_inode(inode);
5485 				unlock_new_inode(inode);
5486 			}
5487 			/*
5488 			 * some code calls btrfs_commit_transaction while
5489 			 * holding the i_mutex, so we can't use mutex_lock
5490 			 * here.
5491 			 */
5492 			if (is_bad_inode(inode) ||
5493 			    !mutex_trylock(&inode->i_mutex)) {
5494 				iput(inode);
5495 				inode = NULL;
5496 				key.offset = (u64)-1;
5497 				goto skip;
5498 			}
5499 		}
5500 
5501 		if (!extent_locked) {
5502 			struct btrfs_ordered_extent *ordered;
5503 
5504 			btrfs_release_path(root, path);
5505 
5506 			lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
5507 				    lock_end, GFP_NOFS);
5508 			ordered = btrfs_lookup_first_ordered_extent(inode,
5509 								    lock_end);
5510 			if (ordered &&
5511 			    ordered->file_offset <= lock_end &&
5512 			    ordered->file_offset + ordered->len > lock_start) {
5513 				unlock_extent(&BTRFS_I(inode)->io_tree,
5514 					      lock_start, lock_end, GFP_NOFS);
5515 				btrfs_start_ordered_extent(inode, ordered, 1);
5516 				btrfs_put_ordered_extent(ordered);
5517 				key.offset += num_bytes;
5518 				goto skip;
5519 			}
5520 			if (ordered)
5521 				btrfs_put_ordered_extent(ordered);
5522 
5523 			extent_locked = 1;
5524 			continue;
5525 		}
5526 
5527 		if (nr_extents == 1) {
5528 			/* update extent pointer in place */
5529 			btrfs_set_file_extent_disk_bytenr(leaf, fi,
5530 						new_extents[0].disk_bytenr);
5531 			btrfs_set_file_extent_disk_num_bytes(leaf, fi,
5532 						new_extents[0].disk_num_bytes);
5533 			btrfs_mark_buffer_dirty(leaf);
5534 
5535 			btrfs_drop_extent_cache(inode, key.offset,
5536 						key.offset + num_bytes - 1, 0);
5537 
5538 			ret = btrfs_inc_extent_ref(trans, root,
5539 						new_extents[0].disk_bytenr,
5540 						new_extents[0].disk_num_bytes,
5541 						leaf->start,
5542 						root->root_key.objectid,
5543 						trans->transid,
5544 						key.objectid);
5545 			BUG_ON(ret);
5546 
5547 			ret = btrfs_free_extent(trans, root,
5548 						extent_key->objectid,
5549 						extent_key->offset,
5550 						leaf->start,
5551 						btrfs_header_owner(leaf),
5552 						btrfs_header_generation(leaf),
5553 						key.objectid, 0);
5554 			BUG_ON(ret);
5555 
5556 			btrfs_release_path(root, path);
5557 			key.offset += num_bytes;
5558 		} else {
5559 			BUG_ON(1);
5560 #if 0
5561 			u64 alloc_hint;
5562 			u64 extent_len;
5563 			int i;
5564 			/*
5565 			 * drop the old extent pointer first, then insert the
5566 			 * new pointers one by one
5567 			 */
5568 			btrfs_release_path(root, path);
5569 			ret = btrfs_drop_extents(trans, root, inode, key.offset,
5570 						 key.offset + num_bytes,
5571 						 key.offset, &alloc_hint);
5572 			BUG_ON(ret);
5573 
5574 			for (i = 0; i < nr_extents; i++) {
5575 				if (ext_offset >= new_extents[i].num_bytes) {
5576 					ext_offset -= new_extents[i].num_bytes;
5577 					continue;
5578 				}
5579 				extent_len = min(new_extents[i].num_bytes -
5580 						 ext_offset, num_bytes);
5581 
5582 				ret = btrfs_insert_empty_item(trans, root,
5583 							      path, &key,
5584 							      sizeof(*fi));
5585 				BUG_ON(ret);
5586 
5587 				leaf = path->nodes[0];
5588 				fi = btrfs_item_ptr(leaf, path->slots[0],
5589 						struct btrfs_file_extent_item);
5590 				btrfs_set_file_extent_generation(leaf, fi,
5591 							trans->transid);
5592 				btrfs_set_file_extent_type(leaf, fi,
5593 							BTRFS_FILE_EXTENT_REG);
5594 				btrfs_set_file_extent_disk_bytenr(leaf, fi,
5595 						new_extents[i].disk_bytenr);
5596 				btrfs_set_file_extent_disk_num_bytes(leaf, fi,
5597 						new_extents[i].disk_num_bytes);
5598 				btrfs_set_file_extent_ram_bytes(leaf, fi,
5599 						new_extents[i].ram_bytes);
5600 
5601 				btrfs_set_file_extent_compression(leaf, fi,
5602 						new_extents[i].compression);
5603 				btrfs_set_file_extent_encryption(leaf, fi,
5604 						new_extents[i].encryption);
5605 				btrfs_set_file_extent_other_encoding(leaf, fi,
5606 						new_extents[i].other_encoding);
5607 
5608 				btrfs_set_file_extent_num_bytes(leaf, fi,
5609 							extent_len);
5610 				ext_offset += new_extents[i].offset;
5611 				btrfs_set_file_extent_offset(leaf, fi,
5612 							ext_offset);
5613 				btrfs_mark_buffer_dirty(leaf);
5614 
5615 				btrfs_drop_extent_cache(inode, key.offset,
5616 						key.offset + extent_len - 1, 0);
5617 
5618 				ret = btrfs_inc_extent_ref(trans, root,
5619 						new_extents[i].disk_bytenr,
5620 						new_extents[i].disk_num_bytes,
5621 						leaf->start,
5622 						root->root_key.objectid,
5623 						trans->transid, key.objectid);
5624 				BUG_ON(ret);
5625 				btrfs_release_path(root, path);
5626 
5627 				inode_add_bytes(inode, extent_len);
5628 
5629 				ext_offset = 0;
5630 				num_bytes -= extent_len;
5631 				key.offset += extent_len;
5632 
5633 				if (num_bytes == 0)
5634 					break;
5635 			}
5636 			BUG_ON(i >= nr_extents);
5637 #endif
5638 		}
5639 
5640 		if (extent_locked) {
5641 			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
5642 				      lock_end, GFP_NOFS);
5643 			extent_locked = 0;
5644 		}
5645 skip:
5646 		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
5647 		    key.offset >= search_end)
5648 			break;
5649 
5650 		cond_resched();
5651 	}
5652 	ret = 0;
5653 out:
5654 	btrfs_release_path(root, path);
5655 	if (inode) {
5656 		mutex_unlock(&inode->i_mutex);
5657 		if (extent_locked) {
5658 			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
5659 				      lock_end, GFP_NOFS);
5660 		}
5661 		iput(inode);
5662 	}
5663 	return ret;
5664 }
5665 
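/*
 * a leaf of the reloc tree was just COWed from @orig_start to @buf.
 * duplicate the cached leaf ref of the original block for the new block
 * so the cached extent reference information survives the COW.
 */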
5666 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
5667 			       struct btrfs_root *root,
5668 			       struct extent_buffer *buf, u64 orig_start)
5669 {
5670 	int level;
5671 	int ret;
5672 
5673 	BUG_ON(btrfs_header_generation(buf) != trans->transid);
5674 	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5675 
5676 	level = btrfs_header_level(buf);
5677 	if (level == 0) {
5678 		struct btrfs_leaf_ref *ref;
5679 		struct btrfs_leaf_ref *orig_ref;
5680 
5681 		orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
5682 		if (!orig_ref)
5683 			return -ENOENT;
5684 
5685 		ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
5686 		if (!ref) {
5687 			btrfs_free_leaf_ref(root, orig_ref);
5688 			return -ENOMEM;
5689 		}
5690 
5691 		ref->nritems = orig_ref->nritems;
5692 		memcpy(ref->extents, orig_ref->extents,
5693 			sizeof(ref->extents[0]) * ref->nritems);
5694 
5695 		btrfs_free_leaf_ref(root, orig_ref);
5696 
5697 		ref->root_gen = trans->transid;
5698 		ref->bytenr = buf->start;
5699 		ref->owner = btrfs_header_owner(buf);
5700 		ref->generation = btrfs_header_generation(buf);
5701 
5702 		ret = btrfs_add_leaf_ref(root, ref, 0);
5703 		WARN_ON(ret);
5704 		btrfs_free_leaf_ref(root, ref);
5705 	}
5706 	return 0;
5707 }
5708 
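/*
 * drop the cached extent mappings in @target_root's inodes for every
 * file extent referenced by @leaf, so the relocated extents are looked
 * up from the tree again on the next access.
 */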
5709 static noinline int invalidate_extent_cache(struct btrfs_root *root,
5710 					struct extent_buffer *leaf,
5711 					struct btrfs_block_group_cache *group,
5712 					struct btrfs_root *target_root)
5713 {
5714 	struct btrfs_key key;
5715 	struct inode *inode = NULL;
5716 	struct btrfs_file_extent_item *fi;
5717 	u64 num_bytes;
5718 	u64 skip_objectid = 0;
5719 	u32 nritems;
5720 	u32 i;
5721 
5722 	nritems = btrfs_header_nritems(leaf);
5723 	for (i = 0; i < nritems; i++) {
5724 		btrfs_item_key_to_cpu(leaf, &key, i);
5725 		if (key.objectid == skip_objectid ||
5726 		    key.type != BTRFS_EXTENT_DATA_KEY)
5727 			continue;
5728 		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
5729 		if (btrfs_file_extent_type(leaf, fi) ==
5730 		    BTRFS_FILE_EXTENT_INLINE)
5731 			continue;
5732 		if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
5733 			continue;
5734 		if (!inode || inode->i_ino != key.objectid) {
5735 			iput(inode);
5736 			inode = btrfs_ilookup(target_root->fs_info->sb,
5737 					      key.objectid, target_root, 1);
5738 		}
5739 		if (!inode) {
5740 			skip_objectid = key.objectid;
5741 			continue;
5742 		}
5743 		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
5744 
5745 		lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
5746 			    key.offset + num_bytes - 1, GFP_NOFS);
5747 		btrfs_drop_extent_cache(inode, key.offset,
5748 					key.offset + num_bytes - 1, 1);
5749 		unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
5750 			      key.offset + num_bytes - 1, GFP_NOFS);
5751 		cond_resched();
5752 	}
5753 	iput(inode);
5754 	return 0;
5755 }
5756 
5757 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
5758 					struct btrfs_root *root,
5759 					struct extent_buffer *leaf,
5760 					struct btrfs_block_group_cache *group,
5761 					struct inode *reloc_inode)
5762 {
5763 	struct btrfs_key key;
5764 	struct btrfs_key extent_key;
5765 	struct btrfs_file_extent_item *fi;
5766 	struct btrfs_leaf_ref *ref;
5767 	struct disk_extent *new_extent;
5768 	u64 bytenr;
5769 	u64 num_bytes;
5770 	u32 nritems;
5771 	u32 i;
5772 	int ext_index;
5773 	int nr_extent;
5774 	int ret;
5775 
5776 	new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
5777 	BUG_ON(!new_extent);
5778 
5779 	ref = btrfs_lookup_leaf_ref(root, leaf->start);
5780 	BUG_ON(!ref);
5781 
5782 	ext_index = -1;
5783 	nritems = btrfs_header_nritems(leaf);
5784 	for (i = 0; i < nritems; i++) {
5785 		btrfs_item_key_to_cpu(leaf, &key, i);
5786 		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
5787 			continue;
5788 		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
5789 		if (btrfs_file_extent_type(leaf, fi) ==
5790 		    BTRFS_FILE_EXTENT_INLINE)
5791 			continue;
5792 		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
5793 		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
5794 		if (bytenr == 0)
5795 			continue;
5796 
5797 		ext_index++;
5798 		if (bytenr >= group->key.objectid + group->key.offset ||
5799 		    bytenr + num_bytes <= group->key.objectid)
5800 			continue;
5801 
5802 		extent_key.objectid = bytenr;
5803 		extent_key.offset = num_bytes;
5804 		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
5805 		nr_extent = 1;
5806 		ret = get_new_locations(reloc_inode, &extent_key,
5807 					group->key.objectid, 1,
5808 					&new_extent, &nr_extent);
5809 		if (ret > 0)
5810 			continue;
5811 		BUG_ON(ret < 0);
5812 
5813 		BUG_ON(ref->extents[ext_index].bytenr != bytenr);
5814 		BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
5815 		ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
5816 		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
5817 
5818 		btrfs_set_file_extent_disk_bytenr(leaf, fi,
5819 						new_extent->disk_bytenr);
5820 		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
5821 						new_extent->disk_num_bytes);
5822 		btrfs_mark_buffer_dirty(leaf);
5823 
5824 		ret = btrfs_inc_extent_ref(trans, root,
5825 					new_extent->disk_bytenr,
5826 					new_extent->disk_num_bytes,
5827 					leaf->start,
5828 					root->root_key.objectid,
5829 					trans->transid, key.objectid);
5830 		BUG_ON(ret);
5831 
5832 		ret = btrfs_free_extent(trans, root,
5833 					bytenr, num_bytes, leaf->start,
5834 					btrfs_header_owner(leaf),
5835 					btrfs_header_generation(leaf),
5836 					key.objectid, 0);
5837 		BUG_ON(ret);
5838 		cond_resched();
5839 	}
5840 	kfree(new_extent);
5841 	BUG_ON(ext_index + 1 != ref->nritems);
5842 	btrfs_free_leaf_ref(root, ref);
5843 	return 0;
5844 }
5845 
5846 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
5847 			  struct btrfs_root *root)
5848 {
5849 	struct btrfs_root *reloc_root;
5850 	int ret;
5851 
5852 	if (root->reloc_root) {
5853 		reloc_root = root->reloc_root;
5854 		root->reloc_root = NULL;
5855 		list_add(&reloc_root->dead_list,
5856 			 &root->fs_info->dead_reloc_roots);
5857 
5858 		btrfs_set_root_bytenr(&reloc_root->root_item,
5859 				      reloc_root->node->start);
5860 		btrfs_set_root_level(&reloc_root->root_item,
5861 				     btrfs_header_level(reloc_root->node));
5862 		memset(&reloc_root->root_item.drop_progress, 0,
5863 			sizeof(struct btrfs_disk_key));
5864 		reloc_root->root_item.drop_level = 0;
5865 
5866 		ret = btrfs_update_root(trans, root->fs_info->tree_root,
5867 					&reloc_root->root_key,
5868 					&reloc_root->root_item);
5869 		BUG_ON(ret);
5870 	}
5871 	return 0;
5872 }
5873 
5874 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
5875 {
5876 	struct btrfs_trans_handle *trans;
5877 	struct btrfs_root *reloc_root;
5878 	struct btrfs_root *prev_root = NULL;
5879 	struct list_head dead_roots;
5880 	int ret;
5881 	unsigned long nr;
5882 
5883 	INIT_LIST_HEAD(&dead_roots);
5884 	list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
5885 
5886 	while (!list_empty(&dead_roots)) {
5887 		reloc_root = list_entry(dead_roots.prev,
5888 					struct btrfs_root, dead_list);
5889 		list_del_init(&reloc_root->dead_list);
5890 
5891 		BUG_ON(reloc_root->commit_root != NULL);
5892 		while (1) {
5893 			trans = btrfs_join_transaction(root, 1);
5894 			BUG_ON(!trans);
5895 
5896 			mutex_lock(&root->fs_info->drop_mutex);
5897 			ret = btrfs_drop_snapshot(trans, reloc_root);
5898 			if (ret != -EAGAIN)
5899 				break;
5900 			mutex_unlock(&root->fs_info->drop_mutex);
5901 
5902 			nr = trans->blocks_used;
5903 			ret = btrfs_end_transaction(trans, root);
5904 			BUG_ON(ret);
5905 			btrfs_btree_balance_dirty(root, nr);
5906 		}
5907 
5908 		free_extent_buffer(reloc_root->node);
5909 
5910 		ret = btrfs_del_root(trans, root->fs_info->tree_root,
5911 				     &reloc_root->root_key);
5912 		BUG_ON(ret);
5913 		mutex_unlock(&root->fs_info->drop_mutex);
5914 
5915 		nr = trans->blocks_used;
5916 		ret = btrfs_end_transaction(trans, root);
5917 		BUG_ON(ret);
5918 		btrfs_btree_balance_dirty(root, nr);
5919 
5920 		kfree(prev_root);
5921 		prev_root = reloc_root;
5922 	}
5923 	if (prev_root) {
5924 		btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
5925 		kfree(prev_root);
5926 	}
5927 	return 0;
5928 }
5929 
5930 int btrfs_add_dead_reloc_root(struct btrfs_root *root)
5931 {
5932 	list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
5933 	return 0;
5934 }
5935 
5936 int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
5937 {
5938 	struct btrfs_root *reloc_root;
5939 	struct btrfs_trans_handle *trans;
5940 	struct btrfs_key location;
5941 	int found;
5942 	int ret;
5943 
5944 	mutex_lock(&root->fs_info->tree_reloc_mutex);
5945 	ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
5946 	BUG_ON(ret);
5947 	found = !list_empty(&root->fs_info->dead_reloc_roots);
5948 	mutex_unlock(&root->fs_info->tree_reloc_mutex);
5949 
5950 	if (found) {
5951 		trans = btrfs_start_transaction(root, 1);
5952 		BUG_ON(!trans);
5953 		ret = btrfs_commit_transaction(trans, root);
5954 		BUG_ON(ret);
5955 	}
5956 
5957 	location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
5958 	location.offset = (u64)-1;
5959 	location.type = BTRFS_ROOT_ITEM_KEY;
5960 
5961 	reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
5962 	BUG_ON(!reloc_root);
5963 	btrfs_orphan_cleanup(reloc_root);
5964 	return 0;
5965 }
5966 
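/*
 * create the reloc tree for @root if it doesn't exist yet: snapshot
 * root->commit_root into a new root whose key is
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, objectid of @root).
 */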
5967 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
5968 				    struct btrfs_root *root)
5969 {
5970 	struct btrfs_root *reloc_root;
5971 	struct extent_buffer *eb;
5972 	struct btrfs_root_item *root_item;
5973 	struct btrfs_key root_key;
5974 	int ret;
5975 
5976 	BUG_ON(!root->ref_cows);
5977 	if (root->reloc_root)
5978 		return 0;
5979 
5980 	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
5981 	BUG_ON(!root_item);
5982 
5983 	ret = btrfs_copy_root(trans, root, root->commit_root,
5984 			      &eb, BTRFS_TREE_RELOC_OBJECTID);
5985 	BUG_ON(ret);
5986 
5987 	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
5988 	root_key.offset = root->root_key.objectid;
5989 	root_key.type = BTRFS_ROOT_ITEM_KEY;
5990 
5991 	memcpy(root_item, &root->root_item, sizeof(*root_item));
5992 	btrfs_set_root_refs(root_item, 0);
5993 	btrfs_set_root_bytenr(root_item, eb->start);
5994 	btrfs_set_root_level(root_item, btrfs_header_level(eb));
5995 	btrfs_set_root_generation(root_item, trans->transid);
5996 
5997 	btrfs_tree_unlock(eb);
5998 	free_extent_buffer(eb);
5999 
6000 	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
6001 				&root_key, root_item);
6002 	BUG_ON(ret);
6003 	kfree(root_item);
6004 
6005 	reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
6006 						 &root_key);
6007 	BUG_ON(!reloc_root);
6008 	reloc_root->last_trans = trans->transid;
6009 	reloc_root->commit_root = NULL;
6010 	reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
6011 
6012 	root->reloc_root = reloc_root;
6013 	return 0;
6014 }
6015 
6016 /*
6017  * Core function of space balance.
6018  *
6019  * The idea is to use reloc trees to relocate tree blocks in reference
6020  * counted roots. There is one reloc tree for each subvol, and all
6021  * reloc trees share the same root key objectid. Reloc trees are
6022  * snapshots of the latest committed roots of subvols (root->commit_root).
6023  *
6024  * Relocating a tree block referenced by a subvol takes two steps:
6025  * COW the block through the subvol's reloc tree, then update the block
6026  * pointer in the subvol to point to the new block. Since all reloc
6027  * trees share the same root key objectid, special handling for tree
6028  * blocks owned by them is easy. Once a tree block has been COWed in one
6029  * reloc tree, the resulting new block can be used directly when the
6030  * same block needs to be COWed again through other reloc trees. In this
6031  * way, relocated tree blocks are shared between reloc trees, so they
6032  * are also shared between subvols.
6033  */
6034 static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
6035 				      struct btrfs_root *root,
6036 				      struct btrfs_path *path,
6037 				      struct btrfs_key *first_key,
6038 				      struct btrfs_ref_path *ref_path,
6039 				      struct btrfs_block_group_cache *group,
6040 				      struct inode *reloc_inode)
6041 {
6042 	struct btrfs_root *reloc_root;
6043 	struct extent_buffer *eb = NULL;
6044 	struct btrfs_key *keys;
6045 	u64 *nodes;
6046 	int level;
6047 	int shared_level;
6048 	int lowest_level = 0;
6049 	int ret;
6050 
6051 	if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
6052 		lowest_level = ref_path->owner_objectid;
6053 
6054 	if (!root->ref_cows) {
6055 		path->lowest_level = lowest_level;
6056 		ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
6057 		BUG_ON(ret < 0);
6058 		path->lowest_level = 0;
6059 		btrfs_release_path(root, path);
6060 		return 0;
6061 	}
6062 
6063 	mutex_lock(&root->fs_info->tree_reloc_mutex);
6064 	ret = init_reloc_tree(trans, root);
6065 	BUG_ON(ret);
6066 	reloc_root = root->reloc_root;
6067 
6068 	shared_level = ref_path->shared_level;
6069 	ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
6070 
6071 	keys = ref_path->node_keys;
6072 	nodes = ref_path->new_nodes;
6073 	memset(&keys[shared_level + 1], 0,
6074 	       sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
6075 	memset(&nodes[shared_level + 1], 0,
6076 	       sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
6077 
6078 	if (nodes[lowest_level] == 0) {
6079 		path->lowest_level = lowest_level;
6080 		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
6081 					0, 1);
6082 		BUG_ON(ret);
6083 		for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
6084 			eb = path->nodes[level];
6085 			if (!eb || eb == reloc_root->node)
6086 				break;
6087 			nodes[level] = eb->start;
6088 			if (level == 0)
6089 				btrfs_item_key_to_cpu(eb, &keys[level], 0);
6090 			else
6091 				btrfs_node_key_to_cpu(eb, &keys[level], 0);
6092 		}
6093 		if (nodes[0] &&
6094 		    ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
6095 			eb = path->nodes[0];
6096 			ret = replace_extents_in_leaf(trans, reloc_root, eb,
6097 						      group, reloc_inode);
6098 			BUG_ON(ret);
6099 		}
6100 		btrfs_release_path(reloc_root, path);
6101 	} else {
6102 		ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
6103 				       lowest_level);
6104 		BUG_ON(ret);
6105 	}
6106 
6107 	/*
6108 	 * replace tree blocks in the fs tree with tree blocks in
6109 	 * the reloc tree.
6110 	 */
6111 	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
6112 	BUG_ON(ret < 0);
6113 
6114 	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
6115 		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
6116 					0, 0);
6117 		BUG_ON(ret);
6118 		extent_buffer_get(path->nodes[0]);
6119 		eb = path->nodes[0];
6120 		btrfs_release_path(reloc_root, path);
6121 		ret = invalidate_extent_cache(reloc_root, eb, group, root);
6122 		BUG_ON(ret);
6123 		free_extent_buffer(eb);
6124 	}
6125 
6126 	mutex_unlock(&root->fs_info->tree_reloc_mutex);
6127 	path->lowest_level = 0;
6128 	return 0;
6129 }
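
/*
 * A minimal sketch (not in the original file) of the property the
 * comment above relocate_one_path() relies on: every reloc tree uses
 * BTRFS_TREE_RELOC_OBJECTID as its root key objectid, so a block COWed
 * through any reloc tree can be recognized from its header owner alone,
 * no matter which subvol's reloc tree did the COW.
 */
static inline int __maybe_unused
block_owned_by_reloc_tree(struct extent_buffer *buf)
{
	/* blocks COWed via a reloc root inherit its root key objectid */
	return btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID;
}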
6130 
6131 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
6132 					struct btrfs_root *root,
6133 					struct btrfs_path *path,
6134 					struct btrfs_key *first_key,
6135 					struct btrfs_ref_path *ref_path)
6136 {
6137 	int ret;
6138 
6139 	ret = relocate_one_path(trans, root, path, first_key,
6140 				ref_path, NULL, NULL);
6141 	BUG_ON(ret);
6142 
6143 	return 0;
6144 }
6145 
6146 static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
6147 				    struct btrfs_root *extent_root,
6148 				    struct btrfs_path *path,
6149 				    struct btrfs_key *extent_key)
6150 {
6151 	int ret;
6152 
6153 	ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
6154 	if (ret)
6155 		goto out;
6156 	ret = btrfs_del_item(trans, extent_root, path);
6157 out:
6158 	btrfs_release_path(extent_root, path);
6159 	return ret;
6160 }
6161 
6162 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
6163 						struct btrfs_ref_path *ref_path)
6164 {
6165 	struct btrfs_key root_key;
6166 
6167 	root_key.objectid = ref_path->root_objectid;
6168 	root_key.type = BTRFS_ROOT_ITEM_KEY;
6169 	if (is_cowonly_root(ref_path->root_objectid))
6170 		root_key.offset = 0;
6171 	else
6172 		root_key.offset = (u64)-1;
6173 
6174 	return btrfs_read_fs_root_no_name(fs_info, &root_key);
6175 }
6176 
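/*
 * relocate all references to the extent recorded in @extent_key.  For
 * data extents, pass 0 copies the data into the reloc inode, pass 1
 * updates the references through the reloc trees, and later passes fall
 * back to rewriting the file extent pointers directly.  Tree blocks are
 * relocated through their owning root's reloc tree.
 */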
6177 static noinline int relocate_one_extent(struct btrfs_root *extent_root,
6178 					struct btrfs_path *path,
6179 					struct btrfs_key *extent_key,
6180 					struct btrfs_block_group_cache *group,
6181 					struct inode *reloc_inode, int pass)
6182 {
6183 	struct btrfs_trans_handle *trans;
6184 	struct btrfs_root *found_root;
6185 	struct btrfs_ref_path *ref_path = NULL;
6186 	struct disk_extent *new_extents = NULL;
6187 	int nr_extents = 0;
6188 	int loops;
6189 	int ret;
6190 	int level;
6191 	struct btrfs_key first_key;
6192 	u64 prev_block = 0;
6193 
6194 
6195 	trans = btrfs_start_transaction(extent_root, 1);
6196 	BUG_ON(!trans);
6197 
6198 	if (extent_key->objectid == 0) {
6199 		ret = del_extent_zero(trans, extent_root, path, extent_key);
6200 		goto out;
6201 	}
6202 
6203 	ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
6204 	if (!ref_path) {
6205 		ret = -ENOMEM;
6206 		goto out;
6207 	}
6208 
6209 	for (loops = 0; ; loops++) {
6210 		if (loops == 0) {
6211 			ret = btrfs_first_ref_path(trans, extent_root, ref_path,
6212 						   extent_key->objectid);
6213 		} else {
6214 			ret = btrfs_next_ref_path(trans, extent_root, ref_path);
6215 		}
6216 		if (ret < 0)
6217 			goto out;
6218 		if (ret > 0)
6219 			break;
6220 
6221 		if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
6222 		    ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
6223 			continue;
6224 
6225 		found_root = read_ref_root(extent_root->fs_info, ref_path);
6226 		BUG_ON(!found_root);
6227 		/*
6228 		 * for reference counted trees, only process reference paths
6229 		 * rooted at the latest committed root.
6230 		 */
6231 		if (found_root->ref_cows &&
6232 		    ref_path->root_generation != found_root->root_key.offset)
6233 			continue;
6234 
6235 		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
6236 			if (pass == 0) {
6237 				/*
6238 				 * copy data extents to new locations
6239 				 */
6240 				u64 group_start = group->key.objectid;
6241 				ret = relocate_data_extent(reloc_inode,
6242 							   extent_key,
6243 							   group_start);
6244 				if (ret < 0)
6245 					goto out;
6246 				break;
6247 			}
6248 			level = 0;
6249 		} else {
6250 			level = ref_path->owner_objectid;
6251 		}
6252 
6253 		if (prev_block != ref_path->nodes[level]) {
6254 			struct extent_buffer *eb;
6255 			u64 block_start = ref_path->nodes[level];
6256 			u64 block_size = btrfs_level_size(found_root, level);
6257 
6258 			eb = read_tree_block(found_root, block_start,
6259 					     block_size, 0);
6260 			btrfs_tree_lock(eb);
6261 			BUG_ON(level != btrfs_header_level(eb));
6262 
6263 			if (level == 0)
6264 				btrfs_item_key_to_cpu(eb, &first_key, 0);
6265 			else
6266 				btrfs_node_key_to_cpu(eb, &first_key, 0);
6267 
6268 			btrfs_tree_unlock(eb);
6269 			free_extent_buffer(eb);
6270 			prev_block = block_start;
6271 		}
6272 
6273 		mutex_lock(&extent_root->fs_info->trans_mutex);
6274 		btrfs_record_root_in_trans(found_root);
6275 		mutex_unlock(&extent_root->fs_info->trans_mutex);
6276 		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
6277 			/*
6278 			 * try to update data extent references while
6279 			 * keeping metadata shared between snapshots.
6280 			 */
6281 			if (pass == 1) {
6282 				ret = relocate_one_path(trans, found_root,
6283 						path, &first_key, ref_path,
6284 						group, reloc_inode);
6285 				if (ret < 0)
6286 					goto out;
6287 				continue;
6288 			}
6289 			/*
6290 			 * use fallback method to process the remaining
6291 			 * references.
6292 			 */
6293 			if (!new_extents) {
6294 				u64 group_start = group->key.objectid;
6295 				new_extents = kmalloc(sizeof(*new_extents),
6296 						      GFP_NOFS);
				if (!new_extents) {
					ret = -ENOMEM;
					goto out;
				}
6297 				nr_extents = 1;
6298 				ret = get_new_locations(reloc_inode,
6299 							extent_key,
6300 							group_start, 1,
6301 							&new_extents,
6302 							&nr_extents);
6303 				if (ret)
6304 					goto out;
6305 			}
6306 			ret = replace_one_extent(trans, found_root,
6307 						path, extent_key,
6308 						&first_key, ref_path,
6309 						new_extents, nr_extents);
6310 		} else {
6311 			ret = relocate_tree_block(trans, found_root, path,
6312 						  &first_key, ref_path);
6313 		}
6314 		if (ret < 0)
6315 			goto out;
6316 	}
6317 	ret = 0;
6318 out:
6319 	btrfs_end_transaction(trans, extent_root);
6320 	kfree(new_extents);
6321 	kfree(ref_path);
6322 	return ret;
6323 }
6324 #endif
6325 
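/*
 * pick the RAID profile a relocated chunk should use.  With a single rw
 * device, striping and mirroring degrade to single and DUP chunks; with
 * several devices, DUP is upgraded to RAID1 and single chunks to RAID0,
 * while profiles that already stripe or mirror are left alone.  For
 * example, RAID1 flags on a one-device filesystem come back as DUP.
 */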
6326 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
6327 {
6328 	u64 num_devices;
6329 	u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
6330 		BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
6331 
6332 	num_devices = root->fs_info->fs_devices->rw_devices;
6333 	if (num_devices == 1) {
6334 		stripped |= BTRFS_BLOCK_GROUP_DUP;
6335 		stripped = flags & ~stripped;
6336 
6337 		/* turn raid0 into single device chunks */
6338 		if (flags & BTRFS_BLOCK_GROUP_RAID0)
6339 			return stripped;
6340 
6341 		/* turn mirroring into duplication */
6342 		if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
6343 			     BTRFS_BLOCK_GROUP_RAID10))
6344 			return stripped | BTRFS_BLOCK_GROUP_DUP;
6345 		return flags;
6346 	} else {
6347 		/* they already had raid on here, just return */
6348 		if (flags & stripped)
6349 			return flags;
6350 
6351 		stripped |= BTRFS_BLOCK_GROUP_DUP;
6352 		stripped = flags & ~stripped;
6353 
6354 		/* switch duplicated blocks with raid1 */
6355 		if (flags & BTRFS_BLOCK_GROUP_DUP)
6356 			return stripped | BTRFS_BLOCK_GROUP_RAID1;
6357 
6358 		/* turn single device chunks into raid0 */
6359 		return stripped | BTRFS_BLOCK_GROUP_RAID0;
6360 	}
6361 	return flags;
6362 }
6363 
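/*
 * make sure there is room to relocate @shrink_block_group's contents:
 * unless the group is already empty, allocate a new chunk using the
 * (possibly converted) profile from update_block_group_flags().
 */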
6364 static int __alloc_chunk_for_shrink(struct btrfs_root *root,
6365 		     struct btrfs_block_group_cache *shrink_block_group,
6366 		     int force)
6367 {
6368 	struct btrfs_trans_handle *trans;
6369 	u64 new_alloc_flags;
6370 	u64 calc;
6371 
6372 	spin_lock(&shrink_block_group->lock);
6373 	if (btrfs_block_group_used(&shrink_block_group->item) +
6374 	    shrink_block_group->reserved > 0) {
6375 		spin_unlock(&shrink_block_group->lock);
6376 
6377 		trans = btrfs_start_transaction(root, 1);
6378 		spin_lock(&shrink_block_group->lock);
6379 
6380 		new_alloc_flags = update_block_group_flags(root,
6381 						   shrink_block_group->flags);
6382 		if (new_alloc_flags != shrink_block_group->flags) {
6383 			calc =
6384 			     btrfs_block_group_used(&shrink_block_group->item);
6385 		} else {
6386 			calc = shrink_block_group->key.offset;
6387 		}
6388 		spin_unlock(&shrink_block_group->lock);
6389 
6390 		do_chunk_alloc(trans, root->fs_info->extent_root,
6391 			       calc + 2 * 1024 * 1024, new_alloc_flags, force);
6392 
6393 		btrfs_end_transaction(trans, root);
6394 	} else
6395 		spin_unlock(&shrink_block_group->lock);
6396 	return 0;
6397 }
6398 
6399 
6400 int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
6401 					 struct btrfs_block_group_cache *group)
6402 
6403 {
6404 	__alloc_chunk_for_shrink(root, group, 1);
6405 	set_block_group_readonly(group);
6406 	return 0;
6407 }
6408 
6409 #if 0
6410 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
6411 				 struct btrfs_root *root,
6412 				 u64 objectid, u64 size)
6413 {
6414 	struct btrfs_path *path;
6415 	struct btrfs_inode_item *item;
6416 	struct extent_buffer *leaf;
6417 	int ret;
6418 
6419 	path = btrfs_alloc_path();
6420 	if (!path)
6421 		return -ENOMEM;
6422 
6423 	path->leave_spinning = 1;
6424 	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
6425 	if (ret)
6426 		goto out;
6427 
6428 	leaf = path->nodes[0];
6429 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
6430 	memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
6431 	btrfs_set_inode_generation(leaf, item, 1);
6432 	btrfs_set_inode_size(leaf, item, size);
6433 	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
6434 	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
6435 	btrfs_mark_buffer_dirty(leaf);
6436 	btrfs_release_path(root, path);
6437 out:
6438 	btrfs_free_path(path);
6439 	return ret;
6440 }
6441 
6442 static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
6443 					struct btrfs_block_group_cache *group)
6444 {
6445 	struct inode *inode = NULL;
6446 	struct btrfs_trans_handle *trans;
6447 	struct btrfs_root *root;
6448 	struct btrfs_key root_key;
6449 	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
6450 	int err = 0;
6451 
6452 	root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
6453 	root_key.type = BTRFS_ROOT_ITEM_KEY;
6454 	root_key.offset = (u64)-1;
6455 	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
6456 	if (IS_ERR(root))
6457 		return ERR_CAST(root);
6458 
6459 	trans = btrfs_start_transaction(root, 1);
6460 	BUG_ON(!trans);
6461 
6462 	err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
6463 	if (err)
6464 		goto out;
6465 
6466 	err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
6467 	BUG_ON(err);
6468 
6469 	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
6470 				       group->key.offset, 0, group->key.offset,
6471 				       0, 0, 0);
6472 	BUG_ON(err);
6473 
6474 	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
6475 	if (inode->i_state & I_NEW) {
6476 		BTRFS_I(inode)->root = root;
6477 		BTRFS_I(inode)->location.objectid = objectid;
6478 		BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
6479 		BTRFS_I(inode)->location.offset = 0;
6480 		btrfs_read_locked_inode(inode);
6481 		unlock_new_inode(inode);
6482 		BUG_ON(is_bad_inode(inode));
6483 	} else {
6484 		BUG_ON(1);
6485 	}
6486 	BTRFS_I(inode)->index_cnt = group->key.objectid;
6487 
6488 	err = btrfs_orphan_add(trans, inode);
6489 out:
6490 	btrfs_end_transaction(trans, root);
6491 	if (err) {
6492 		if (inode)
6493 			iput(inode);
6494 		inode = ERR_PTR(err);
6495 	}
6496 	return inode;
6497 }
6498 
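/*
 * data in the reloc inode mirrors the relocated block group byte for
 * byte: file offset 0 corresponds to the group start cached in
 * index_cnt.  Look up the csums of the old location of the data just
 * written at @file_pos and rebase them onto the new ordered extent.
 */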
6499 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
6500 {
6501 
6502 	struct btrfs_ordered_sum *sums;
6503 	struct btrfs_sector_sum *sector_sum;
6504 	struct btrfs_ordered_extent *ordered;
6505 	struct btrfs_root *root = BTRFS_I(inode)->root;
6506 	struct list_head list;
6507 	size_t offset;
6508 	int ret;
6509 	u64 disk_bytenr;
6510 
6511 	INIT_LIST_HEAD(&list);
6512 
6513 	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
6514 	BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
6515 
6516 	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
6517 	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
6518 				       disk_bytenr + len - 1, &list);
6519 
6520 	while (!list_empty(&list)) {
6521 		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
6522 		list_del_init(&sums->list);
6523 
6524 		sector_sum = sums->sums;
6525 		sums->bytenr = ordered->start;
6526 
6527 		offset = 0;
6528 		while (offset < sums->len) {
6529 			sector_sum->bytenr += ordered->start - disk_bytenr;
6530 			sector_sum++;
6531 			offset += root->sectorsize;
6532 		}
6533 
6534 		btrfs_add_ordered_sum(inode, ordered, sums);
6535 	}
6536 	btrfs_put_ordered_extent(ordered);
6537 	return 0;
6538 }
6539 
6540 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
6541 {
6542 	struct btrfs_trans_handle *trans;
6543 	struct btrfs_path *path;
6544 	struct btrfs_fs_info *info = root->fs_info;
6545 	struct extent_buffer *leaf;
6546 	struct inode *reloc_inode;
6547 	struct btrfs_block_group_cache *block_group;
6548 	struct btrfs_key key;
6549 	u64 skipped;
6550 	u64 cur_byte;
6551 	u64 total_found;
6552 	u32 nritems;
6553 	int ret;
6554 	int progress;
6555 	int pass = 0;
6556 
6557 	root = root->fs_info->extent_root;
6558 
6559 	block_group = btrfs_lookup_block_group(info, group_start);
6560 	BUG_ON(!block_group);
6561 
6562 	printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
6563 	       (unsigned long long)block_group->key.objectid,
6564 	       (unsigned long long)block_group->flags);
6565 
6566 	path = btrfs_alloc_path();
6567 	BUG_ON(!path);
6568 
6569 	reloc_inode = create_reloc_inode(info, block_group);
6570 	BUG_ON(IS_ERR(reloc_inode));
6571 
6572 	__alloc_chunk_for_shrink(root, block_group, 1);
6573 	set_block_group_readonly(block_group);
6574 
6575 	btrfs_start_delalloc_inodes(info->tree_root);
6576 	btrfs_wait_ordered_extents(info->tree_root, 0);
6577 again:
6578 	skipped = 0;
6579 	total_found = 0;
6580 	progress = 0;
6581 	key.objectid = block_group->key.objectid;
6582 	key.offset = 0;
6583 	key.type = 0;
6584 	cur_byte = key.objectid;
6585 
6586 	trans = btrfs_start_transaction(info->tree_root, 1);
6587 	btrfs_commit_transaction(trans, info->tree_root);
6588 
6589 	mutex_lock(&root->fs_info->cleaner_mutex);
6590 	btrfs_clean_old_snapshots(info->tree_root);
6591 	btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
6592 	mutex_unlock(&root->fs_info->cleaner_mutex);
6593 
6594 	trans = btrfs_start_transaction(info->tree_root, 1);
6595 	btrfs_commit_transaction(trans, info->tree_root);
6596 
6597 	while (1) {
6598 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6599 		if (ret < 0)
6600 			goto out;
6601 next:
6602 		leaf = path->nodes[0];
6603 		nritems = btrfs_header_nritems(leaf);
6604 		if (path->slots[0] >= nritems) {
6605 			ret = btrfs_next_leaf(root, path);
6606 			if (ret < 0)
6607 				goto out;
6608 			if (ret == 1) {
6609 				ret = 0;
6610 				break;
6611 			}
6612 			leaf = path->nodes[0];
6613 			nritems = btrfs_header_nritems(leaf);
6614 		}
6615 
6616 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6617 
6618 		if (key.objectid >= block_group->key.objectid +
6619 		    block_group->key.offset)
6620 			break;
6621 
6622 		if (progress && need_resched()) {
6623 			btrfs_release_path(root, path);
6624 			cond_resched();
6625 			progress = 0;
6626 			continue;
6627 		}
6628 		progress = 1;
6629 
6630 		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
6631 		    key.objectid + key.offset <= cur_byte) {
6632 			path->slots[0]++;
6633 			goto next;
6634 		}
6635 
6636 		total_found++;
6637 		cur_byte = key.objectid + key.offset;
6638 		btrfs_release_path(root, path);
6639 
6640 		__alloc_chunk_for_shrink(root, block_group, 0);
6641 		ret = relocate_one_extent(root, path, &key, block_group,
6642 					  reloc_inode, pass);
6643 		BUG_ON(ret < 0);
6644 		if (ret > 0)
6645 			skipped++;
6646 
6647 		key.objectid = cur_byte;
6648 		key.type = 0;
6649 		key.offset = 0;
6650 	}
6651 
6652 	btrfs_release_path(root, path);
6653 
6654 	if (pass == 0) {
6655 		btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
6656 		invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
6657 	}
6658 
6659 	if (total_found > 0) {
6660 		printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
6661 		       (unsigned long long)total_found, pass);
6662 		pass++;
6663 		if (total_found == skipped && pass > 2) {
6664 			iput(reloc_inode);
6665 			reloc_inode = create_reloc_inode(info, block_group);
6666 			pass = 0;
6667 		}
6668 		goto again;
6669 	}
6670 
6671 	/* delete reloc_inode */
6672 	iput(reloc_inode);
6673 
6674 	/* unpin extents in this range */
6675 	trans = btrfs_start_transaction(info->tree_root, 1);
6676 	btrfs_commit_transaction(trans, info->tree_root);
6677 
6678 	spin_lock(&block_group->lock);
6679 	WARN_ON(block_group->pinned > 0);
6680 	WARN_ON(block_group->reserved > 0);
6681 	WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
6682 	spin_unlock(&block_group->lock);
6683 	btrfs_put_block_group(block_group);
6684 	ret = 0;
6685 out:
6686 	btrfs_free_path(path);
6687 	return ret;
6688 }
6689 #endif
6690 
6691 static int find_first_block_group(struct btrfs_root *root,
6692 		struct btrfs_path *path, struct btrfs_key *key)
6693 {
6694 	int ret = 0;
6695 	struct btrfs_key found_key;
6696 	struct extent_buffer *leaf;
6697 	int slot;
6698 
6699 	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
6700 	if (ret < 0)
6701 		goto out;
6702 
6703 	while (1) {
6704 		slot = path->slots[0];
6705 		leaf = path->nodes[0];
6706 		if (slot >= btrfs_header_nritems(leaf)) {
6707 			ret = btrfs_next_leaf(root, path);
6708 			if (ret == 0)
6709 				continue;
6710 			if (ret < 0)
6711 				goto out;
6712 			break;
6713 		}
6714 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
6715 
6716 		if (found_key.objectid >= key->objectid &&
6717 		    found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6718 			ret = 0;
6719 			goto out;
6720 		}
6721 		path->slots[0]++;
6722 	}
6723 	ret = -ENOENT;
6724 out:
6725 	return ret;
6726 }
6727 
6728 int btrfs_free_block_groups(struct btrfs_fs_info *info)
6729 {
6730 	struct btrfs_block_group_cache *block_group;
6731 	struct btrfs_space_info *space_info;
6732 	struct rb_node *n;
6733 
6734 	spin_lock(&info->block_group_cache_lock);
6735 	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
6736 		block_group = rb_entry(n, struct btrfs_block_group_cache,
6737 				       cache_node);
6738 		rb_erase(&block_group->cache_node,
6739 			 &info->block_group_cache_tree);
6740 		spin_unlock(&info->block_group_cache_lock);
6741 
6742 		btrfs_remove_free_space_cache(block_group);
6743 		down_write(&block_group->space_info->groups_sem);
6744 		list_del(&block_group->list);
6745 		up_write(&block_group->space_info->groups_sem);
6746 
6747 		WARN_ON(atomic_read(&block_group->count) != 1);
6748 		kfree(block_group);
6749 
6750 		spin_lock(&info->block_group_cache_lock);
6751 	}
6752 	spin_unlock(&info->block_group_cache_lock);
6753 
6754 	/* now that all the block groups are freed, go through and
6755 	 * free all the space_info structs.  This is only called during
6756 	 * the final stages of unmount, and so we know nobody is
6757 	 * using them.  We call synchronize_rcu() once before we start,
6758 	 * just to be on the safe side.
6759 	 */
6760 	synchronize_rcu();
6761 
6762 	while (!list_empty(&info->space_info)) {
6763 		space_info = list_entry(info->space_info.next,
6764 					struct btrfs_space_info,
6765 					list);
6766 
6767 		list_del(&space_info->list);
6768 		kfree(space_info);
6769 	}
6770 	return 0;
6771 }
6772 
6773 int btrfs_read_block_groups(struct btrfs_root *root)
6774 {
6775 	struct btrfs_path *path;
6776 	int ret;
6777 	struct btrfs_block_group_cache *cache;
6778 	struct btrfs_fs_info *info = root->fs_info;
6779 	struct btrfs_space_info *space_info;
6780 	struct btrfs_key key;
6781 	struct btrfs_key found_key;
6782 	struct extent_buffer *leaf;
6783 
6784 	root = info->extent_root;
6785 	key.objectid = 0;
6786 	key.offset = 0;
6787 	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
6788 	path = btrfs_alloc_path();
6789 	if (!path)
6790 		return -ENOMEM;
6791 
6792 	while (1) {
6793 		ret = find_first_block_group(root, path, &key);
6794 		if (ret > 0) {
6795 			ret = 0;
6796 			goto error;
6797 		}
6798 		if (ret != 0)
6799 			goto error;
6800 
6801 		leaf = path->nodes[0];
6802 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6803 		cache = kzalloc(sizeof(*cache), GFP_NOFS);
6804 		if (!cache) {
6805 			ret = -ENOMEM;
6806 			break;
6807 		}
6808 
6809 		atomic_set(&cache->count, 1);
6810 		spin_lock_init(&cache->lock);
6811 		spin_lock_init(&cache->tree_lock);
6812 		mutex_init(&cache->cache_mutex);
6813 		INIT_LIST_HEAD(&cache->list);
6814 		INIT_LIST_HEAD(&cache->cluster_list);
6815 		read_extent_buffer(leaf, &cache->item,
6816 				   btrfs_item_ptr_offset(leaf, path->slots[0]),
6817 				   sizeof(cache->item));
6818 		memcpy(&cache->key, &found_key, sizeof(found_key));
6819 
6820 		key.objectid = found_key.objectid + found_key.offset;
6821 		btrfs_release_path(root, path);
6822 		cache->flags = btrfs_block_group_flags(&cache->item);
6823 
6824 		ret = update_space_info(info, cache->flags, found_key.offset,
6825 					btrfs_block_group_used(&cache->item),
6826 					&space_info);
6827 		BUG_ON(ret);
6828 		cache->space_info = space_info;
6829 		down_write(&space_info->groups_sem);
6830 		list_add_tail(&cache->list, &space_info->block_groups);
6831 		up_write(&space_info->groups_sem);
6832 
6833 		ret = btrfs_add_block_group_cache(root->fs_info, cache);
6834 		BUG_ON(ret);
6835 
6836 		set_avail_alloc_bits(root->fs_info, cache->flags);
6837 		if (btrfs_chunk_readonly(root, cache->key.objectid))
6838 			set_block_group_readonly(cache);
6839 	}
6840 	ret = 0;
6841 error:
6842 	btrfs_free_path(path);
6843 	return ret;
6844 }
6845 
6846 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
6847 			   struct btrfs_root *root, u64 bytes_used,
6848 			   u64 type, u64 chunk_objectid, u64 chunk_offset,
6849 			   u64 size)
6850 {
6851 	int ret;
6852 	struct btrfs_root *extent_root;
6853 	struct btrfs_block_group_cache *cache;
6854 
6855 	extent_root = root->fs_info->extent_root;
6856 
6857 	root->fs_info->last_trans_log_full_commit = trans->transid;
6858 
6859 	cache = kzalloc(sizeof(*cache), GFP_NOFS);
6860 	if (!cache)
6861 		return -ENOMEM;
6862 
6863 	cache->key.objectid = chunk_offset;
6864 	cache->key.offset = size;
6865 	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
6866 	atomic_set(&cache->count, 1);
6867 	spin_lock_init(&cache->lock);
6868 	spin_lock_init(&cache->tree_lock);
6869 	mutex_init(&cache->cache_mutex);
6870 	INIT_LIST_HEAD(&cache->list);
6871 	INIT_LIST_HEAD(&cache->cluster_list);
6872 
6873 	btrfs_set_block_group_used(&cache->item, bytes_used);
6874 	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
6875 	cache->flags = type;
6876 	btrfs_set_block_group_flags(&cache->item, type);
6877 
6878 	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
6879 				&cache->space_info);
6880 	BUG_ON(ret);
6881 	down_write(&cache->space_info->groups_sem);
6882 	list_add_tail(&cache->list, &cache->space_info->block_groups);
6883 	up_write(&cache->space_info->groups_sem);
6884 
6885 	ret = btrfs_add_block_group_cache(root->fs_info, cache);
6886 	BUG_ON(ret);
6887 
6888 	ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
6889 				sizeof(cache->item));
6890 	BUG_ON(ret);
6891 
6892 	set_avail_alloc_bits(extent_root->fs_info, type);
6893 
6894 	return 0;
6895 }
6896 
6897 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
6898 			     struct btrfs_root *root, u64 group_start)
6899 {
6900 	struct btrfs_path *path;
6901 	struct btrfs_block_group_cache *block_group;
6902 	struct btrfs_free_cluster *cluster;
6903 	struct btrfs_key key;
6904 	int ret;
6905 
6906 	root = root->fs_info->extent_root;
6907 
6908 	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
6909 	BUG_ON(!block_group);
6910 	BUG_ON(!block_group->ro);
6911 
6912 	memcpy(&key, &block_group->key, sizeof(key));
6913 
6914 	/* make sure this block group isn't part of an allocation cluster */
6915 	cluster = &root->fs_info->data_alloc_cluster;
6916 	spin_lock(&cluster->refill_lock);
6917 	btrfs_return_cluster_to_free_space(block_group, cluster);
6918 	spin_unlock(&cluster->refill_lock);
6919 
6920 	/*
6921 	 * make sure this block group isn't part of a metadata
6922 	 * allocation cluster
6923 	 */
6924 	cluster = &root->fs_info->meta_alloc_cluster;
6925 	spin_lock(&cluster->refill_lock);
6926 	btrfs_return_cluster_to_free_space(block_group, cluster);
6927 	spin_unlock(&cluster->refill_lock);
6928 
6929 	path = btrfs_alloc_path();
6930 	BUG_ON(!path);
6931 
6932 	spin_lock(&root->fs_info->block_group_cache_lock);
6933 	rb_erase(&block_group->cache_node,
6934 		 &root->fs_info->block_group_cache_tree);
6935 	spin_unlock(&root->fs_info->block_group_cache_lock);
6936 	btrfs_remove_free_space_cache(block_group);
6937 	down_write(&block_group->space_info->groups_sem);
6938 	/*
6939 	 * we must use list_del_init so people can check to see if they
6940 	 * are still on the list after taking the semaphore
6941 	 */
6942 	list_del_init(&block_group->list);
6943 	up_write(&block_group->space_info->groups_sem);
6944 
6945 	spin_lock(&block_group->space_info->lock);
6946 	block_group->space_info->total_bytes -= block_group->key.offset;
6947 	block_group->space_info->bytes_readonly -= block_group->key.offset;
6948 	spin_unlock(&block_group->space_info->lock);
6949 	block_group->space_info->full = 0;
6950 
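	/*
	 * one put drops the reference taken by btrfs_lookup_block_group()
	 * above, the other drops the reference the block group cache tree
	 * held before rb_erase() removed this group from it.
	 */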
6951 	btrfs_put_block_group(block_group);
6952 	btrfs_put_block_group(block_group);
6953 
6954 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6955 	if (ret > 0)
6956 		ret = -EIO;
6957 	if (ret < 0)
6958 		goto out;
6959 
6960 	ret = btrfs_del_item(trans, root, path);
6961 out:
6962 	btrfs_free_path(path);
6963 	return ret;
6964 }
6965