xref: /linux/fs/btrfs/free-space-tree.c (revision 0eb4aaa230d725fa9b1cd758c0f17abca5597af6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2015 Facebook.  All rights reserved.
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
8 #include "messages.h"
9 #include "ctree.h"
10 #include "disk-io.h"
11 #include "locking.h"
12 #include "free-space-tree.h"
13 #include "transaction.h"
14 #include "block-group.h"
15 #include "fs.h"
16 #include "accessors.h"
17 #include "extent-tree.h"
18 #include "root-tree.h"
19 
20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
21 					struct btrfs_block_group *block_group,
22 					struct btrfs_path *path);
23 
btrfs_free_space_root(struct btrfs_block_group * block_group)24 static struct btrfs_root *btrfs_free_space_root(
25 				struct btrfs_block_group *block_group)
26 {
27 	struct btrfs_key key = {
28 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
29 		.type = BTRFS_ROOT_ITEM_KEY,
30 		.offset = 0,
31 	};
32 
33 	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
34 		key.offset = block_group->global_root_id;
35 	return btrfs_global_root(block_group->fs_info, &key);
36 }
37 
set_free_space_tree_thresholds(struct btrfs_block_group * cache)38 void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
39 {
40 	u32 bitmap_range;
41 	size_t bitmap_size;
42 	u64 num_bitmaps, total_bitmap_size;
43 
44 	if (WARN_ON(cache->length == 0))
45 		btrfs_warn(cache->fs_info, "block group %llu length is zero",
46 			   cache->start);
47 
48 	/*
49 	 * We convert to bitmaps when the disk space required for using extents
50 	 * exceeds that required for using bitmaps.
51 	 */
52 	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
53 	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
54 	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
55 	total_bitmap_size = num_bitmaps * bitmap_size;
56 	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
57 					    sizeof(struct btrfs_item));
58 
59 	/*
60 	 * We allow for a small buffer between the high threshold and low
61 	 * threshold to avoid thrashing back and forth between the two formats.
62 	 */
63 	if (cache->bitmap_high_thresh > 100)
64 		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
65 	else
66 		cache->bitmap_low_thresh = 0;
67 }
68 
add_new_free_space_info(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)69 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
70 				   struct btrfs_block_group *block_group,
71 				   struct btrfs_path *path)
72 {
73 	struct btrfs_root *root = btrfs_free_space_root(block_group);
74 	struct btrfs_free_space_info *info;
75 	struct btrfs_key key;
76 	struct extent_buffer *leaf;
77 	int ret;
78 
79 	key.objectid = block_group->start;
80 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
81 	key.offset = block_group->length;
82 
83 	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
84 	if (ret)
85 		goto out;
86 
87 	leaf = path->nodes[0];
88 	info = btrfs_item_ptr(leaf, path->slots[0],
89 			      struct btrfs_free_space_info);
90 	btrfs_set_free_space_extent_count(leaf, info, 0);
91 	btrfs_set_free_space_flags(leaf, info, 0);
92 
93 	ret = 0;
94 out:
95 	btrfs_release_path(path);
96 	return ret;
97 }
98 
99 EXPORT_FOR_TESTS
search_free_space_info(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,int cow)100 struct btrfs_free_space_info *search_free_space_info(
101 		struct btrfs_trans_handle *trans,
102 		struct btrfs_block_group *block_group,
103 		struct btrfs_path *path, int cow)
104 {
105 	struct btrfs_fs_info *fs_info = block_group->fs_info;
106 	struct btrfs_root *root = btrfs_free_space_root(block_group);
107 	struct btrfs_key key;
108 	int ret;
109 
110 	key.objectid = block_group->start;
111 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
112 	key.offset = block_group->length;
113 
114 	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
115 	if (ret < 0)
116 		return ERR_PTR(ret);
117 	if (ret != 0) {
118 		btrfs_warn(fs_info, "missing free space info for %llu",
119 			   block_group->start);
120 		ASSERT(0);
121 		return ERR_PTR(-ENOENT);
122 	}
123 
124 	return btrfs_item_ptr(path->nodes[0], path->slots[0],
125 			      struct btrfs_free_space_info);
126 }
127 
128 /*
129  * btrfs_search_slot() but we're looking for the greatest key less than the
130  * passed key.
131  */
btrfs_search_prev_slot(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_key * key,struct btrfs_path * p,int ins_len,int cow)132 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
133 				  struct btrfs_root *root,
134 				  struct btrfs_key *key, struct btrfs_path *p,
135 				  int ins_len, int cow)
136 {
137 	int ret;
138 
139 	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
140 	if (ret < 0)
141 		return ret;
142 
143 	if (ret == 0) {
144 		ASSERT(0);
145 		return -EIO;
146 	}
147 
148 	if (p->slots[0] == 0) {
149 		ASSERT(0);
150 		return -EIO;
151 	}
152 	p->slots[0]--;
153 
154 	return 0;
155 }
156 
free_space_bitmap_size(const struct btrfs_fs_info * fs_info,u64 size)157 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
158 					 u64 size)
159 {
160 	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
161 }
162 
alloc_bitmap(u32 bitmap_size)163 static unsigned long *alloc_bitmap(u32 bitmap_size)
164 {
165 	unsigned long *ret;
166 	unsigned int nofs_flag;
167 	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
168 
169 	/*
170 	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
171 	 * into the filesystem as the free space bitmap can be modified in the
172 	 * critical section of a transaction commit.
173 	 *
174 	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
175 	 * know that recursion is unsafe.
176 	 */
177 	nofs_flag = memalloc_nofs_save();
178 	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
179 	memalloc_nofs_restore(nofs_flag);
180 	return ret;
181 }
182 
le_bitmap_set(unsigned long * map,unsigned int start,int len)183 static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
184 {
185 	u8 *p = ((u8 *)map) + BIT_BYTE(start);
186 	const unsigned int size = start + len;
187 	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
188 	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
189 
190 	while (len - bits_to_set >= 0) {
191 		*p |= mask_to_set;
192 		len -= bits_to_set;
193 		bits_to_set = BITS_PER_BYTE;
194 		mask_to_set = ~0;
195 		p++;
196 	}
197 	if (len) {
198 		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
199 		*p |= mask_to_set;
200 	}
201 }
202 
203 EXPORT_FOR_TESTS
convert_free_space_to_bitmaps(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)204 int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
205 				  struct btrfs_block_group *block_group,
206 				  struct btrfs_path *path)
207 {
208 	struct btrfs_fs_info *fs_info = trans->fs_info;
209 	struct btrfs_root *root = btrfs_free_space_root(block_group);
210 	struct btrfs_free_space_info *info;
211 	struct btrfs_key key, found_key;
212 	struct extent_buffer *leaf;
213 	unsigned long *bitmap;
214 	char *bitmap_cursor;
215 	u64 start, end;
216 	u64 bitmap_range, i;
217 	u32 bitmap_size, flags, expected_extent_count;
218 	u32 extent_count = 0;
219 	int done = 0, nr;
220 	int ret;
221 
222 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
223 	bitmap = alloc_bitmap(bitmap_size);
224 	if (!bitmap) {
225 		ret = -ENOMEM;
226 		goto out;
227 	}
228 
229 	start = block_group->start;
230 	end = block_group->start + block_group->length;
231 
232 	key.objectid = end - 1;
233 	key.type = (u8)-1;
234 	key.offset = (u64)-1;
235 
236 	while (!done) {
237 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
238 		if (ret)
239 			goto out;
240 
241 		leaf = path->nodes[0];
242 		nr = 0;
243 		path->slots[0]++;
244 		while (path->slots[0] > 0) {
245 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
246 
247 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
248 				ASSERT(found_key.objectid == block_group->start);
249 				ASSERT(found_key.offset == block_group->length);
250 				done = 1;
251 				break;
252 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
253 				u64 first, last;
254 
255 				ASSERT(found_key.objectid >= start);
256 				ASSERT(found_key.objectid < end);
257 				ASSERT(found_key.objectid + found_key.offset <= end);
258 
259 				first = div_u64(found_key.objectid - start,
260 						fs_info->sectorsize);
261 				last = div_u64(found_key.objectid + found_key.offset - start,
262 					       fs_info->sectorsize);
263 				le_bitmap_set(bitmap, first, last - first);
264 
265 				extent_count++;
266 				nr++;
267 				path->slots[0]--;
268 			} else {
269 				ASSERT(0);
270 			}
271 		}
272 
273 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
274 		if (ret)
275 			goto out;
276 		btrfs_release_path(path);
277 	}
278 
279 	info = search_free_space_info(trans, block_group, path, 1);
280 	if (IS_ERR(info)) {
281 		ret = PTR_ERR(info);
282 		goto out;
283 	}
284 	leaf = path->nodes[0];
285 	flags = btrfs_free_space_flags(leaf, info);
286 	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
287 	btrfs_set_free_space_flags(leaf, info, flags);
288 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
289 	btrfs_release_path(path);
290 
291 	if (extent_count != expected_extent_count) {
292 		btrfs_err(fs_info,
293 			  "incorrect extent count for %llu; counted %u, expected %u",
294 			  block_group->start, extent_count,
295 			  expected_extent_count);
296 		ASSERT(0);
297 		ret = -EIO;
298 		goto out;
299 	}
300 
301 	bitmap_cursor = (char *)bitmap;
302 	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
303 	i = start;
304 	while (i < end) {
305 		unsigned long ptr;
306 		u64 extent_size;
307 		u32 data_size;
308 
309 		extent_size = min(end - i, bitmap_range);
310 		data_size = free_space_bitmap_size(fs_info, extent_size);
311 
312 		key.objectid = i;
313 		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
314 		key.offset = extent_size;
315 
316 		ret = btrfs_insert_empty_item(trans, root, path, &key,
317 					      data_size);
318 		if (ret)
319 			goto out;
320 
321 		leaf = path->nodes[0];
322 		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
323 		write_extent_buffer(leaf, bitmap_cursor, ptr,
324 				    data_size);
325 		btrfs_release_path(path);
326 
327 		i += extent_size;
328 		bitmap_cursor += data_size;
329 	}
330 
331 	ret = 0;
332 out:
333 	kvfree(bitmap);
334 	if (ret)
335 		btrfs_abort_transaction(trans, ret);
336 	return ret;
337 }
338 
339 EXPORT_FOR_TESTS
convert_free_space_to_extents(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)340 int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
341 				  struct btrfs_block_group *block_group,
342 				  struct btrfs_path *path)
343 {
344 	struct btrfs_fs_info *fs_info = trans->fs_info;
345 	struct btrfs_root *root = btrfs_free_space_root(block_group);
346 	struct btrfs_free_space_info *info;
347 	struct btrfs_key key, found_key;
348 	struct extent_buffer *leaf;
349 	unsigned long *bitmap;
350 	u64 start, end;
351 	u32 bitmap_size, flags, expected_extent_count;
352 	unsigned long nrbits, start_bit, end_bit;
353 	u32 extent_count = 0;
354 	int done = 0, nr;
355 	int ret;
356 
357 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
358 	bitmap = alloc_bitmap(bitmap_size);
359 	if (!bitmap) {
360 		ret = -ENOMEM;
361 		goto out;
362 	}
363 
364 	start = block_group->start;
365 	end = block_group->start + block_group->length;
366 
367 	key.objectid = end - 1;
368 	key.type = (u8)-1;
369 	key.offset = (u64)-1;
370 
371 	while (!done) {
372 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
373 		if (ret)
374 			goto out;
375 
376 		leaf = path->nodes[0];
377 		nr = 0;
378 		path->slots[0]++;
379 		while (path->slots[0] > 0) {
380 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
381 
382 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
383 				ASSERT(found_key.objectid == block_group->start);
384 				ASSERT(found_key.offset == block_group->length);
385 				done = 1;
386 				break;
387 			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
388 				unsigned long ptr;
389 				char *bitmap_cursor;
390 				u32 bitmap_pos, data_size;
391 
392 				ASSERT(found_key.objectid >= start);
393 				ASSERT(found_key.objectid < end);
394 				ASSERT(found_key.objectid + found_key.offset <= end);
395 
396 				bitmap_pos = div_u64(found_key.objectid - start,
397 						     fs_info->sectorsize *
398 						     BITS_PER_BYTE);
399 				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
400 				data_size = free_space_bitmap_size(fs_info,
401 								found_key.offset);
402 
403 				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
404 				read_extent_buffer(leaf, bitmap_cursor, ptr,
405 						   data_size);
406 
407 				nr++;
408 				path->slots[0]--;
409 			} else {
410 				ASSERT(0);
411 			}
412 		}
413 
414 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
415 		if (ret)
416 			goto out;
417 		btrfs_release_path(path);
418 	}
419 
420 	info = search_free_space_info(trans, block_group, path, 1);
421 	if (IS_ERR(info)) {
422 		ret = PTR_ERR(info);
423 		goto out;
424 	}
425 	leaf = path->nodes[0];
426 	flags = btrfs_free_space_flags(leaf, info);
427 	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
428 	btrfs_set_free_space_flags(leaf, info, flags);
429 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
430 	btrfs_release_path(path);
431 
432 	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
433 	start_bit = find_next_bit_le(bitmap, nrbits, 0);
434 
435 	while (start_bit < nrbits) {
436 		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
437 		ASSERT(start_bit < end_bit);
438 
439 		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
440 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
441 		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
442 
443 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
444 		if (ret)
445 			goto out;
446 		btrfs_release_path(path);
447 
448 		extent_count++;
449 
450 		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
451 	}
452 
453 	if (extent_count != expected_extent_count) {
454 		btrfs_err(fs_info,
455 			  "incorrect extent count for %llu; counted %u, expected %u",
456 			  block_group->start, extent_count,
457 			  expected_extent_count);
458 		ASSERT(0);
459 		ret = -EIO;
460 		goto out;
461 	}
462 
463 	ret = 0;
464 out:
465 	kvfree(bitmap);
466 	if (ret)
467 		btrfs_abort_transaction(trans, ret);
468 	return ret;
469 }
470 
update_free_space_extent_count(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,int new_extents)471 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
472 					  struct btrfs_block_group *block_group,
473 					  struct btrfs_path *path,
474 					  int new_extents)
475 {
476 	struct btrfs_free_space_info *info;
477 	u32 flags;
478 	u32 extent_count;
479 	int ret = 0;
480 
481 	if (new_extents == 0)
482 		return 0;
483 
484 	info = search_free_space_info(trans, block_group, path, 1);
485 	if (IS_ERR(info)) {
486 		ret = PTR_ERR(info);
487 		goto out;
488 	}
489 	flags = btrfs_free_space_flags(path->nodes[0], info);
490 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
491 
492 	extent_count += new_extents;
493 	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
494 	btrfs_release_path(path);
495 
496 	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
497 	    extent_count > block_group->bitmap_high_thresh) {
498 		ret = convert_free_space_to_bitmaps(trans, block_group, path);
499 	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
500 		   extent_count < block_group->bitmap_low_thresh) {
501 		ret = convert_free_space_to_extents(trans, block_group, path);
502 	}
503 
504 out:
505 	return ret;
506 }
507 
508 EXPORT_FOR_TESTS
free_space_test_bit(struct btrfs_block_group * block_group,struct btrfs_path * path,u64 offset)509 int free_space_test_bit(struct btrfs_block_group *block_group,
510 			struct btrfs_path *path, u64 offset)
511 {
512 	struct extent_buffer *leaf;
513 	struct btrfs_key key;
514 	u64 found_start, found_end;
515 	unsigned long ptr, i;
516 
517 	leaf = path->nodes[0];
518 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
519 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
520 
521 	found_start = key.objectid;
522 	found_end = key.objectid + key.offset;
523 	ASSERT(offset >= found_start && offset < found_end);
524 
525 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
526 	i = div_u64(offset - found_start,
527 		    block_group->fs_info->sectorsize);
528 	return !!extent_buffer_test_bit(leaf, ptr, i);
529 }
530 
free_space_set_bits(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 * start,u64 * size,int bit)531 static void free_space_set_bits(struct btrfs_trans_handle *trans,
532 				struct btrfs_block_group *block_group,
533 				struct btrfs_path *path, u64 *start, u64 *size,
534 				int bit)
535 {
536 	struct btrfs_fs_info *fs_info = block_group->fs_info;
537 	struct extent_buffer *leaf;
538 	struct btrfs_key key;
539 	u64 end = *start + *size;
540 	u64 found_start, found_end;
541 	unsigned long ptr, first, last;
542 
543 	leaf = path->nodes[0];
544 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
545 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
546 
547 	found_start = key.objectid;
548 	found_end = key.objectid + key.offset;
549 	ASSERT(*start >= found_start && *start < found_end);
550 	ASSERT(end > found_start);
551 
552 	if (end > found_end)
553 		end = found_end;
554 
555 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
556 	first = (*start - found_start) >> fs_info->sectorsize_bits;
557 	last = (end - found_start) >> fs_info->sectorsize_bits;
558 	if (bit)
559 		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
560 	else
561 		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
562 	btrfs_mark_buffer_dirty(trans, leaf);
563 
564 	*size -= end - *start;
565 	*start = end;
566 }
567 
568 /*
569  * We can't use btrfs_next_item() in modify_free_space_bitmap() because
570  * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
571  * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
572  * looking for.
573  */
free_space_next_bitmap(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * p)574 static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
575 				  struct btrfs_root *root, struct btrfs_path *p)
576 {
577 	struct btrfs_key key;
578 
579 	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
580 		p->slots[0]++;
581 		return 0;
582 	}
583 
584 	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
585 	btrfs_release_path(p);
586 
587 	key.objectid += key.offset;
588 	key.type = (u8)-1;
589 	key.offset = (u64)-1;
590 
591 	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
592 }
593 
594 /*
595  * If remove is 1, then we are removing free space, thus clearing bits in the
596  * bitmap. If remove is 0, then we are adding free space, thus setting bits in
597  * the bitmap.
598  */
modify_free_space_bitmap(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size,int remove)599 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
600 				    struct btrfs_block_group *block_group,
601 				    struct btrfs_path *path,
602 				    u64 start, u64 size, int remove)
603 {
604 	struct btrfs_root *root = btrfs_free_space_root(block_group);
605 	struct btrfs_key key;
606 	u64 end = start + size;
607 	u64 cur_start, cur_size;
608 	int prev_bit, next_bit;
609 	int new_extents;
610 	int ret;
611 
612 	/*
613 	 * Read the bit for the block immediately before the extent of space if
614 	 * that block is within the block group.
615 	 */
616 	if (start > block_group->start) {
617 		u64 prev_block = start - block_group->fs_info->sectorsize;
618 
619 		key.objectid = prev_block;
620 		key.type = (u8)-1;
621 		key.offset = (u64)-1;
622 
623 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
624 		if (ret)
625 			goto out;
626 
627 		prev_bit = free_space_test_bit(block_group, path, prev_block);
628 
629 		/* The previous block may have been in the previous bitmap. */
630 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
631 		if (start >= key.objectid + key.offset) {
632 			ret = free_space_next_bitmap(trans, root, path);
633 			if (ret)
634 				goto out;
635 		}
636 	} else {
637 		key.objectid = start;
638 		key.type = (u8)-1;
639 		key.offset = (u64)-1;
640 
641 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
642 		if (ret)
643 			goto out;
644 
645 		prev_bit = -1;
646 	}
647 
648 	/*
649 	 * Iterate over all of the bitmaps overlapped by the extent of space,
650 	 * clearing/setting bits as required.
651 	 */
652 	cur_start = start;
653 	cur_size = size;
654 	while (1) {
655 		free_space_set_bits(trans, block_group, path, &cur_start, &cur_size,
656 				    !remove);
657 		if (cur_size == 0)
658 			break;
659 		ret = free_space_next_bitmap(trans, root, path);
660 		if (ret)
661 			goto out;
662 	}
663 
664 	/*
665 	 * Read the bit for the block immediately after the extent of space if
666 	 * that block is within the block group.
667 	 */
668 	if (end < block_group->start + block_group->length) {
669 		/* The next block may be in the next bitmap. */
670 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
671 		if (end >= key.objectid + key.offset) {
672 			ret = free_space_next_bitmap(trans, root, path);
673 			if (ret)
674 				goto out;
675 		}
676 
677 		next_bit = free_space_test_bit(block_group, path, end);
678 	} else {
679 		next_bit = -1;
680 	}
681 
682 	if (remove) {
683 		new_extents = -1;
684 		if (prev_bit == 1) {
685 			/* Leftover on the left. */
686 			new_extents++;
687 		}
688 		if (next_bit == 1) {
689 			/* Leftover on the right. */
690 			new_extents++;
691 		}
692 	} else {
693 		new_extents = 1;
694 		if (prev_bit == 1) {
695 			/* Merging with neighbor on the left. */
696 			new_extents--;
697 		}
698 		if (next_bit == 1) {
699 			/* Merging with neighbor on the right. */
700 			new_extents--;
701 		}
702 	}
703 
704 	btrfs_release_path(path);
705 	ret = update_free_space_extent_count(trans, block_group, path,
706 					     new_extents);
707 
708 out:
709 	return ret;
710 }
711 
remove_free_space_extent(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)712 static int remove_free_space_extent(struct btrfs_trans_handle *trans,
713 				    struct btrfs_block_group *block_group,
714 				    struct btrfs_path *path,
715 				    u64 start, u64 size)
716 {
717 	struct btrfs_root *root = btrfs_free_space_root(block_group);
718 	struct btrfs_key key;
719 	u64 found_start, found_end;
720 	u64 end = start + size;
721 	int new_extents = -1;
722 	int ret;
723 
724 	key.objectid = start;
725 	key.type = (u8)-1;
726 	key.offset = (u64)-1;
727 
728 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
729 	if (ret)
730 		goto out;
731 
732 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
733 
734 	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
735 
736 	found_start = key.objectid;
737 	found_end = key.objectid + key.offset;
738 	ASSERT(start >= found_start && end <= found_end);
739 
740 	/*
741 	 * Okay, now that we've found the free space extent which contains the
742 	 * free space that we are removing, there are four cases:
743 	 *
744 	 * 1. We're using the whole extent: delete the key we found and
745 	 * decrement the free space extent count.
746 	 * 2. We are using part of the extent starting at the beginning: delete
747 	 * the key we found and insert a new key representing the leftover at
748 	 * the end. There is no net change in the number of extents.
749 	 * 3. We are using part of the extent ending at the end: delete the key
750 	 * we found and insert a new key representing the leftover at the
751 	 * beginning. There is no net change in the number of extents.
752 	 * 4. We are using part of the extent in the middle: delete the key we
753 	 * found and insert two new keys representing the leftovers on each
754 	 * side. Where we used to have one extent, we now have two, so increment
755 	 * the extent count. We may need to convert the block group to bitmaps
756 	 * as a result.
757 	 */
758 
759 	/* Delete the existing key (cases 1-4). */
760 	ret = btrfs_del_item(trans, root, path);
761 	if (ret)
762 		goto out;
763 
764 	/* Add a key for leftovers at the beginning (cases 3 and 4). */
765 	if (start > found_start) {
766 		key.objectid = found_start;
767 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
768 		key.offset = start - found_start;
769 
770 		btrfs_release_path(path);
771 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
772 		if (ret)
773 			goto out;
774 		new_extents++;
775 	}
776 
777 	/* Add a key for leftovers at the end (cases 2 and 4). */
778 	if (end < found_end) {
779 		key.objectid = end;
780 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
781 		key.offset = found_end - end;
782 
783 		btrfs_release_path(path);
784 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
785 		if (ret)
786 			goto out;
787 		new_extents++;
788 	}
789 
790 	btrfs_release_path(path);
791 	ret = update_free_space_extent_count(trans, block_group, path,
792 					     new_extents);
793 
794 out:
795 	return ret;
796 }
797 
798 EXPORT_FOR_TESTS
__remove_from_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)799 int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
800 				  struct btrfs_block_group *block_group,
801 				  struct btrfs_path *path, u64 start, u64 size)
802 {
803 	struct btrfs_free_space_info *info;
804 	u32 flags;
805 	int ret;
806 
807 	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
808 		ret = __add_block_group_free_space(trans, block_group, path);
809 		if (ret)
810 			return ret;
811 	}
812 
813 	info = search_free_space_info(NULL, block_group, path, 0);
814 	if (IS_ERR(info))
815 		return PTR_ERR(info);
816 	flags = btrfs_free_space_flags(path->nodes[0], info);
817 	btrfs_release_path(path);
818 
819 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
820 		return modify_free_space_bitmap(trans, block_group, path,
821 						start, size, 1);
822 	} else {
823 		return remove_free_space_extent(trans, block_group, path,
824 						start, size);
825 	}
826 }
827 
remove_from_free_space_tree(struct btrfs_trans_handle * trans,u64 start,u64 size)828 int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
829 				u64 start, u64 size)
830 {
831 	struct btrfs_block_group *block_group;
832 	struct btrfs_path *path;
833 	int ret;
834 
835 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
836 		return 0;
837 
838 	path = btrfs_alloc_path();
839 	if (!path) {
840 		ret = -ENOMEM;
841 		goto out;
842 	}
843 
844 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
845 	if (!block_group) {
846 		ASSERT(0);
847 		ret = -ENOENT;
848 		goto out;
849 	}
850 
851 	mutex_lock(&block_group->free_space_lock);
852 	ret = __remove_from_free_space_tree(trans, block_group, path, start,
853 					    size);
854 	mutex_unlock(&block_group->free_space_lock);
855 
856 	btrfs_put_block_group(block_group);
857 out:
858 	btrfs_free_path(path);
859 	if (ret)
860 		btrfs_abort_transaction(trans, ret);
861 	return ret;
862 }
863 
add_free_space_extent(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)864 static int add_free_space_extent(struct btrfs_trans_handle *trans,
865 				 struct btrfs_block_group *block_group,
866 				 struct btrfs_path *path,
867 				 u64 start, u64 size)
868 {
869 	struct btrfs_root *root = btrfs_free_space_root(block_group);
870 	struct btrfs_key key, new_key;
871 	u64 found_start, found_end;
872 	u64 end = start + size;
873 	int new_extents = 1;
874 	int ret;
875 
876 	/*
877 	 * We are adding a new extent of free space, but we need to merge
878 	 * extents. There are four cases here:
879 	 *
880 	 * 1. The new extent does not have any immediate neighbors to merge
881 	 * with: add the new key and increment the free space extent count. We
882 	 * may need to convert the block group to bitmaps as a result.
883 	 * 2. The new extent has an immediate neighbor before it: remove the
884 	 * previous key and insert a new key combining both of them. There is no
885 	 * net change in the number of extents.
886 	 * 3. The new extent has an immediate neighbor after it: remove the next
887 	 * key and insert a new key combining both of them. There is no net
888 	 * change in the number of extents.
889 	 * 4. The new extent has immediate neighbors on both sides: remove both
890 	 * of the keys and insert a new key combining all of them. Where we used
891 	 * to have two extents, we now have one, so decrement the extent count.
892 	 */
893 
894 	new_key.objectid = start;
895 	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
896 	new_key.offset = size;
897 
898 	/* Search for a neighbor on the left. */
899 	if (start == block_group->start)
900 		goto right;
901 	key.objectid = start - 1;
902 	key.type = (u8)-1;
903 	key.offset = (u64)-1;
904 
905 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
906 	if (ret)
907 		goto out;
908 
909 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
910 
911 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
912 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
913 		btrfs_release_path(path);
914 		goto right;
915 	}
916 
917 	found_start = key.objectid;
918 	found_end = key.objectid + key.offset;
919 	ASSERT(found_start >= block_group->start &&
920 	       found_end > block_group->start);
921 	ASSERT(found_start < start && found_end <= start);
922 
923 	/*
924 	 * Delete the neighbor on the left and absorb it into the new key (cases
925 	 * 2 and 4).
926 	 */
927 	if (found_end == start) {
928 		ret = btrfs_del_item(trans, root, path);
929 		if (ret)
930 			goto out;
931 		new_key.objectid = found_start;
932 		new_key.offset += key.offset;
933 		new_extents--;
934 	}
935 	btrfs_release_path(path);
936 
937 right:
938 	/* Search for a neighbor on the right. */
939 	if (end == block_group->start + block_group->length)
940 		goto insert;
941 	key.objectid = end;
942 	key.type = (u8)-1;
943 	key.offset = (u64)-1;
944 
945 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
946 	if (ret)
947 		goto out;
948 
949 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
950 
951 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
952 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
953 		btrfs_release_path(path);
954 		goto insert;
955 	}
956 
957 	found_start = key.objectid;
958 	found_end = key.objectid + key.offset;
959 	ASSERT(found_start >= block_group->start &&
960 	       found_end > block_group->start);
961 	ASSERT((found_start < start && found_end <= start) ||
962 	       (found_start >= end && found_end > end));
963 
964 	/*
965 	 * Delete the neighbor on the right and absorb it into the new key
966 	 * (cases 3 and 4).
967 	 */
968 	if (found_start == end) {
969 		ret = btrfs_del_item(trans, root, path);
970 		if (ret)
971 			goto out;
972 		new_key.offset += key.offset;
973 		new_extents--;
974 	}
975 	btrfs_release_path(path);
976 
977 insert:
978 	/* Insert the new key (cases 1-4). */
979 	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
980 	if (ret)
981 		goto out;
982 
983 	btrfs_release_path(path);
984 	ret = update_free_space_extent_count(trans, block_group, path,
985 					     new_extents);
986 
987 out:
988 	return ret;
989 }
990 
991 EXPORT_FOR_TESTS
__add_to_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)992 int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
993 			     struct btrfs_block_group *block_group,
994 			     struct btrfs_path *path, u64 start, u64 size)
995 {
996 	struct btrfs_free_space_info *info;
997 	u32 flags;
998 	int ret;
999 
1000 	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1001 		ret = __add_block_group_free_space(trans, block_group, path);
1002 		if (ret)
1003 			return ret;
1004 	}
1005 
1006 	info = search_free_space_info(NULL, block_group, path, 0);
1007 	if (IS_ERR(info))
1008 		return PTR_ERR(info);
1009 	flags = btrfs_free_space_flags(path->nodes[0], info);
1010 	btrfs_release_path(path);
1011 
1012 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1013 		return modify_free_space_bitmap(trans, block_group, path,
1014 						start, size, 0);
1015 	} else {
1016 		return add_free_space_extent(trans, block_group, path, start,
1017 					     size);
1018 	}
1019 }
1020 
add_to_free_space_tree(struct btrfs_trans_handle * trans,u64 start,u64 size)1021 int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1022 			   u64 start, u64 size)
1023 {
1024 	struct btrfs_block_group *block_group;
1025 	struct btrfs_path *path;
1026 	int ret;
1027 
1028 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1029 		return 0;
1030 
1031 	path = btrfs_alloc_path();
1032 	if (!path) {
1033 		ret = -ENOMEM;
1034 		goto out;
1035 	}
1036 
1037 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1038 	if (!block_group) {
1039 		ASSERT(0);
1040 		ret = -ENOENT;
1041 		goto out;
1042 	}
1043 
1044 	mutex_lock(&block_group->free_space_lock);
1045 	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1046 	mutex_unlock(&block_group->free_space_lock);
1047 
1048 	btrfs_put_block_group(block_group);
1049 out:
1050 	btrfs_free_path(path);
1051 	if (ret)
1052 		btrfs_abort_transaction(trans, ret);
1053 	return ret;
1054 }
1055 
1056 /*
1057  * Populate the free space tree by walking the extent tree. Operations on the
1058  * extent tree that happen as a result of writes to the free space tree will go
1059  * through the normal add/remove hooks.
1060  */
populate_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1061 static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1062 				    struct btrfs_block_group *block_group)
1063 {
1064 	struct btrfs_root *extent_root;
1065 	struct btrfs_path *path, *path2;
1066 	struct btrfs_key key;
1067 	u64 start, end;
1068 	int ret;
1069 
1070 	path = btrfs_alloc_path();
1071 	if (!path)
1072 		return -ENOMEM;
1073 	path->reada = READA_FORWARD;
1074 
1075 	path2 = btrfs_alloc_path();
1076 	if (!path2) {
1077 		btrfs_free_path(path);
1078 		return -ENOMEM;
1079 	}
1080 
1081 	ret = add_new_free_space_info(trans, block_group, path2);
1082 	if (ret)
1083 		goto out;
1084 
1085 	mutex_lock(&block_group->free_space_lock);
1086 
1087 	/*
1088 	 * Iterate through all of the extent and metadata items in this block
1089 	 * group, adding the free space between them and the free space at the
1090 	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1091 	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1092 	 * contained in.
1093 	 */
1094 	key.objectid = block_group->start;
1095 	key.type = BTRFS_EXTENT_ITEM_KEY;
1096 	key.offset = 0;
1097 
1098 	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1099 	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1100 	if (ret < 0)
1101 		goto out_locked;
1102 	ASSERT(ret == 0);
1103 
1104 	start = block_group->start;
1105 	end = block_group->start + block_group->length;
1106 	while (1) {
1107 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1108 
1109 		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1110 		    key.type == BTRFS_METADATA_ITEM_KEY) {
1111 			if (key.objectid >= end)
1112 				break;
1113 
1114 			if (start < key.objectid) {
1115 				ret = __add_to_free_space_tree(trans,
1116 							       block_group,
1117 							       path2, start,
1118 							       key.objectid -
1119 							       start);
1120 				if (ret)
1121 					goto out_locked;
1122 			}
1123 			start = key.objectid;
1124 			if (key.type == BTRFS_METADATA_ITEM_KEY)
1125 				start += trans->fs_info->nodesize;
1126 			else
1127 				start += key.offset;
1128 		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1129 			if (key.objectid != block_group->start)
1130 				break;
1131 		}
1132 
1133 		ret = btrfs_next_item(extent_root, path);
1134 		if (ret < 0)
1135 			goto out_locked;
1136 		if (ret)
1137 			break;
1138 	}
1139 	if (start < end) {
1140 		ret = __add_to_free_space_tree(trans, block_group, path2,
1141 					       start, end - start);
1142 		if (ret)
1143 			goto out_locked;
1144 	}
1145 
1146 	ret = 0;
1147 out_locked:
1148 	mutex_unlock(&block_group->free_space_lock);
1149 out:
1150 	btrfs_free_path(path2);
1151 	btrfs_free_path(path);
1152 	return ret;
1153 }
1154 
btrfs_create_free_space_tree(struct btrfs_fs_info * fs_info)1155 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1156 {
1157 	struct btrfs_trans_handle *trans;
1158 	struct btrfs_root *tree_root = fs_info->tree_root;
1159 	struct btrfs_root *free_space_root;
1160 	struct btrfs_block_group *block_group;
1161 	struct rb_node *node;
1162 	int ret;
1163 
1164 	trans = btrfs_start_transaction(tree_root, 0);
1165 	if (IS_ERR(trans))
1166 		return PTR_ERR(trans);
1167 
1168 	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1169 	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1170 	free_space_root = btrfs_create_tree(trans,
1171 					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1172 	if (IS_ERR(free_space_root)) {
1173 		ret = PTR_ERR(free_space_root);
1174 		btrfs_abort_transaction(trans, ret);
1175 		btrfs_end_transaction(trans);
1176 		goto out_clear;
1177 	}
1178 	ret = btrfs_global_root_insert(free_space_root);
1179 	if (ret) {
1180 		btrfs_put_root(free_space_root);
1181 		btrfs_abort_transaction(trans, ret);
1182 		btrfs_end_transaction(trans);
1183 		goto out_clear;
1184 	}
1185 
1186 	node = rb_first_cached(&fs_info->block_group_cache_tree);
1187 	while (node) {
1188 		block_group = rb_entry(node, struct btrfs_block_group,
1189 				       cache_node);
1190 		ret = populate_free_space_tree(trans, block_group);
1191 		if (ret) {
1192 			btrfs_abort_transaction(trans, ret);
1193 			btrfs_end_transaction(trans);
1194 			goto out_clear;
1195 		}
1196 		node = rb_next(node);
1197 	}
1198 
1199 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1200 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1201 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1202 	ret = btrfs_commit_transaction(trans);
1203 
1204 	/*
1205 	 * Now that we've committed the transaction any reading of our commit
1206 	 * root will be safe, so we can cache from the free space tree now.
1207 	 */
1208 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1209 	return ret;
1210 
1211 out_clear:
1212 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1213 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1214 	return ret;
1215 }
1216 
clear_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root)1217 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1218 				 struct btrfs_root *root)
1219 {
1220 	struct btrfs_path *path;
1221 	struct btrfs_key key;
1222 	int nr;
1223 	int ret;
1224 
1225 	path = btrfs_alloc_path();
1226 	if (!path)
1227 		return -ENOMEM;
1228 
1229 	key.objectid = 0;
1230 	key.type = 0;
1231 	key.offset = 0;
1232 
1233 	while (1) {
1234 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1235 		if (ret < 0)
1236 			goto out;
1237 
1238 		nr = btrfs_header_nritems(path->nodes[0]);
1239 		if (!nr)
1240 			break;
1241 
1242 		path->slots[0] = 0;
1243 		ret = btrfs_del_items(trans, root, path, 0, nr);
1244 		if (ret)
1245 			goto out;
1246 
1247 		btrfs_release_path(path);
1248 	}
1249 
1250 	ret = 0;
1251 out:
1252 	btrfs_free_path(path);
1253 	return ret;
1254 }
1255 
btrfs_delete_free_space_tree(struct btrfs_fs_info * fs_info)1256 int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
1257 {
1258 	struct btrfs_trans_handle *trans;
1259 	struct btrfs_root *tree_root = fs_info->tree_root;
1260 	struct btrfs_key key = {
1261 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1262 		.type = BTRFS_ROOT_ITEM_KEY,
1263 		.offset = 0,
1264 	};
1265 	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1266 	int ret;
1267 
1268 	trans = btrfs_start_transaction(tree_root, 0);
1269 	if (IS_ERR(trans))
1270 		return PTR_ERR(trans);
1271 
1272 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1273 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1274 
1275 	ret = clear_free_space_tree(trans, free_space_root);
1276 	if (ret) {
1277 		btrfs_abort_transaction(trans, ret);
1278 		btrfs_end_transaction(trans);
1279 		return ret;
1280 	}
1281 
1282 	ret = btrfs_del_root(trans, &free_space_root->root_key);
1283 	if (ret) {
1284 		btrfs_abort_transaction(trans, ret);
1285 		btrfs_end_transaction(trans);
1286 		return ret;
1287 	}
1288 
1289 	btrfs_global_root_delete(free_space_root);
1290 
1291 	spin_lock(&fs_info->trans_lock);
1292 	list_del(&free_space_root->dirty_list);
1293 	spin_unlock(&fs_info->trans_lock);
1294 
1295 	btrfs_tree_lock(free_space_root->node);
1296 	btrfs_clear_buffer_dirty(trans, free_space_root->node);
1297 	btrfs_tree_unlock(free_space_root->node);
1298 	ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1299 				    free_space_root->node, 0, 1);
1300 	btrfs_put_root(free_space_root);
1301 	if (ret < 0) {
1302 		btrfs_abort_transaction(trans, ret);
1303 		btrfs_end_transaction(trans);
1304 		return ret;
1305 	}
1306 
1307 	return btrfs_commit_transaction(trans);
1308 }
1309 
btrfs_rebuild_free_space_tree(struct btrfs_fs_info * fs_info)1310 int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
1311 {
1312 	struct btrfs_trans_handle *trans;
1313 	struct btrfs_key key = {
1314 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1315 		.type = BTRFS_ROOT_ITEM_KEY,
1316 		.offset = 0,
1317 	};
1318 	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1319 	struct rb_node *node;
1320 	int ret;
1321 
1322 	trans = btrfs_start_transaction(free_space_root, 1);
1323 	if (IS_ERR(trans))
1324 		return PTR_ERR(trans);
1325 
1326 	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1327 	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1328 
1329 	ret = clear_free_space_tree(trans, free_space_root);
1330 	if (ret) {
1331 		btrfs_abort_transaction(trans, ret);
1332 		btrfs_end_transaction(trans);
1333 		return ret;
1334 	}
1335 
1336 	node = rb_first_cached(&fs_info->block_group_cache_tree);
1337 	while (node) {
1338 		struct btrfs_block_group *block_group;
1339 
1340 		block_group = rb_entry(node, struct btrfs_block_group,
1341 				       cache_node);
1342 		ret = populate_free_space_tree(trans, block_group);
1343 		if (ret) {
1344 			btrfs_abort_transaction(trans, ret);
1345 			btrfs_end_transaction(trans);
1346 			return ret;
1347 		}
1348 		if (btrfs_should_end_transaction(trans)) {
1349 			btrfs_end_transaction(trans);
1350 			trans = btrfs_start_transaction(free_space_root, 1);
1351 			if (IS_ERR(trans))
1352 				return PTR_ERR(trans);
1353 		}
1354 		node = rb_next(node);
1355 	}
1356 
1357 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1358 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1359 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1360 
1361 	ret = btrfs_commit_transaction(trans);
1362 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1363 	return ret;
1364 }
1365 
__add_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)1366 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1367 					struct btrfs_block_group *block_group,
1368 					struct btrfs_path *path)
1369 {
1370 	int ret;
1371 
1372 	clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
1373 
1374 	ret = add_new_free_space_info(trans, block_group, path);
1375 	if (ret)
1376 		return ret;
1377 
1378 	return __add_to_free_space_tree(trans, block_group, path,
1379 					block_group->start,
1380 					block_group->length);
1381 }
1382 
add_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1383 int add_block_group_free_space(struct btrfs_trans_handle *trans,
1384 			       struct btrfs_block_group *block_group)
1385 {
1386 	struct btrfs_fs_info *fs_info = trans->fs_info;
1387 	struct btrfs_path *path = NULL;
1388 	int ret = 0;
1389 
1390 	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1391 		return 0;
1392 
1393 	mutex_lock(&block_group->free_space_lock);
1394 	if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags))
1395 		goto out;
1396 
1397 	path = btrfs_alloc_path();
1398 	if (!path) {
1399 		ret = -ENOMEM;
1400 		goto out;
1401 	}
1402 
1403 	ret = __add_block_group_free_space(trans, block_group, path);
1404 
1405 out:
1406 	btrfs_free_path(path);
1407 	mutex_unlock(&block_group->free_space_lock);
1408 	if (ret)
1409 		btrfs_abort_transaction(trans, ret);
1410 	return ret;
1411 }
1412 
remove_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1413 int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1414 				  struct btrfs_block_group *block_group)
1415 {
1416 	struct btrfs_root *root = btrfs_free_space_root(block_group);
1417 	struct btrfs_path *path;
1418 	struct btrfs_key key, found_key;
1419 	struct extent_buffer *leaf;
1420 	u64 start, end;
1421 	int done = 0, nr;
1422 	int ret;
1423 
1424 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1425 		return 0;
1426 
1427 	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1428 		/* We never added this block group to the free space tree. */
1429 		return 0;
1430 	}
1431 
1432 	path = btrfs_alloc_path();
1433 	if (!path) {
1434 		ret = -ENOMEM;
1435 		goto out;
1436 	}
1437 
1438 	start = block_group->start;
1439 	end = block_group->start + block_group->length;
1440 
1441 	key.objectid = end - 1;
1442 	key.type = (u8)-1;
1443 	key.offset = (u64)-1;
1444 
1445 	while (!done) {
1446 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1447 		if (ret)
1448 			goto out;
1449 
1450 		leaf = path->nodes[0];
1451 		nr = 0;
1452 		path->slots[0]++;
1453 		while (path->slots[0] > 0) {
1454 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1455 
1456 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1457 				ASSERT(found_key.objectid == block_group->start);
1458 				ASSERT(found_key.offset == block_group->length);
1459 				done = 1;
1460 				nr++;
1461 				path->slots[0]--;
1462 				break;
1463 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1464 				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1465 				ASSERT(found_key.objectid >= start);
1466 				ASSERT(found_key.objectid < end);
1467 				ASSERT(found_key.objectid + found_key.offset <= end);
1468 				nr++;
1469 				path->slots[0]--;
1470 			} else {
1471 				ASSERT(0);
1472 			}
1473 		}
1474 
1475 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1476 		if (ret)
1477 			goto out;
1478 		btrfs_release_path(path);
1479 	}
1480 
1481 	ret = 0;
1482 out:
1483 	btrfs_free_path(path);
1484 	if (ret)
1485 		btrfs_abort_transaction(trans, ret);
1486 	return ret;
1487 }
1488 
load_free_space_bitmaps(struct btrfs_caching_control * caching_ctl,struct btrfs_path * path,u32 expected_extent_count)1489 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1490 				   struct btrfs_path *path,
1491 				   u32 expected_extent_count)
1492 {
1493 	struct btrfs_block_group *block_group;
1494 	struct btrfs_fs_info *fs_info;
1495 	struct btrfs_root *root;
1496 	struct btrfs_key key;
1497 	int prev_bit = 0, bit;
1498 	/* Initialize to silence GCC. */
1499 	u64 extent_start = 0;
1500 	u64 end, offset;
1501 	u64 total_found = 0;
1502 	u32 extent_count = 0;
1503 	int ret;
1504 
1505 	block_group = caching_ctl->block_group;
1506 	fs_info = block_group->fs_info;
1507 	root = btrfs_free_space_root(block_group);
1508 
1509 	end = block_group->start + block_group->length;
1510 
1511 	while (1) {
1512 		ret = btrfs_next_item(root, path);
1513 		if (ret < 0)
1514 			goto out;
1515 		if (ret)
1516 			break;
1517 
1518 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1519 
1520 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1521 			break;
1522 
1523 		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1524 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1525 
1526 		offset = key.objectid;
1527 		while (offset < key.objectid + key.offset) {
1528 			bit = free_space_test_bit(block_group, path, offset);
1529 			if (prev_bit == 0 && bit == 1) {
1530 				extent_start = offset;
1531 			} else if (prev_bit == 1 && bit == 0) {
1532 				u64 space_added;
1533 
1534 				ret = btrfs_add_new_free_space(block_group,
1535 							       extent_start,
1536 							       offset,
1537 							       &space_added);
1538 				if (ret)
1539 					goto out;
1540 				total_found += space_added;
1541 				if (total_found > CACHING_CTL_WAKE_UP) {
1542 					total_found = 0;
1543 					wake_up(&caching_ctl->wait);
1544 				}
1545 				extent_count++;
1546 			}
1547 			prev_bit = bit;
1548 			offset += fs_info->sectorsize;
1549 		}
1550 	}
1551 	if (prev_bit == 1) {
1552 		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
1553 		if (ret)
1554 			goto out;
1555 		extent_count++;
1556 	}
1557 
1558 	if (extent_count != expected_extent_count) {
1559 		btrfs_err(fs_info,
1560 			  "incorrect extent count for %llu; counted %u, expected %u",
1561 			  block_group->start, extent_count,
1562 			  expected_extent_count);
1563 		ASSERT(0);
1564 		ret = -EIO;
1565 		goto out;
1566 	}
1567 
1568 	ret = 0;
1569 out:
1570 	return ret;
1571 }
1572 
load_free_space_extents(struct btrfs_caching_control * caching_ctl,struct btrfs_path * path,u32 expected_extent_count)1573 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1574 				   struct btrfs_path *path,
1575 				   u32 expected_extent_count)
1576 {
1577 	struct btrfs_block_group *block_group;
1578 	struct btrfs_fs_info *fs_info;
1579 	struct btrfs_root *root;
1580 	struct btrfs_key key;
1581 	u64 end;
1582 	u64 total_found = 0;
1583 	u32 extent_count = 0;
1584 	int ret;
1585 
1586 	block_group = caching_ctl->block_group;
1587 	fs_info = block_group->fs_info;
1588 	root = btrfs_free_space_root(block_group);
1589 
1590 	end = block_group->start + block_group->length;
1591 
1592 	while (1) {
1593 		u64 space_added;
1594 
1595 		ret = btrfs_next_item(root, path);
1596 		if (ret < 0)
1597 			goto out;
1598 		if (ret)
1599 			break;
1600 
1601 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1602 
1603 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1604 			break;
1605 
1606 		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1607 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1608 
1609 		ret = btrfs_add_new_free_space(block_group, key.objectid,
1610 					       key.objectid + key.offset,
1611 					       &space_added);
1612 		if (ret)
1613 			goto out;
1614 		total_found += space_added;
1615 		if (total_found > CACHING_CTL_WAKE_UP) {
1616 			total_found = 0;
1617 			wake_up(&caching_ctl->wait);
1618 		}
1619 		extent_count++;
1620 	}
1621 
1622 	if (extent_count != expected_extent_count) {
1623 		btrfs_err(fs_info,
1624 			  "incorrect extent count for %llu; counted %u, expected %u",
1625 			  block_group->start, extent_count,
1626 			  expected_extent_count);
1627 		ASSERT(0);
1628 		ret = -EIO;
1629 		goto out;
1630 	}
1631 
1632 	ret = 0;
1633 out:
1634 	return ret;
1635 }
1636 
load_free_space_tree(struct btrfs_caching_control * caching_ctl)1637 int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1638 {
1639 	struct btrfs_block_group *block_group;
1640 	struct btrfs_free_space_info *info;
1641 	struct btrfs_path *path;
1642 	u32 extent_count, flags;
1643 	int ret;
1644 
1645 	block_group = caching_ctl->block_group;
1646 
1647 	path = btrfs_alloc_path();
1648 	if (!path)
1649 		return -ENOMEM;
1650 
1651 	/*
1652 	 * Just like caching_thread() doesn't want to deadlock on the extent
1653 	 * tree, we don't want to deadlock on the free space tree.
1654 	 */
1655 	path->skip_locking = 1;
1656 	path->search_commit_root = 1;
1657 	path->reada = READA_FORWARD;
1658 
1659 	info = search_free_space_info(NULL, block_group, path, 0);
1660 	if (IS_ERR(info)) {
1661 		ret = PTR_ERR(info);
1662 		goto out;
1663 	}
1664 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1665 	flags = btrfs_free_space_flags(path->nodes[0], info);
1666 
1667 	/*
1668 	 * We left path pointing to the free space info item, so now
1669 	 * load_free_space_foo can just iterate through the free space tree from
1670 	 * there.
1671 	 */
1672 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1673 		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1674 	else
1675 		ret = load_free_space_extents(caching_ctl, path, extent_count);
1676 
1677 out:
1678 	btrfs_free_path(path);
1679 	return ret;
1680 }
1681