xref: /linux/fs/btrfs/free-space-tree.c (revision 82f78acd5a9270370ef4aa3f032ede25f3dc91ee)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2015 Facebook.  All rights reserved.
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
8 #include "messages.h"
9 #include "ctree.h"
10 #include "disk-io.h"
11 #include "locking.h"
12 #include "free-space-tree.h"
13 #include "transaction.h"
14 #include "block-group.h"
15 #include "fs.h"
16 #include "accessors.h"
17 #include "extent-tree.h"
18 #include "root-tree.h"
19 
20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
21 					struct btrfs_block_group *block_group,
22 					struct btrfs_path *path);
23 
btrfs_free_space_root(struct btrfs_block_group * block_group)24 static struct btrfs_root *btrfs_free_space_root(
25 				struct btrfs_block_group *block_group)
26 {
27 	struct btrfs_key key = {
28 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
29 		.type = BTRFS_ROOT_ITEM_KEY,
30 		.offset = 0,
31 	};
32 
33 	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
34 		key.offset = block_group->global_root_id;
35 	return btrfs_global_root(block_group->fs_info, &key);
36 }
37 
btrfs_set_free_space_tree_thresholds(struct btrfs_block_group * cache)38 void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache)
39 {
40 	u32 bitmap_range;
41 	size_t bitmap_size;
42 	u64 num_bitmaps, total_bitmap_size;
43 
44 	if (WARN_ON(cache->length == 0))
45 		btrfs_warn(cache->fs_info, "block group %llu length is zero",
46 			   cache->start);
47 
48 	/*
49 	 * We convert to bitmaps when the disk space required for using extents
50 	 * exceeds that required for using bitmaps.
51 	 */
52 	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
53 	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
54 	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
55 	total_bitmap_size = num_bitmaps * bitmap_size;
56 	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
57 					    sizeof(struct btrfs_item));
58 
59 	/*
60 	 * We allow for a small buffer between the high threshold and low
61 	 * threshold to avoid thrashing back and forth between the two formats.
62 	 */
63 	if (cache->bitmap_high_thresh > 100)
64 		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
65 	else
66 		cache->bitmap_low_thresh = 0;
67 }
68 
add_new_free_space_info(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)69 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
70 				   struct btrfs_block_group *block_group,
71 				   struct btrfs_path *path)
72 {
73 	struct btrfs_root *root = btrfs_free_space_root(block_group);
74 	struct btrfs_free_space_info *info;
75 	struct btrfs_key key;
76 	struct extent_buffer *leaf;
77 	int ret;
78 
79 	key.objectid = block_group->start;
80 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
81 	key.offset = block_group->length;
82 
83 	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
84 	if (ret)
85 		return ret;
86 
87 	leaf = path->nodes[0];
88 	info = btrfs_item_ptr(leaf, path->slots[0],
89 			      struct btrfs_free_space_info);
90 	btrfs_set_free_space_extent_count(leaf, info, 0);
91 	btrfs_set_free_space_flags(leaf, info, 0);
92 	btrfs_release_path(path);
93 	return 0;
94 }
95 
96 EXPORT_FOR_TESTS
btrfs_search_free_space_info(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,int cow)97 struct btrfs_free_space_info *btrfs_search_free_space_info(
98 		struct btrfs_trans_handle *trans,
99 		struct btrfs_block_group *block_group,
100 		struct btrfs_path *path, int cow)
101 {
102 	struct btrfs_fs_info *fs_info = block_group->fs_info;
103 	struct btrfs_root *root = btrfs_free_space_root(block_group);
104 	struct btrfs_key key;
105 	int ret;
106 
107 	key.objectid = block_group->start;
108 	key.type = BTRFS_FREE_SPACE_INFO_KEY;
109 	key.offset = block_group->length;
110 
111 	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
112 	if (ret < 0)
113 		return ERR_PTR(ret);
114 	if (ret != 0) {
115 		btrfs_warn(fs_info, "missing free space info for %llu",
116 			   block_group->start);
117 		DEBUG_WARN();
118 		return ERR_PTR(-ENOENT);
119 	}
120 
121 	return btrfs_item_ptr(path->nodes[0], path->slots[0],
122 			      struct btrfs_free_space_info);
123 }
124 
125 /*
126  * btrfs_search_slot() but we're looking for the greatest key less than the
127  * passed key.
128  */
btrfs_search_prev_slot(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_key * key,struct btrfs_path * p,int ins_len,int cow)129 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
130 				  struct btrfs_root *root,
131 				  struct btrfs_key *key, struct btrfs_path *p,
132 				  int ins_len, int cow)
133 {
134 	int ret;
135 
136 	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
137 	if (ret < 0)
138 		return ret;
139 
140 	if (unlikely(ret == 0)) {
141 		DEBUG_WARN();
142 		return -EIO;
143 	}
144 
145 	if (unlikely(p->slots[0] == 0)) {
146 		DEBUG_WARN("no previous slot found");
147 		return -EIO;
148 	}
149 	p->slots[0]--;
150 
151 	return 0;
152 }
153 
free_space_bitmap_size(const struct btrfs_fs_info * fs_info,u64 size)154 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
155 					 u64 size)
156 {
157 	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
158 }
159 
alloc_bitmap(u32 bitmap_size)160 static unsigned long *alloc_bitmap(u32 bitmap_size)
161 {
162 	unsigned long *ret;
163 	unsigned int nofs_flag;
164 	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
165 
166 	/*
167 	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
168 	 * into the filesystem here. All callers hold a transaction handle
169 	 * open, so if a GFP_KERNEL allocation recurses into the filesystem
170 	 * and triggers a transaction commit, we would deadlock.
171 	 */
172 	nofs_flag = memalloc_nofs_save();
173 	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
174 	memalloc_nofs_restore(nofs_flag);
175 	return ret;
176 }
177 
le_bitmap_set(unsigned long * map,unsigned int start,int len)178 static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
179 {
180 	u8 *p = ((u8 *)map) + BIT_BYTE(start);
181 	const unsigned int size = start + len;
182 	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
183 	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
184 
185 	while (len - bits_to_set >= 0) {
186 		*p |= mask_to_set;
187 		len -= bits_to_set;
188 		bits_to_set = BITS_PER_BYTE;
189 		mask_to_set = ~0;
190 		p++;
191 	}
192 	if (len) {
193 		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
194 		*p |= mask_to_set;
195 	}
196 }
197 
198 EXPORT_FOR_TESTS
btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)199 int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
200 					struct btrfs_block_group *block_group,
201 					struct btrfs_path *path)
202 {
203 	struct btrfs_fs_info *fs_info = trans->fs_info;
204 	struct btrfs_root *root = btrfs_free_space_root(block_group);
205 	struct btrfs_free_space_info *info;
206 	struct btrfs_key key, found_key;
207 	struct extent_buffer *leaf;
208 	unsigned long *bitmap;
209 	char *bitmap_cursor;
210 	u64 start, end;
211 	u64 bitmap_range, i;
212 	u32 bitmap_size, flags, expected_extent_count;
213 	u32 extent_count = 0;
214 	int done = 0, nr;
215 	int ret;
216 
217 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
218 	bitmap = alloc_bitmap(bitmap_size);
219 	if (unlikely(!bitmap))
220 		return 0;
221 
222 	start = block_group->start;
223 	end = block_group->start + block_group->length;
224 
225 	key.objectid = end - 1;
226 	key.type = (u8)-1;
227 	key.offset = (u64)-1;
228 
229 	while (!done) {
230 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
231 		if (unlikely(ret)) {
232 			btrfs_abort_transaction(trans, ret);
233 			goto out;
234 		}
235 
236 		leaf = path->nodes[0];
237 		nr = 0;
238 		path->slots[0]++;
239 		while (path->slots[0] > 0) {
240 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
241 
242 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
243 				ASSERT(found_key.objectid == block_group->start);
244 				ASSERT(found_key.offset == block_group->length);
245 				done = 1;
246 				break;
247 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
248 				u64 first, last;
249 
250 				ASSERT(found_key.objectid >= start);
251 				ASSERT(found_key.objectid < end);
252 				ASSERT(found_key.objectid + found_key.offset <= end);
253 
254 				first = div_u64(found_key.objectid - start,
255 						fs_info->sectorsize);
256 				last = div_u64(found_key.objectid + found_key.offset - start,
257 					       fs_info->sectorsize);
258 				le_bitmap_set(bitmap, first, last - first);
259 
260 				extent_count++;
261 				nr++;
262 				path->slots[0]--;
263 			} else {
264 				ASSERT(0);
265 			}
266 		}
267 
268 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
269 		if (unlikely(ret)) {
270 			btrfs_abort_transaction(trans, ret);
271 			goto out;
272 		}
273 		btrfs_release_path(path);
274 	}
275 
276 	info = btrfs_search_free_space_info(trans, block_group, path, 1);
277 	if (IS_ERR(info)) {
278 		ret = PTR_ERR(info);
279 		btrfs_abort_transaction(trans, ret);
280 		goto out;
281 	}
282 	leaf = path->nodes[0];
283 	flags = btrfs_free_space_flags(leaf, info);
284 	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
285 	block_group->using_free_space_bitmaps = true;
286 	block_group->using_free_space_bitmaps_cached = true;
287 	btrfs_set_free_space_flags(leaf, info, flags);
288 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
289 	btrfs_release_path(path);
290 
291 	if (unlikely(extent_count != expected_extent_count)) {
292 		btrfs_err(fs_info,
293 			  "incorrect extent count for %llu; counted %u, expected %u",
294 			  block_group->start, extent_count,
295 			  expected_extent_count);
296 		ret = -EIO;
297 		btrfs_abort_transaction(trans, ret);
298 		goto out;
299 	}
300 
301 	bitmap_cursor = (char *)bitmap;
302 	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
303 	i = start;
304 	while (i < end) {
305 		unsigned long ptr;
306 		u64 extent_size;
307 		u32 data_size;
308 
309 		extent_size = min(end - i, bitmap_range);
310 		data_size = free_space_bitmap_size(fs_info, extent_size);
311 
312 		key.objectid = i;
313 		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
314 		key.offset = extent_size;
315 
316 		ret = btrfs_insert_empty_item(trans, root, path, &key,
317 					      data_size);
318 		if (unlikely(ret)) {
319 			btrfs_abort_transaction(trans, ret);
320 			goto out;
321 		}
322 
323 		leaf = path->nodes[0];
324 		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
325 		write_extent_buffer(leaf, bitmap_cursor, ptr,
326 				    data_size);
327 		btrfs_release_path(path);
328 
329 		i += extent_size;
330 		bitmap_cursor += data_size;
331 	}
332 
333 	ret = 0;
334 out:
335 	kvfree(bitmap);
336 	return ret;
337 }
338 
339 EXPORT_FOR_TESTS
btrfs_convert_free_space_to_extents(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)340 int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
341 					struct btrfs_block_group *block_group,
342 					struct btrfs_path *path)
343 {
344 	struct btrfs_fs_info *fs_info = trans->fs_info;
345 	struct btrfs_root *root = btrfs_free_space_root(block_group);
346 	struct btrfs_free_space_info *info;
347 	struct btrfs_key key, found_key;
348 	struct extent_buffer *leaf;
349 	unsigned long *bitmap;
350 	u64 start, end;
351 	u32 bitmap_size, flags, expected_extent_count;
352 	unsigned long nrbits, start_bit, end_bit;
353 	u32 extent_count = 0;
354 	int done = 0, nr;
355 	int ret;
356 
357 	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
358 	bitmap = alloc_bitmap(bitmap_size);
359 	if (unlikely(!bitmap))
360 		return 0;
361 
362 	start = block_group->start;
363 	end = block_group->start + block_group->length;
364 
365 	key.objectid = end - 1;
366 	key.type = (u8)-1;
367 	key.offset = (u64)-1;
368 
369 	while (!done) {
370 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
371 		if (unlikely(ret)) {
372 			btrfs_abort_transaction(trans, ret);
373 			goto out;
374 		}
375 
376 		leaf = path->nodes[0];
377 		nr = 0;
378 		path->slots[0]++;
379 		while (path->slots[0] > 0) {
380 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
381 
382 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
383 				ASSERT(found_key.objectid == block_group->start);
384 				ASSERT(found_key.offset == block_group->length);
385 				done = 1;
386 				break;
387 			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
388 				unsigned long ptr;
389 				char *bitmap_cursor;
390 				u32 bitmap_pos, data_size;
391 
392 				ASSERT(found_key.objectid >= start);
393 				ASSERT(found_key.objectid < end);
394 				ASSERT(found_key.objectid + found_key.offset <= end);
395 
396 				bitmap_pos = div_u64(found_key.objectid - start,
397 						     fs_info->sectorsize *
398 						     BITS_PER_BYTE);
399 				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
400 				data_size = free_space_bitmap_size(fs_info,
401 								found_key.offset);
402 
403 				path->slots[0]--;
404 				ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
405 				read_extent_buffer(leaf, bitmap_cursor, ptr,
406 						   data_size);
407 
408 				nr++;
409 			} else {
410 				ASSERT(0);
411 			}
412 		}
413 
414 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
415 		if (unlikely(ret)) {
416 			btrfs_abort_transaction(trans, ret);
417 			goto out;
418 		}
419 		btrfs_release_path(path);
420 	}
421 
422 	info = btrfs_search_free_space_info(trans, block_group, path, 1);
423 	if (IS_ERR(info)) {
424 		ret = PTR_ERR(info);
425 		btrfs_abort_transaction(trans, ret);
426 		goto out;
427 	}
428 	leaf = path->nodes[0];
429 	flags = btrfs_free_space_flags(leaf, info);
430 	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
431 	block_group->using_free_space_bitmaps = false;
432 	block_group->using_free_space_bitmaps_cached = true;
433 	btrfs_set_free_space_flags(leaf, info, flags);
434 	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
435 	btrfs_release_path(path);
436 
437 	nrbits = block_group->length >> fs_info->sectorsize_bits;
438 	start_bit = find_next_bit_le(bitmap, nrbits, 0);
439 
440 	while (start_bit < nrbits) {
441 		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
442 		ASSERT(start_bit < end_bit);
443 
444 		key.objectid = start + start_bit * fs_info->sectorsize;
445 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
446 		key.offset = (end_bit - start_bit) * fs_info->sectorsize;
447 
448 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
449 		if (unlikely(ret)) {
450 			btrfs_abort_transaction(trans, ret);
451 			goto out;
452 		}
453 		btrfs_release_path(path);
454 
455 		extent_count++;
456 
457 		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
458 	}
459 
460 	if (unlikely(extent_count != expected_extent_count)) {
461 		btrfs_err(fs_info,
462 			  "incorrect extent count for %llu; counted %u, expected %u",
463 			  block_group->start, extent_count,
464 			  expected_extent_count);
465 		ret = -EIO;
466 		btrfs_abort_transaction(trans, ret);
467 		goto out;
468 	}
469 
470 	ret = 0;
471 out:
472 	kvfree(bitmap);
473 	return ret;
474 }
475 
update_free_space_extent_count(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,int new_extents)476 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
477 					  struct btrfs_block_group *block_group,
478 					  struct btrfs_path *path,
479 					  int new_extents)
480 {
481 	struct btrfs_free_space_info *info;
482 	u32 flags;
483 	u32 extent_count;
484 	int ret = 0;
485 
486 	if (new_extents == 0)
487 		return 0;
488 
489 	info = btrfs_search_free_space_info(trans, block_group, path, 1);
490 	if (IS_ERR(info))
491 		return PTR_ERR(info);
492 
493 	flags = btrfs_free_space_flags(path->nodes[0], info);
494 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
495 
496 	extent_count += new_extents;
497 	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
498 	btrfs_release_path(path);
499 
500 	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
501 	    extent_count > block_group->bitmap_high_thresh) {
502 		ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path);
503 	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
504 		   extent_count < block_group->bitmap_low_thresh) {
505 		ret = btrfs_convert_free_space_to_extents(trans, block_group, path);
506 	}
507 
508 	return ret;
509 }
510 
511 EXPORT_FOR_TESTS
btrfs_free_space_test_bit(struct btrfs_block_group * block_group,struct btrfs_path * path,u64 offset)512 bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
513 			       struct btrfs_path *path, u64 offset)
514 {
515 	struct extent_buffer *leaf;
516 	struct btrfs_key key;
517 	u64 found_start, found_end;
518 	unsigned long ptr, i;
519 
520 	leaf = path->nodes[0];
521 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
522 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
523 
524 	found_start = key.objectid;
525 	found_end = key.objectid + key.offset;
526 	ASSERT(offset >= found_start && offset < found_end);
527 
528 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
529 	i = div_u64(offset - found_start,
530 		    block_group->fs_info->sectorsize);
531 	return extent_buffer_test_bit(leaf, ptr, i);
532 }
533 
free_space_modify_bits(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 * start,u64 * size,bool set_bits)534 static void free_space_modify_bits(struct btrfs_trans_handle *trans,
535 				   struct btrfs_block_group *block_group,
536 				   struct btrfs_path *path, u64 *start, u64 *size,
537 				   bool set_bits)
538 {
539 	struct btrfs_fs_info *fs_info = block_group->fs_info;
540 	struct extent_buffer *leaf;
541 	struct btrfs_key key;
542 	u64 end = *start + *size;
543 	u64 found_start, found_end;
544 	unsigned long ptr, first, last;
545 
546 	leaf = path->nodes[0];
547 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
548 	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
549 
550 	found_start = key.objectid;
551 	found_end = key.objectid + key.offset;
552 	ASSERT(*start >= found_start && *start < found_end);
553 	ASSERT(end > found_start);
554 
555 	if (end > found_end)
556 		end = found_end;
557 
558 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
559 	first = (*start - found_start) >> fs_info->sectorsize_bits;
560 	last = (end - found_start) >> fs_info->sectorsize_bits;
561 	if (set_bits)
562 		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
563 	else
564 		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
565 	btrfs_mark_buffer_dirty(trans, leaf);
566 
567 	*size -= end - *start;
568 	*start = end;
569 }
570 
571 /*
572  * We can't use btrfs_next_item() in modify_free_space_bitmap() because
573  * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
574  * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
575  * looking for.
576  */
free_space_next_bitmap(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * p)577 static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
578 				  struct btrfs_root *root, struct btrfs_path *p)
579 {
580 	struct btrfs_key key;
581 
582 	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
583 		p->slots[0]++;
584 		return 0;
585 	}
586 
587 	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
588 	btrfs_release_path(p);
589 
590 	key.objectid += key.offset;
591 	key.type = (u8)-1;
592 	key.offset = (u64)-1;
593 
594 	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
595 }
596 
597 /*
598  * If remove is 1, then we are removing free space, thus clearing bits in the
599  * bitmap. If remove is 0, then we are adding free space, thus setting bits in
600  * the bitmap.
601  */
modify_free_space_bitmap(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size,bool remove)602 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
603 				    struct btrfs_block_group *block_group,
604 				    struct btrfs_path *path,
605 				    u64 start, u64 size, bool remove)
606 {
607 	struct btrfs_root *root = btrfs_free_space_root(block_group);
608 	struct btrfs_key key;
609 	u64 end = start + size;
610 	u64 cur_start, cur_size;
611 	bool prev_bit_set = false;
612 	bool next_bit_set = false;
613 	int new_extents;
614 	int ret;
615 
616 	/*
617 	 * Read the bit for the block immediately before the extent of space if
618 	 * that block is within the block group.
619 	 */
620 	if (start > block_group->start) {
621 		u64 prev_block = start - block_group->fs_info->sectorsize;
622 
623 		key.objectid = prev_block;
624 		key.type = (u8)-1;
625 		key.offset = (u64)-1;
626 
627 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
628 		if (ret)
629 			return ret;
630 
631 		prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);
632 
633 		/* The previous block may have been in the previous bitmap. */
634 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
635 		if (start >= key.objectid + key.offset) {
636 			ret = free_space_next_bitmap(trans, root, path);
637 			if (ret)
638 				return ret;
639 		}
640 	} else {
641 		key.objectid = start;
642 		key.type = (u8)-1;
643 		key.offset = (u64)-1;
644 
645 		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
646 		if (ret)
647 			return ret;
648 	}
649 
650 	/*
651 	 * Iterate over all of the bitmaps overlapped by the extent of space,
652 	 * clearing/setting bits as required.
653 	 */
654 	cur_start = start;
655 	cur_size = size;
656 	while (1) {
657 		free_space_modify_bits(trans, block_group, path, &cur_start,
658 				       &cur_size, !remove);
659 		if (cur_size == 0)
660 			break;
661 		ret = free_space_next_bitmap(trans, root, path);
662 		if (ret)
663 			return ret;
664 	}
665 
666 	/*
667 	 * Read the bit for the block immediately after the extent of space if
668 	 * that block is within the block group.
669 	 */
670 	if (end < block_group->start + block_group->length) {
671 		/* The next block may be in the next bitmap. */
672 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
673 		if (end >= key.objectid + key.offset) {
674 			ret = free_space_next_bitmap(trans, root, path);
675 			if (ret)
676 				return ret;
677 		}
678 
679 		next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
680 	}
681 
682 	if (remove) {
683 		new_extents = -1;
684 		if (prev_bit_set) {
685 			/* Leftover on the left. */
686 			new_extents++;
687 		}
688 		if (next_bit_set) {
689 			/* Leftover on the right. */
690 			new_extents++;
691 		}
692 	} else {
693 		new_extents = 1;
694 		if (prev_bit_set) {
695 			/* Merging with neighbor on the left. */
696 			new_extents--;
697 		}
698 		if (next_bit_set) {
699 			/* Merging with neighbor on the right. */
700 			new_extents--;
701 		}
702 	}
703 
704 	btrfs_release_path(path);
705 	return update_free_space_extent_count(trans, block_group, path, new_extents);
706 }
707 
remove_free_space_extent(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)708 static int remove_free_space_extent(struct btrfs_trans_handle *trans,
709 				    struct btrfs_block_group *block_group,
710 				    struct btrfs_path *path,
711 				    u64 start, u64 size)
712 {
713 	struct btrfs_root *root = btrfs_free_space_root(block_group);
714 	struct btrfs_key key;
715 	u64 found_start, found_end;
716 	u64 end = start + size;
717 	int new_extents = -1;
718 	int ret;
719 
720 	key.objectid = start;
721 	key.type = (u8)-1;
722 	key.offset = (u64)-1;
723 
724 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
725 	if (ret)
726 		return ret;
727 
728 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
729 
730 	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
731 
732 	found_start = key.objectid;
733 	found_end = key.objectid + key.offset;
734 	ASSERT(start >= found_start && end <= found_end);
735 
736 	/*
737 	 * Okay, now that we've found the free space extent which contains the
738 	 * free space that we are removing, there are four cases:
739 	 *
740 	 * 1. We're using the whole extent: delete the key we found and
741 	 * decrement the free space extent count.
742 	 * 2. We are using part of the extent starting at the beginning: delete
743 	 * the key we found and insert a new key representing the leftover at
744 	 * the end. There is no net change in the number of extents.
745 	 * 3. We are using part of the extent ending at the end: delete the key
746 	 * we found and insert a new key representing the leftover at the
747 	 * beginning. There is no net change in the number of extents.
748 	 * 4. We are using part of the extent in the middle: delete the key we
749 	 * found and insert two new keys representing the leftovers on each
750 	 * side. Where we used to have one extent, we now have two, so increment
751 	 * the extent count. We may need to convert the block group to bitmaps
752 	 * as a result.
753 	 */
754 
755 	/* Delete the existing key (cases 1-4). */
756 	ret = btrfs_del_item(trans, root, path);
757 	if (ret)
758 		return ret;
759 
760 	/* Add a key for leftovers at the beginning (cases 3 and 4). */
761 	if (start > found_start) {
762 		key.objectid = found_start;
763 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
764 		key.offset = start - found_start;
765 
766 		btrfs_release_path(path);
767 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
768 		if (ret)
769 			return ret;
770 		new_extents++;
771 	}
772 
773 	/* Add a key for leftovers at the end (cases 2 and 4). */
774 	if (end < found_end) {
775 		key.objectid = end;
776 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
777 		key.offset = found_end - end;
778 
779 		btrfs_release_path(path);
780 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
781 		if (ret)
782 			return ret;
783 		new_extents++;
784 	}
785 
786 	btrfs_release_path(path);
787 	return update_free_space_extent_count(trans, block_group, path, new_extents);
788 }
789 
using_bitmaps(struct btrfs_block_group * bg,struct btrfs_path * path)790 static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
791 {
792 	struct btrfs_free_space_info *info;
793 	u32 flags;
794 
795 	if (bg->using_free_space_bitmaps_cached)
796 		return bg->using_free_space_bitmaps;
797 
798 	info = btrfs_search_free_space_info(NULL, bg, path, 0);
799 	if (IS_ERR(info))
800 		return PTR_ERR(info);
801 	flags = btrfs_free_space_flags(path->nodes[0], info);
802 	btrfs_release_path(path);
803 
804 	bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS);
805 	bg->using_free_space_bitmaps_cached = true;
806 
807 	return bg->using_free_space_bitmaps;
808 }
809 
810 EXPORT_FOR_TESTS
__btrfs_remove_from_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)811 int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
812 					struct btrfs_block_group *block_group,
813 					struct btrfs_path *path, u64 start, u64 size)
814 {
815 	int ret;
816 
817 	ret = __add_block_group_free_space(trans, block_group, path);
818 	if (ret)
819 		return ret;
820 
821 	ret = using_bitmaps(block_group, path);
822 	if (ret < 0)
823 		return ret;
824 
825 	if (ret)
826 		return modify_free_space_bitmap(trans, block_group, path,
827 						start, size, true);
828 
829 	return remove_free_space_extent(trans, block_group, path, start, size);
830 }
831 
btrfs_remove_from_free_space_tree(struct btrfs_trans_handle * trans,u64 start,u64 size)832 int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
833 				      u64 start, u64 size)
834 {
835 	struct btrfs_block_group *block_group;
836 	BTRFS_PATH_AUTO_FREE(path);
837 	int ret;
838 
839 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
840 		return 0;
841 
842 	path = btrfs_alloc_path();
843 	if (unlikely(!path)) {
844 		ret = -ENOMEM;
845 		btrfs_abort_transaction(trans, ret);
846 		return ret;
847 	}
848 
849 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
850 	if (unlikely(!block_group)) {
851 		DEBUG_WARN("no block group found for start=%llu", start);
852 		ret = -ENOENT;
853 		btrfs_abort_transaction(trans, ret);
854 		return ret;
855 	}
856 
857 	mutex_lock(&block_group->free_space_lock);
858 	ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size);
859 	mutex_unlock(&block_group->free_space_lock);
860 	if (ret)
861 		btrfs_abort_transaction(trans, ret);
862 
863 	btrfs_put_block_group(block_group);
864 
865 	return ret;
866 }
867 
add_free_space_extent(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)868 static int add_free_space_extent(struct btrfs_trans_handle *trans,
869 				 struct btrfs_block_group *block_group,
870 				 struct btrfs_path *path,
871 				 u64 start, u64 size)
872 {
873 	struct btrfs_root *root = btrfs_free_space_root(block_group);
874 	struct btrfs_key key, new_key;
875 	u64 found_start, found_end;
876 	u64 end = start + size;
877 	int new_extents = 1;
878 	int ret;
879 
880 	/*
881 	 * We are adding a new extent of free space, but we need to merge
882 	 * extents. There are four cases here:
883 	 *
884 	 * 1. The new extent does not have any immediate neighbors to merge
885 	 * with: add the new key and increment the free space extent count. We
886 	 * may need to convert the block group to bitmaps as a result.
887 	 * 2. The new extent has an immediate neighbor before it: remove the
888 	 * previous key and insert a new key combining both of them. There is no
889 	 * net change in the number of extents.
890 	 * 3. The new extent has an immediate neighbor after it: remove the next
891 	 * key and insert a new key combining both of them. There is no net
892 	 * change in the number of extents.
893 	 * 4. The new extent has immediate neighbors on both sides: remove both
894 	 * of the keys and insert a new key combining all of them. Where we used
895 	 * to have two extents, we now have one, so decrement the extent count.
896 	 */
897 
898 	new_key.objectid = start;
899 	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
900 	new_key.offset = size;
901 
902 	/* Search for a neighbor on the left. */
903 	if (start == block_group->start)
904 		goto right;
905 	key.objectid = start - 1;
906 	key.type = (u8)-1;
907 	key.offset = (u64)-1;
908 
909 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
910 	if (ret)
911 		return ret;
912 
913 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
914 
915 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
916 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
917 		btrfs_release_path(path);
918 		goto right;
919 	}
920 
921 	found_start = key.objectid;
922 	found_end = key.objectid + key.offset;
923 	ASSERT(found_start >= block_group->start &&
924 	       found_end > block_group->start);
925 	ASSERT(found_start < start && found_end <= start);
926 
927 	/*
928 	 * Delete the neighbor on the left and absorb it into the new key (cases
929 	 * 2 and 4).
930 	 */
931 	if (found_end == start) {
932 		ret = btrfs_del_item(trans, root, path);
933 		if (ret)
934 			return ret;
935 		new_key.objectid = found_start;
936 		new_key.offset += key.offset;
937 		new_extents--;
938 	}
939 	btrfs_release_path(path);
940 
941 right:
942 	/* Search for a neighbor on the right. */
943 	if (end == block_group->start + block_group->length)
944 		goto insert;
945 	key.objectid = end;
946 	key.type = (u8)-1;
947 	key.offset = (u64)-1;
948 
949 	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
950 	if (ret)
951 		return ret;
952 
953 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
954 
955 	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
956 		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
957 		btrfs_release_path(path);
958 		goto insert;
959 	}
960 
961 	found_start = key.objectid;
962 	found_end = key.objectid + key.offset;
963 	ASSERT(found_start >= block_group->start &&
964 	       found_end > block_group->start);
965 	ASSERT((found_start < start && found_end <= start) ||
966 	       (found_start >= end && found_end > end));
967 
968 	/*
969 	 * Delete the neighbor on the right and absorb it into the new key
970 	 * (cases 3 and 4).
971 	 */
972 	if (found_start == end) {
973 		ret = btrfs_del_item(trans, root, path);
974 		if (ret)
975 			return ret;
976 		new_key.offset += key.offset;
977 		new_extents--;
978 	}
979 	btrfs_release_path(path);
980 
981 insert:
982 	/* Insert the new key (cases 1-4). */
983 	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
984 	if (ret)
985 		return ret;
986 
987 	btrfs_release_path(path);
988 	return update_free_space_extent_count(trans, block_group, path, new_extents);
989 }
990 
991 EXPORT_FOR_TESTS
__btrfs_add_to_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)992 int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
993 				   struct btrfs_block_group *block_group,
994 				   struct btrfs_path *path, u64 start, u64 size)
995 {
996 	int ret;
997 
998 	ret = __add_block_group_free_space(trans, block_group, path);
999 	if (ret)
1000 		return ret;
1001 
1002 	ret = using_bitmaps(block_group, path);
1003 	if (ret < 0)
1004 		return ret;
1005 
1006 	if (ret)
1007 		return modify_free_space_bitmap(trans, block_group, path,
1008 						start, size, false);
1009 
1010 	return add_free_space_extent(trans, block_group, path, start, size);
1011 }
1012 
btrfs_add_to_free_space_tree(struct btrfs_trans_handle * trans,u64 start,u64 size)1013 int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
1014 				 u64 start, u64 size)
1015 {
1016 	struct btrfs_block_group *block_group;
1017 	BTRFS_PATH_AUTO_FREE(path);
1018 	int ret;
1019 
1020 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1021 		return 0;
1022 
1023 	path = btrfs_alloc_path();
1024 	if (unlikely(!path)) {
1025 		ret = -ENOMEM;
1026 		btrfs_abort_transaction(trans, ret);
1027 		return ret;
1028 	}
1029 
1030 	block_group = btrfs_lookup_block_group(trans->fs_info, start);
1031 	if (unlikely(!block_group)) {
1032 		DEBUG_WARN("no block group found for start=%llu", start);
1033 		ret = -ENOENT;
1034 		btrfs_abort_transaction(trans, ret);
1035 		return ret;
1036 	}
1037 
1038 	mutex_lock(&block_group->free_space_lock);
1039 	ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size);
1040 	mutex_unlock(&block_group->free_space_lock);
1041 	if (ret)
1042 		btrfs_abort_transaction(trans, ret);
1043 
1044 	btrfs_put_block_group(block_group);
1045 
1046 	return ret;
1047 }
1048 
1049 /*
1050  * Populate the free space tree by walking the extent tree. Operations on the
1051  * extent tree that happen as a result of writes to the free space tree will go
1052  * through the normal add/remove hooks.
1053  */
populate_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1054 static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1055 				    struct btrfs_block_group *block_group)
1056 {
1057 	struct btrfs_root *extent_root;
1058 	BTRFS_PATH_AUTO_FREE(path);
1059 	BTRFS_PATH_AUTO_FREE(path2);
1060 	struct btrfs_key key;
1061 	u64 start, end;
1062 	int ret;
1063 
1064 	path = btrfs_alloc_path();
1065 	if (!path)
1066 		return -ENOMEM;
1067 
1068 	path2 = btrfs_alloc_path();
1069 	if (!path2)
1070 		return -ENOMEM;
1071 
1072 	path->reada = READA_FORWARD;
1073 
1074 	ret = add_new_free_space_info(trans, block_group, path2);
1075 	if (ret)
1076 		return ret;
1077 
1078 	mutex_lock(&block_group->free_space_lock);
1079 
1080 	/*
1081 	 * Iterate through all of the extent and metadata items in this block
1082 	 * group, adding the free space between them and the free space at the
1083 	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1084 	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1085 	 * contained in.
1086 	 */
1087 	key.objectid = block_group->start;
1088 	key.type = BTRFS_EXTENT_ITEM_KEY;
1089 	key.offset = 0;
1090 
1091 	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1092 	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1093 	if (ret < 0)
1094 		goto out_locked;
1095 	/*
1096 	 * If ret is 1 (no key found), it means this is an empty block group,
1097 	 * without any extents allocated from it and there's no block group
1098 	 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
1099 	 * because we are using the block group tree feature (so block group
1100 	 * items are stored in the block group tree) or this is a new block
1101 	 * group created in the current transaction and its block group item
1102 	 * was not yet inserted in the extent tree (that happens in
1103 	 * btrfs_create_pending_block_groups() -> insert_block_group_item()).
1104 	 * It also means there are no extents allocated for block groups with a
1105 	 * start offset beyond this block group's end offset (this is the last,
1106 	 * highest, block group).
1107 	 */
1108 	start = block_group->start;
1109 	end = block_group->start + block_group->length;
1110 	while (ret == 0) {
1111 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1112 
1113 		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1114 		    key.type == BTRFS_METADATA_ITEM_KEY) {
1115 			if (key.objectid >= end)
1116 				break;
1117 
1118 			if (start < key.objectid) {
1119 				ret = __btrfs_add_to_free_space_tree(trans,
1120 								     block_group,
1121 								     path2, start,
1122 								     key.objectid -
1123 								     start);
1124 				if (ret)
1125 					goto out_locked;
1126 			}
1127 			start = key.objectid;
1128 			if (key.type == BTRFS_METADATA_ITEM_KEY)
1129 				start += trans->fs_info->nodesize;
1130 			else
1131 				start += key.offset;
1132 		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1133 			if (key.objectid != block_group->start)
1134 				break;
1135 		}
1136 
1137 		ret = btrfs_next_item(extent_root, path);
1138 		if (ret < 0)
1139 			goto out_locked;
1140 	}
1141 	if (start < end) {
1142 		ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
1143 						     start, end - start);
1144 		if (ret)
1145 			goto out_locked;
1146 	}
1147 
1148 	ret = 0;
1149 out_locked:
1150 	mutex_unlock(&block_group->free_space_lock);
1151 
1152 	return ret;
1153 }
1154 
btrfs_create_free_space_tree(struct btrfs_fs_info * fs_info)1155 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1156 {
1157 	struct btrfs_trans_handle *trans;
1158 	struct btrfs_root *tree_root = fs_info->tree_root;
1159 	struct btrfs_root *free_space_root;
1160 	struct btrfs_block_group *block_group;
1161 	struct rb_node *node;
1162 	int ret;
1163 
1164 	trans = btrfs_start_transaction(tree_root, 0);
1165 	if (IS_ERR(trans))
1166 		return PTR_ERR(trans);
1167 
1168 	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1169 	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1170 	free_space_root = btrfs_create_tree(trans,
1171 					    BTRFS_FREE_SPACE_TREE_OBJECTID);
1172 	if (IS_ERR(free_space_root)) {
1173 		ret = PTR_ERR(free_space_root);
1174 		btrfs_abort_transaction(trans, ret);
1175 		btrfs_end_transaction(trans);
1176 		goto out_clear;
1177 	}
1178 	ret = btrfs_global_root_insert(free_space_root);
1179 	if (unlikely(ret)) {
1180 		btrfs_put_root(free_space_root);
1181 		btrfs_abort_transaction(trans, ret);
1182 		btrfs_end_transaction(trans);
1183 		goto out_clear;
1184 	}
1185 
1186 	node = rb_first_cached(&fs_info->block_group_cache_tree);
1187 	while (node) {
1188 		block_group = rb_entry(node, struct btrfs_block_group,
1189 				       cache_node);
1190 		ret = populate_free_space_tree(trans, block_group);
1191 		if (unlikely(ret)) {
1192 			btrfs_abort_transaction(trans, ret);
1193 			btrfs_end_transaction(trans);
1194 			goto out_clear;
1195 		}
1196 		node = rb_next(node);
1197 	}
1198 
1199 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1200 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1201 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1202 	ret = btrfs_commit_transaction(trans);
1203 
1204 	/*
1205 	 * Now that we've committed the transaction any reading of our commit
1206 	 * root will be safe, so we can cache from the free space tree now.
1207 	 */
1208 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1209 	return ret;
1210 
1211 out_clear:
1212 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1213 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1214 	return ret;
1215 }
1216 
clear_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root)1217 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1218 				 struct btrfs_root *root)
1219 {
1220 	BTRFS_PATH_AUTO_FREE(path);
1221 	struct btrfs_key key;
1222 	struct rb_node *node;
1223 	int nr;
1224 	int ret;
1225 
1226 	path = btrfs_alloc_path();
1227 	if (!path)
1228 		return -ENOMEM;
1229 
1230 	key.objectid = 0;
1231 	key.type = 0;
1232 	key.offset = 0;
1233 
1234 	while (1) {
1235 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1236 		if (ret < 0)
1237 			return ret;
1238 
1239 		nr = btrfs_header_nritems(path->nodes[0]);
1240 		if (!nr)
1241 			break;
1242 
1243 		path->slots[0] = 0;
1244 		ret = btrfs_del_items(trans, root, path, 0, nr);
1245 		if (ret)
1246 			return ret;
1247 
1248 		btrfs_release_path(path);
1249 	}
1250 
1251 	node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
1252 	while (node) {
1253 		struct btrfs_block_group *bg;
1254 
1255 		bg = rb_entry(node, struct btrfs_block_group, cache_node);
1256 		clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
1257 		node = rb_next(node);
1258 		cond_resched();
1259 	}
1260 
1261 	return 0;
1262 }
1263 
btrfs_delete_free_space_tree(struct btrfs_fs_info * fs_info)1264 int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
1265 {
1266 	struct btrfs_trans_handle *trans;
1267 	struct btrfs_root *tree_root = fs_info->tree_root;
1268 	struct btrfs_key key = {
1269 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1270 		.type = BTRFS_ROOT_ITEM_KEY,
1271 		.offset = 0,
1272 	};
1273 	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1274 	int ret;
1275 
1276 	trans = btrfs_start_transaction(tree_root, 0);
1277 	if (IS_ERR(trans))
1278 		return PTR_ERR(trans);
1279 
1280 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1281 	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1282 
1283 	ret = clear_free_space_tree(trans, free_space_root);
1284 	if (unlikely(ret)) {
1285 		btrfs_abort_transaction(trans, ret);
1286 		btrfs_end_transaction(trans);
1287 		return ret;
1288 	}
1289 
1290 	ret = btrfs_del_root(trans, &free_space_root->root_key);
1291 	if (unlikely(ret)) {
1292 		btrfs_abort_transaction(trans, ret);
1293 		btrfs_end_transaction(trans);
1294 		return ret;
1295 	}
1296 
1297 	btrfs_global_root_delete(free_space_root);
1298 
1299 	spin_lock(&fs_info->trans_lock);
1300 	list_del(&free_space_root->dirty_list);
1301 	spin_unlock(&fs_info->trans_lock);
1302 
1303 	btrfs_tree_lock(free_space_root->node);
1304 	btrfs_clear_buffer_dirty(trans, free_space_root->node);
1305 	btrfs_tree_unlock(free_space_root->node);
1306 	ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1307 				    free_space_root->node, 0, 1);
1308 	btrfs_put_root(free_space_root);
1309 	if (unlikely(ret < 0)) {
1310 		btrfs_abort_transaction(trans, ret);
1311 		btrfs_end_transaction(trans);
1312 		return ret;
1313 	}
1314 
1315 	return btrfs_commit_transaction(trans);
1316 }
1317 
btrfs_rebuild_free_space_tree(struct btrfs_fs_info * fs_info)1318 int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
1319 {
1320 	struct btrfs_trans_handle *trans;
1321 	struct btrfs_key key = {
1322 		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1323 		.type = BTRFS_ROOT_ITEM_KEY,
1324 		.offset = 0,
1325 	};
1326 	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1327 	struct rb_node *node;
1328 	int ret;
1329 
1330 	trans = btrfs_start_transaction(free_space_root, 1);
1331 	if (IS_ERR(trans))
1332 		return PTR_ERR(trans);
1333 
1334 	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1335 	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1336 
1337 	ret = clear_free_space_tree(trans, free_space_root);
1338 	if (unlikely(ret)) {
1339 		btrfs_abort_transaction(trans, ret);
1340 		btrfs_end_transaction(trans);
1341 		return ret;
1342 	}
1343 
1344 	node = rb_first_cached(&fs_info->block_group_cache_tree);
1345 	while (node) {
1346 		struct btrfs_block_group *block_group;
1347 
1348 		block_group = rb_entry(node, struct btrfs_block_group,
1349 				       cache_node);
1350 
1351 		if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
1352 			     &block_group->runtime_flags))
1353 			goto next;
1354 
1355 		ret = populate_free_space_tree(trans, block_group);
1356 		if (unlikely(ret)) {
1357 			btrfs_abort_transaction(trans, ret);
1358 			btrfs_end_transaction(trans);
1359 			return ret;
1360 		}
1361 next:
1362 		if (btrfs_should_end_transaction(trans)) {
1363 			btrfs_end_transaction(trans);
1364 			trans = btrfs_start_transaction(free_space_root, 1);
1365 			if (IS_ERR(trans))
1366 				return PTR_ERR(trans);
1367 		}
1368 		node = rb_next(node);
1369 	}
1370 
1371 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1372 	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1373 	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1374 
1375 	ret = btrfs_commit_transaction(trans);
1376 	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1377 	return ret;
1378 }
1379 
__add_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)1380 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1381 					struct btrfs_block_group *block_group,
1382 					struct btrfs_path *path)
1383 {
1384 	bool own_path = false;
1385 	int ret;
1386 
1387 	if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
1388 				&block_group->runtime_flags))
1389 		return 0;
1390 
1391 	/*
1392 	 * While rebuilding the free space tree we may allocate new metadata
1393 	 * block groups while modifying the free space tree.
1394 	 *
1395 	 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
1396 	 * can use multiple transactions, every time btrfs_end_transaction() is
1397 	 * called at btrfs_rebuild_free_space_tree() we finish the creation of
1398 	 * new block groups by calling btrfs_create_pending_block_groups(), and
1399 	 * that in turn calls us, through add_block_group_free_space(), to add
1400 	 * a free space info item and a free space extent item for the block
1401 	 * group.
1402 	 *
1403 	 * Then later btrfs_rebuild_free_space_tree() may find such new block
1404 	 * groups and processes them with populate_free_space_tree(), which can
1405 	 * fail with EEXIST since there are already items for the block group in
1406 	 * the free space tree. Notice that we say "may find" because a new
1407 	 * block group may be added to the block groups rbtree in a node before
1408 	 * or after the block group currently being processed by the rebuild
1409 	 * process. So signal the rebuild process to skip such new block groups
1410 	 * if it finds them.
1411 	 */
1412 	set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
1413 
1414 	if (!path) {
1415 		path = btrfs_alloc_path();
1416 		if (unlikely(!path)) {
1417 			btrfs_abort_transaction(trans, -ENOMEM);
1418 			return -ENOMEM;
1419 		}
1420 		own_path = true;
1421 	}
1422 
1423 	ret = add_new_free_space_info(trans, block_group, path);
1424 	if (unlikely(ret)) {
1425 		btrfs_abort_transaction(trans, ret);
1426 		goto out;
1427 	}
1428 
1429 	ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
1430 					     block_group->start, block_group->length);
1431 	if (ret)
1432 		btrfs_abort_transaction(trans, ret);
1433 
1434 out:
1435 	if (own_path)
1436 		btrfs_free_path(path);
1437 
1438 	return ret;
1439 }
1440 
btrfs_add_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1441 int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
1442 				     struct btrfs_block_group *block_group)
1443 {
1444 	int ret;
1445 
1446 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1447 		return 0;
1448 
1449 	mutex_lock(&block_group->free_space_lock);
1450 	ret = __add_block_group_free_space(trans, block_group, NULL);
1451 	mutex_unlock(&block_group->free_space_lock);
1452 	return ret;
1453 }
1454 
btrfs_remove_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1455 int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
1456 					struct btrfs_block_group *block_group)
1457 {
1458 	struct btrfs_root *root = btrfs_free_space_root(block_group);
1459 	BTRFS_PATH_AUTO_FREE(path);
1460 	struct btrfs_key key, found_key;
1461 	struct extent_buffer *leaf;
1462 	u64 start, end;
1463 	int done = 0, nr;
1464 	int ret;
1465 
1466 	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1467 		return 0;
1468 
1469 	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1470 		/* We never added this block group to the free space tree. */
1471 		return 0;
1472 	}
1473 
1474 	path = btrfs_alloc_path();
1475 	if (unlikely(!path)) {
1476 		ret = -ENOMEM;
1477 		btrfs_abort_transaction(trans, ret);
1478 		return ret;
1479 	}
1480 
1481 	start = block_group->start;
1482 	end = block_group->start + block_group->length;
1483 
1484 	key.objectid = end - 1;
1485 	key.type = (u8)-1;
1486 	key.offset = (u64)-1;
1487 
1488 	while (!done) {
1489 		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1490 		if (unlikely(ret)) {
1491 			btrfs_abort_transaction(trans, ret);
1492 			return ret;
1493 		}
1494 
1495 		leaf = path->nodes[0];
1496 		nr = 0;
1497 		path->slots[0]++;
1498 		while (path->slots[0] > 0) {
1499 			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1500 
1501 			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1502 				ASSERT(found_key.objectid == block_group->start);
1503 				ASSERT(found_key.offset == block_group->length);
1504 				done = 1;
1505 				nr++;
1506 				path->slots[0]--;
1507 				break;
1508 			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1509 				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1510 				ASSERT(found_key.objectid >= start);
1511 				ASSERT(found_key.objectid < end);
1512 				ASSERT(found_key.objectid + found_key.offset <= end);
1513 				nr++;
1514 				path->slots[0]--;
1515 			} else {
1516 				ASSERT(0);
1517 			}
1518 		}
1519 
1520 		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1521 		if (unlikely(ret)) {
1522 			btrfs_abort_transaction(trans, ret);
1523 			return ret;
1524 		}
1525 		btrfs_release_path(path);
1526 	}
1527 
1528 	ret = 0;
1529 
1530 	return ret;
1531 }
1532 
load_free_space_bitmaps(struct btrfs_caching_control * caching_ctl,struct btrfs_path * path,u32 expected_extent_count)1533 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1534 				   struct btrfs_path *path,
1535 				   u32 expected_extent_count)
1536 {
1537 	struct btrfs_block_group *block_group;
1538 	struct btrfs_fs_info *fs_info;
1539 	struct btrfs_root *root;
1540 	struct btrfs_key key;
1541 	bool prev_bit_set = false;
1542 	/* Initialize to silence GCC. */
1543 	u64 extent_start = 0;
1544 	u64 end, offset;
1545 	u64 total_found = 0;
1546 	u32 extent_count = 0;
1547 	int ret;
1548 
1549 	block_group = caching_ctl->block_group;
1550 	fs_info = block_group->fs_info;
1551 	root = btrfs_free_space_root(block_group);
1552 
1553 	end = block_group->start + block_group->length;
1554 
1555 	while (1) {
1556 		ret = btrfs_next_item(root, path);
1557 		if (ret < 0)
1558 			return ret;
1559 		if (ret)
1560 			break;
1561 
1562 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1563 
1564 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1565 			break;
1566 
1567 		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1568 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1569 
1570 		offset = key.objectid;
1571 		while (offset < key.objectid + key.offset) {
1572 			bool bit_set;
1573 
1574 			bit_set = btrfs_free_space_test_bit(block_group, path, offset);
1575 			if (!prev_bit_set && bit_set) {
1576 				extent_start = offset;
1577 			} else if (prev_bit_set && !bit_set) {
1578 				u64 space_added;
1579 
1580 				ret = btrfs_add_new_free_space(block_group,
1581 							       extent_start,
1582 							       offset,
1583 							       &space_added);
1584 				if (ret)
1585 					return ret;
1586 				total_found += space_added;
1587 				if (total_found > CACHING_CTL_WAKE_UP) {
1588 					total_found = 0;
1589 					wake_up(&caching_ctl->wait);
1590 				}
1591 				extent_count++;
1592 			}
1593 			prev_bit_set = bit_set;
1594 			offset += fs_info->sectorsize;
1595 		}
1596 	}
1597 	if (prev_bit_set) {
1598 		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
1599 		if (ret)
1600 			return ret;
1601 		extent_count++;
1602 	}
1603 
1604 	if (unlikely(extent_count != expected_extent_count)) {
1605 		btrfs_err(fs_info,
1606 			  "incorrect extent count for %llu; counted %u, expected %u",
1607 			  block_group->start, extent_count,
1608 			  expected_extent_count);
1609 		DEBUG_WARN();
1610 		return -EIO;
1611 	}
1612 
1613 	return 0;
1614 }
1615 
load_free_space_extents(struct btrfs_caching_control * caching_ctl,struct btrfs_path * path,u32 expected_extent_count)1616 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1617 				   struct btrfs_path *path,
1618 				   u32 expected_extent_count)
1619 {
1620 	struct btrfs_block_group *block_group;
1621 	struct btrfs_fs_info *fs_info;
1622 	struct btrfs_root *root;
1623 	struct btrfs_key key;
1624 	u64 end;
1625 	u64 total_found = 0;
1626 	u32 extent_count = 0;
1627 	int ret;
1628 
1629 	block_group = caching_ctl->block_group;
1630 	fs_info = block_group->fs_info;
1631 	root = btrfs_free_space_root(block_group);
1632 
1633 	end = block_group->start + block_group->length;
1634 
1635 	while (1) {
1636 		u64 space_added;
1637 
1638 		ret = btrfs_next_item(root, path);
1639 		if (ret < 0)
1640 			return ret;
1641 		if (ret)
1642 			break;
1643 
1644 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1645 
1646 		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1647 			break;
1648 
1649 		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1650 		ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1651 
1652 		ret = btrfs_add_new_free_space(block_group, key.objectid,
1653 					       key.objectid + key.offset,
1654 					       &space_added);
1655 		if (ret)
1656 			return ret;
1657 		total_found += space_added;
1658 		if (total_found > CACHING_CTL_WAKE_UP) {
1659 			total_found = 0;
1660 			wake_up(&caching_ctl->wait);
1661 		}
1662 		extent_count++;
1663 	}
1664 
1665 	if (unlikely(extent_count != expected_extent_count)) {
1666 		btrfs_err(fs_info,
1667 			  "incorrect extent count for %llu; counted %u, expected %u",
1668 			  block_group->start, extent_count,
1669 			  expected_extent_count);
1670 		DEBUG_WARN();
1671 		return -EIO;
1672 	}
1673 
1674 	return 0;
1675 }
1676 
btrfs_load_free_space_tree(struct btrfs_caching_control * caching_ctl)1677 int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1678 {
1679 	struct btrfs_block_group *block_group;
1680 	struct btrfs_free_space_info *info;
1681 	BTRFS_PATH_AUTO_FREE(path);
1682 	u32 extent_count, flags;
1683 
1684 	block_group = caching_ctl->block_group;
1685 
1686 	path = btrfs_alloc_path();
1687 	if (!path)
1688 		return -ENOMEM;
1689 
1690 	/*
1691 	 * Just like caching_thread() doesn't want to deadlock on the extent
1692 	 * tree, we don't want to deadlock on the free space tree.
1693 	 */
1694 	path->skip_locking = true;
1695 	path->search_commit_root = true;
1696 	path->reada = READA_FORWARD;
1697 
1698 	info = btrfs_search_free_space_info(NULL, block_group, path, 0);
1699 	if (IS_ERR(info))
1700 		return PTR_ERR(info);
1701 
1702 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1703 	flags = btrfs_free_space_flags(path->nodes[0], info);
1704 
1705 	/*
1706 	 * We left path pointing to the free space info item, so now
1707 	 * load_free_space_foo can just iterate through the free space tree from
1708 	 * there.
1709 	 */
1710 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1711 		return load_free_space_bitmaps(caching_ctl, path, extent_count);
1712 	else
1713 		return load_free_space_extents(caching_ctl, path, extent_count);
1714 }
1715