1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2015 Facebook. All rights reserved.
4 */
5
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
8 #include "messages.h"
9 #include "ctree.h"
10 #include "disk-io.h"
11 #include "locking.h"
12 #include "free-space-tree.h"
13 #include "transaction.h"
14 #include "block-group.h"
15 #include "fs.h"
16 #include "accessors.h"
17 #include "extent-tree.h"
18 #include "root-tree.h"
19
20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
21 struct btrfs_block_group *block_group,
22 struct btrfs_path *path);
23
btrfs_free_space_root(struct btrfs_block_group * block_group)24 struct btrfs_root *btrfs_free_space_root(struct btrfs_block_group *block_group)
25 {
26 struct btrfs_key key = {
27 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
28 .type = BTRFS_ROOT_ITEM_KEY,
29 .offset = 0,
30 };
31
32 if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
33 key.offset = block_group->global_root_id;
34 return btrfs_global_root(block_group->fs_info, &key);
35 }
36
btrfs_set_free_space_tree_thresholds(struct btrfs_block_group * cache)37 void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache)
38 {
39 u32 bitmap_range;
40 size_t bitmap_size;
41 u64 num_bitmaps, total_bitmap_size;
42
43 if (WARN_ON(cache->length == 0))
44 btrfs_warn(cache->fs_info, "block group %llu length is zero",
45 cache->start);
46
47 /*
48 * We convert to bitmaps when the disk space required for using extents
49 * exceeds that required for using bitmaps.
50 */
51 bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
52 num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
53 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
54 total_bitmap_size = num_bitmaps * bitmap_size;
55 cache->bitmap_high_thresh = div_u64(total_bitmap_size,
56 sizeof(struct btrfs_item));
57
58 /*
59 * We allow for a small buffer between the high threshold and low
60 * threshold to avoid thrashing back and forth between the two formats.
61 */
62 if (cache->bitmap_high_thresh > 100)
63 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
64 else
65 cache->bitmap_low_thresh = 0;
66 }
67
add_new_free_space_info(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)68 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
69 struct btrfs_block_group *block_group,
70 struct btrfs_path *path)
71 {
72 struct btrfs_root *root = btrfs_free_space_root(block_group);
73 struct btrfs_free_space_info *info;
74 struct btrfs_key key;
75 struct extent_buffer *leaf;
76 int ret;
77
78 key.objectid = block_group->start;
79 key.type = BTRFS_FREE_SPACE_INFO_KEY;
80 key.offset = block_group->length;
81
82 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
83 if (ret)
84 return ret;
85
86 leaf = path->nodes[0];
87 info = btrfs_item_ptr(leaf, path->slots[0],
88 struct btrfs_free_space_info);
89 btrfs_set_free_space_extent_count(leaf, info, 0);
90 btrfs_set_free_space_flags(leaf, info, 0);
91 btrfs_release_path(path);
92 return 0;
93 }
94
btrfs_search_free_space_info(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,int cow)95 struct btrfs_free_space_info *btrfs_search_free_space_info(
96 struct btrfs_trans_handle *trans,
97 struct btrfs_block_group *block_group,
98 struct btrfs_path *path, int cow)
99 {
100 struct btrfs_fs_info *fs_info = block_group->fs_info;
101 struct btrfs_root *root = btrfs_free_space_root(block_group);
102 struct btrfs_key key;
103 int ret;
104
105 key.objectid = block_group->start;
106 key.type = BTRFS_FREE_SPACE_INFO_KEY;
107 key.offset = block_group->length;
108
109 ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
110 if (ret < 0)
111 return ERR_PTR(ret);
112 if (ret != 0) {
113 btrfs_warn(fs_info, "missing free space info for %llu",
114 block_group->start);
115 DEBUG_WARN();
116 return ERR_PTR(-ENOENT);
117 }
118
119 return btrfs_item_ptr(path->nodes[0], path->slots[0],
120 struct btrfs_free_space_info);
121 }
122
123 /*
124 * btrfs_search_slot() but we're looking for the greatest key less than the
125 * passed key.
126 */
btrfs_search_prev_slot(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_key * key,struct btrfs_path * p,int ins_len,int cow)127 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
128 struct btrfs_root *root,
129 struct btrfs_key *key, struct btrfs_path *p,
130 int ins_len, int cow)
131 {
132 int ret;
133
134 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
135 if (ret < 0)
136 return ret;
137
138 if (unlikely(ret == 0)) {
139 DEBUG_WARN();
140 return -EIO;
141 }
142
143 if (unlikely(p->slots[0] == 0)) {
144 DEBUG_WARN("no previous slot found");
145 return -EIO;
146 }
147 p->slots[0]--;
148
149 return 0;
150 }
151
free_space_bitmap_size(const struct btrfs_fs_info * fs_info,u64 size)152 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
153 u64 size)
154 {
155 return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
156 }
157
static unsigned long *alloc_bitmap(u32 bitmap_size)
{
	const u32 aligned_size = round_up(bitmap_size, sizeof(unsigned long));
	unsigned long *bitmap;
	unsigned int nofs_flag;

	/*
	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
	 * into the filesystem here. All callers hold a transaction handle
	 * open, so if a GFP_KERNEL allocation recurses into the filesystem
	 * and triggers a transaction commit, we would deadlock.
	 */
	nofs_flag = memalloc_nofs_save();
	bitmap = kvzalloc(aligned_size, GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	return bitmap;
}
175
/*
 * Set @len bits starting at bit @start in @map, operating one byte at a time
 * in little-endian byte order (as the on-disk bitmap items are laid out).
 */
static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
{
	/* Byte containing the first bit to set. */
	u8 *p = ((u8 *)map) + BIT_BYTE(start);
	/* Index of the bit one past the end of the range. */
	const unsigned int size = start + len;
	/* Bits remaining in the (possibly partial) first byte. */
	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);

	/* Fill whole bytes (and the tail of the first byte). */
	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		p++;
	}
	/* Partial final byte, if the range doesn't end on a byte boundary. */
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		*p |= mask_to_set;
	}
}
195
196 EXPORT_FOR_TESTS
btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)197 int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
198 struct btrfs_block_group *block_group,
199 struct btrfs_path *path)
200 {
201 struct btrfs_fs_info *fs_info = trans->fs_info;
202 struct btrfs_root *root = btrfs_free_space_root(block_group);
203 struct btrfs_free_space_info *info;
204 struct btrfs_key key, found_key;
205 struct extent_buffer *leaf;
206 unsigned long *bitmap;
207 char *bitmap_cursor;
208 u64 start, end;
209 u64 bitmap_range, i;
210 u32 bitmap_size, flags, expected_extent_count;
211 u32 extent_count = 0;
212 int done = 0, nr;
213 int ret;
214
215 bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
216 bitmap = alloc_bitmap(bitmap_size);
217 if (unlikely(!bitmap))
218 return 0;
219
220 start = block_group->start;
221 end = btrfs_block_group_end(block_group);
222
223 key.objectid = end - 1;
224 key.type = (u8)-1;
225 key.offset = (u64)-1;
226
227 while (!done) {
228 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
229 if (unlikely(ret)) {
230 btrfs_abort_transaction(trans, ret);
231 goto out;
232 }
233
234 leaf = path->nodes[0];
235 nr = 0;
236 path->slots[0]++;
237 while (path->slots[0] > 0) {
238 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
239
240 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
241 ASSERT(found_key.objectid == block_group->start);
242 ASSERT(found_key.offset == block_group->length);
243 done = 1;
244 break;
245 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
246 u64 first, last;
247
248 ASSERT(found_key.objectid >= start);
249 ASSERT(found_key.objectid < end);
250 ASSERT(found_key.objectid + found_key.offset <= end);
251
252 first = div_u64(found_key.objectid - start,
253 fs_info->sectorsize);
254 last = div_u64(found_key.objectid + found_key.offset - start,
255 fs_info->sectorsize);
256 le_bitmap_set(bitmap, first, last - first);
257
258 extent_count++;
259 nr++;
260 path->slots[0]--;
261 } else {
262 ASSERT(0);
263 }
264 }
265
266 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
267 if (unlikely(ret)) {
268 btrfs_abort_transaction(trans, ret);
269 goto out;
270 }
271 btrfs_release_path(path);
272 }
273
274 info = btrfs_search_free_space_info(trans, block_group, path, 1);
275 if (IS_ERR(info)) {
276 ret = PTR_ERR(info);
277 btrfs_abort_transaction(trans, ret);
278 goto out;
279 }
280 leaf = path->nodes[0];
281 flags = btrfs_free_space_flags(leaf, info);
282 flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
283 block_group->using_free_space_bitmaps = true;
284 block_group->using_free_space_bitmaps_cached = true;
285 btrfs_set_free_space_flags(leaf, info, flags);
286 expected_extent_count = btrfs_free_space_extent_count(leaf, info);
287 btrfs_release_path(path);
288
289 if (unlikely(extent_count != expected_extent_count)) {
290 btrfs_err(fs_info,
291 "incorrect extent count for %llu; counted %u, expected %u",
292 block_group->start, extent_count,
293 expected_extent_count);
294 ret = -EIO;
295 btrfs_abort_transaction(trans, ret);
296 goto out;
297 }
298
299 bitmap_cursor = (char *)bitmap;
300 bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
301 i = start;
302 while (i < end) {
303 unsigned long ptr;
304 u64 extent_size;
305 u32 data_size;
306
307 extent_size = min(end - i, bitmap_range);
308 data_size = free_space_bitmap_size(fs_info, extent_size);
309
310 key.objectid = i;
311 key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
312 key.offset = extent_size;
313
314 ret = btrfs_insert_empty_item(trans, root, path, &key,
315 data_size);
316 if (unlikely(ret)) {
317 btrfs_abort_transaction(trans, ret);
318 goto out;
319 }
320
321 leaf = path->nodes[0];
322 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
323 write_extent_buffer(leaf, bitmap_cursor, ptr,
324 data_size);
325 btrfs_release_path(path);
326
327 i += extent_size;
328 bitmap_cursor += data_size;
329 }
330
331 ret = 0;
332 out:
333 kvfree(bitmap);
334 return ret;
335 }
336
337 EXPORT_FOR_TESTS
btrfs_convert_free_space_to_extents(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)338 int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
339 struct btrfs_block_group *block_group,
340 struct btrfs_path *path)
341 {
342 struct btrfs_fs_info *fs_info = trans->fs_info;
343 struct btrfs_root *root = btrfs_free_space_root(block_group);
344 struct btrfs_free_space_info *info;
345 struct btrfs_key key, found_key;
346 struct extent_buffer *leaf;
347 unsigned long *bitmap;
348 u64 start, end;
349 u32 bitmap_size, flags, expected_extent_count;
350 unsigned long nrbits, start_bit, end_bit;
351 u32 extent_count = 0;
352 int done = 0, nr;
353 int ret;
354
355 bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
356 bitmap = alloc_bitmap(bitmap_size);
357 if (unlikely(!bitmap))
358 return 0;
359
360 start = block_group->start;
361 end = btrfs_block_group_end(block_group);
362
363 key.objectid = end - 1;
364 key.type = (u8)-1;
365 key.offset = (u64)-1;
366
367 while (!done) {
368 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
369 if (unlikely(ret)) {
370 btrfs_abort_transaction(trans, ret);
371 goto out;
372 }
373
374 leaf = path->nodes[0];
375 nr = 0;
376 path->slots[0]++;
377 while (path->slots[0] > 0) {
378 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
379
380 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
381 ASSERT(found_key.objectid == block_group->start);
382 ASSERT(found_key.offset == block_group->length);
383 done = 1;
384 break;
385 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
386 unsigned long ptr;
387 char *bitmap_cursor;
388 u32 bitmap_pos, data_size;
389
390 ASSERT(found_key.objectid >= start);
391 ASSERT(found_key.objectid < end);
392 ASSERT(found_key.objectid + found_key.offset <= end);
393
394 bitmap_pos = div_u64(found_key.objectid - start,
395 fs_info->sectorsize *
396 BITS_PER_BYTE);
397 bitmap_cursor = ((char *)bitmap) + bitmap_pos;
398 data_size = free_space_bitmap_size(fs_info,
399 found_key.offset);
400
401 path->slots[0]--;
402 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
403 read_extent_buffer(leaf, bitmap_cursor, ptr,
404 data_size);
405
406 nr++;
407 } else {
408 ASSERT(0);
409 }
410 }
411
412 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
413 if (unlikely(ret)) {
414 btrfs_abort_transaction(trans, ret);
415 goto out;
416 }
417 btrfs_release_path(path);
418 }
419
420 info = btrfs_search_free_space_info(trans, block_group, path, 1);
421 if (IS_ERR(info)) {
422 ret = PTR_ERR(info);
423 btrfs_abort_transaction(trans, ret);
424 goto out;
425 }
426 leaf = path->nodes[0];
427 flags = btrfs_free_space_flags(leaf, info);
428 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
429 block_group->using_free_space_bitmaps = false;
430 block_group->using_free_space_bitmaps_cached = true;
431 btrfs_set_free_space_flags(leaf, info, flags);
432 expected_extent_count = btrfs_free_space_extent_count(leaf, info);
433 btrfs_release_path(path);
434
435 nrbits = block_group->length >> fs_info->sectorsize_bits;
436 start_bit = find_next_bit_le(bitmap, nrbits, 0);
437
438 while (start_bit < nrbits) {
439 end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
440 ASSERT(start_bit < end_bit);
441
442 key.objectid = start + start_bit * fs_info->sectorsize;
443 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
444 key.offset = (end_bit - start_bit) * fs_info->sectorsize;
445
446 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
447 if (unlikely(ret)) {
448 btrfs_abort_transaction(trans, ret);
449 goto out;
450 }
451 btrfs_release_path(path);
452
453 extent_count++;
454
455 start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
456 }
457
458 if (unlikely(extent_count != expected_extent_count)) {
459 btrfs_err(fs_info,
460 "incorrect extent count for %llu; counted %u, expected %u",
461 block_group->start, extent_count,
462 expected_extent_count);
463 ret = -EIO;
464 btrfs_abort_transaction(trans, ret);
465 goto out;
466 }
467
468 ret = 0;
469 out:
470 kvfree(bitmap);
471 return ret;
472 }
473
update_free_space_extent_count(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,int new_extents)474 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
475 struct btrfs_block_group *block_group,
476 struct btrfs_path *path,
477 int new_extents)
478 {
479 struct btrfs_free_space_info *info;
480 u32 flags;
481 u32 extent_count;
482 int ret = 0;
483
484 if (new_extents == 0)
485 return 0;
486
487 info = btrfs_search_free_space_info(trans, block_group, path, 1);
488 if (IS_ERR(info))
489 return PTR_ERR(info);
490
491 flags = btrfs_free_space_flags(path->nodes[0], info);
492 extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
493
494 extent_count += new_extents;
495 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
496 btrfs_release_path(path);
497
498 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
499 extent_count > block_group->bitmap_high_thresh) {
500 ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path);
501 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
502 extent_count < block_group->bitmap_low_thresh) {
503 ret = btrfs_convert_free_space_to_extents(trans, block_group, path);
504 }
505
506 return ret;
507 }
508
509 EXPORT_FOR_TESTS
btrfs_free_space_test_bit(struct btrfs_block_group * block_group,struct btrfs_path * path,u64 offset)510 bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
511 struct btrfs_path *path, u64 offset)
512 {
513 struct extent_buffer *leaf;
514 struct btrfs_key key;
515 u64 found_start, found_end;
516 unsigned long ptr, i;
517
518 leaf = path->nodes[0];
519 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
520 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
521
522 found_start = key.objectid;
523 found_end = key.objectid + key.offset;
524 ASSERT(offset >= found_start && offset < found_end);
525
526 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
527 i = div_u64(offset - found_start,
528 block_group->fs_info->sectorsize);
529 return extent_buffer_test_bit(leaf, ptr, i);
530 }
531
/*
 * Set or clear, in the bitmap item the path points at, the bits covering the
 * part of [*start, *start + *size) that falls inside that item.  On return,
 * *start and *size are advanced past the handled range so the caller can move
 * to the next bitmap item and continue (*size hits 0 when done).
 */
static void free_space_modify_bits(struct btrfs_trans_handle *trans,
				   struct btrfs_block_group *block_group,
				   struct btrfs_path *path, u64 *start, u64 *size,
				   bool set_bits)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 end = *start + *size;
	u64 found_start, found_end;
	unsigned long ptr, first, last;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

	/* The bitmap item covers [found_start, found_end). */
	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(*start >= found_start && *start < found_end);
	ASSERT(end > found_start);

	/* Clamp to this bitmap item; the remainder is the caller's problem. */
	if (end > found_end)
		end = found_end;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	/* Translate byte offsets within the item into bit indices. */
	first = (*start - found_start) >> fs_info->sectorsize_bits;
	last = (end - found_start) >> fs_info->sectorsize_bits;
	if (set_bits)
		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
	else
		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
	btrfs_mark_buffer_dirty(trans, leaf);

	/* Advance past the portion handled by this bitmap item. */
	*size -= end - *start;
	*start = end;
}
568
569 /*
570 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
571 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
572 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
573 * looking for.
574 */
static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, struct btrfs_path *p)
{
	struct btrfs_key key;

	/* More items in the current leaf: just advance the slot. */
	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
		p->slots[0]++;
		return 0;
	}

	/*
	 * We're at the last slot of the leaf.  The next bitmap item begins
	 * right after the range covered by the current one, so release the
	 * path and re-search for the greatest key below that boundary.
	 */
	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
	btrfs_release_path(p);

	key.objectid += key.offset;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
}
594
/*
 * If remove is true, then we are removing free space, thus clearing bits in
 * the bitmap. If remove is false, then we are adding free space, thus setting
 * bits in the bitmap.
 */
/*
 * Clear (@remove == true) or set (@remove == false) the bits for the range
 * [start, start + size) across however many bitmap items it spans, then
 * update the extent count by the net change in the number of free space
 * extents, which is deduced from the bits immediately before and after the
 * range.
 */
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size, bool remove)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	u64 end = start + size;
	u64 cur_start, cur_size;
	bool prev_bit_set = false;
	bool next_bit_set = false;
	int new_extents;
	int ret;

	/*
	 * Read the bit for the block immediately before the extent of space if
	 * that block is within the block group.
	 */
	if (start > block_group->start) {
		u64 prev_block = start - block_group->fs_info->sectorsize;

		key.objectid = prev_block;
		key.type = (u8)-1;
		key.offset = (u64)-1;

		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
		if (ret)
			return ret;

		prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);

		/* The previous block may have been in the previous bitmap. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (start >= key.objectid + key.offset) {
			ret = free_space_next_bitmap(trans, root, path);
			if (ret)
				return ret;
		}
	} else {
		/* Range starts at the block group: position on its first bitmap. */
		key.objectid = start;
		key.type = (u8)-1;
		key.offset = (u64)-1;

		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
		if (ret)
			return ret;
	}

	/*
	 * Iterate over all of the bitmaps overlapped by the extent of space,
	 * clearing/setting bits as required.
	 */
	cur_start = start;
	cur_size = size;
	while (1) {
		free_space_modify_bits(trans, block_group, path, &cur_start,
				       &cur_size, !remove);
		if (cur_size == 0)
			break;
		ret = free_space_next_bitmap(trans, root, path);
		if (ret)
			return ret;
	}

	/*
	 * Read the bit for the block immediately after the extent of space if
	 * that block is within the block group.
	 */
	if (end < btrfs_block_group_end(block_group)) {
		/* The next block may be in the next bitmap. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (end >= key.objectid + key.offset) {
			ret = free_space_next_bitmap(trans, root, path);
			if (ret)
				return ret;
		}

		next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
	}

	/* Derive the net change in extent count from the neighboring bits. */
	if (remove) {
		new_extents = -1;
		if (prev_bit_set) {
			/* Leftover on the left. */
			new_extents++;
		}
		if (next_bit_set) {
			/* Leftover on the right. */
			new_extents++;
		}
	} else {
		new_extents = 1;
		if (prev_bit_set) {
			/* Merging with neighbor on the left. */
			new_extents--;
		}
		if (next_bit_set) {
			/* Merging with neighbor on the right. */
			new_extents--;
		}
	}

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}
705
/*
 * Remove [start, start + size) from the free space tree when the block group
 * is in the extent format.  Finds the single extent item containing the
 * range, deletes it, re-inserts items for any leftover space on either side,
 * and adjusts the extent count accordingly.
 */
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	u64 found_start, found_end;
	u64 end = start + size;
	int new_extents = -1;
	int ret;

	key.objectid = start;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);

	/* The found extent must fully contain the range being removed. */
	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(start >= found_start && end <= found_end);

	/*
	 * Okay, now that we've found the free space extent which contains the
	 * free space that we are removing, there are four cases:
	 *
	 * 1. We're using the whole extent: delete the key we found and
	 * decrement the free space extent count.
	 * 2. We are using part of the extent starting at the beginning: delete
	 * the key we found and insert a new key representing the leftover at
	 * the end. There is no net change in the number of extents.
	 * 3. We are using part of the extent ending at the end: delete the key
	 * we found and insert a new key representing the leftover at the
	 * beginning. There is no net change in the number of extents.
	 * 4. We are using part of the extent in the middle: delete the key we
	 * found and insert two new keys representing the leftovers on each
	 * side. Where we used to have one extent, we now have two, so increment
	 * the extent count. We may need to convert the block group to bitmaps
	 * as a result.
	 */

	/* Delete the existing key (cases 1-4). */
	ret = btrfs_del_item(trans, root, path);
	if (ret)
		return ret;

	/* Add a key for leftovers at the beginning (cases 3 and 4). */
	if (start > found_start) {
		key.objectid = found_start;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = start - found_start;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			return ret;
		new_extents++;
	}

	/* Add a key for leftovers at the end (cases 2 and 4). */
	if (end < found_end) {
		key.objectid = end;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = found_end - end;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			return ret;
		new_extents++;
	}

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}
787
using_bitmaps(struct btrfs_block_group * bg,struct btrfs_path * path)788 static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
789 {
790 struct btrfs_free_space_info *info;
791 u32 flags;
792
793 if (bg->using_free_space_bitmaps_cached)
794 return bg->using_free_space_bitmaps;
795
796 info = btrfs_search_free_space_info(NULL, bg, path, 0);
797 if (IS_ERR(info))
798 return PTR_ERR(info);
799 flags = btrfs_free_space_flags(path->nodes[0], info);
800 btrfs_release_path(path);
801
802 bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS);
803 bg->using_free_space_bitmaps_cached = true;
804
805 return bg->using_free_space_bitmaps;
806 }
807
808 EXPORT_FOR_TESTS
__btrfs_remove_from_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)809 int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
810 struct btrfs_block_group *block_group,
811 struct btrfs_path *path, u64 start, u64 size)
812 {
813 int ret;
814
815 ret = __add_block_group_free_space(trans, block_group, path);
816 if (ret)
817 return ret;
818
819 ret = using_bitmaps(block_group, path);
820 if (ret < 0)
821 return ret;
822
823 if (ret)
824 return modify_free_space_bitmap(trans, block_group, path,
825 start, size, true);
826
827 return remove_free_space_extent(trans, block_group, path, start, size);
828 }
829
/*
 * Public entry point to remove [start, start + size) from the free space
 * tree.  A no-op if the free space tree feature isn't enabled.  Any failure
 * aborts the transaction.
 */
int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
				      u64 start, u64 size)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *bg;
	BTRFS_PATH_AUTO_FREE(path);
	int ret;

	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	bg = btrfs_lookup_block_group(fs_info, start);
	if (unlikely(!bg)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	mutex_lock(&bg->free_space_lock);
	ret = __btrfs_remove_from_free_space_tree(trans, bg, path, start, size);
	mutex_unlock(&bg->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	btrfs_put_block_group(bg);

	return ret;
}
865
/*
 * Add [start, start + size) to the free space tree when the block group is in
 * the extent format.  Looks for adjacent free space extents on both sides,
 * absorbs any that touch the new range into a single combined extent item,
 * and adjusts the extent count accordingly.
 */
static int add_free_space_extent(struct btrfs_trans_handle *trans,
				 struct btrfs_block_group *block_group,
				 struct btrfs_path *path,
				 u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key, new_key;
	u64 found_start, found_end;
	u64 end = start + size;
	int new_extents = 1;
	int ret;

	/*
	 * We are adding a new extent of free space, but we need to merge
	 * extents. There are four cases here:
	 *
	 * 1. The new extent does not have any immediate neighbors to merge
	 * with: add the new key and increment the free space extent count. We
	 * may need to convert the block group to bitmaps as a result.
	 * 2. The new extent has an immediate neighbor before it: remove the
	 * previous key and insert a new key combining both of them. There is no
	 * net change in the number of extents.
	 * 3. The new extent has an immediate neighbor after it: remove the next
	 * key and insert a new key combining both of them. There is no net
	 * change in the number of extents.
	 * 4. The new extent has immediate neighbors on both sides: remove both
	 * of the keys and insert a new key combining all of them. Where we used
	 * to have two extents, we now have one, so decrement the extent count.
	 */

	new_key.objectid = start;
	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
	new_key.offset = size;

	/* Search for a neighbor on the left. */
	if (start == block_group->start)
		goto right;
	key.objectid = start - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	/* Anything other than an extent key here must be the info item. */
	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto right;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	ASSERT(found_start < start && found_end <= start);

	/*
	 * Delete the neighbor on the left and absorb it into the new key (cases
	 * 2 and 4).
	 */
	if (found_end == start) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			return ret;
		new_key.objectid = found_start;
		new_key.offset += key.offset;
		new_extents--;
	}
	btrfs_release_path(path);

right:
	/* Search for a neighbor on the right. */
	if (end == btrfs_block_group_end(block_group))
		goto insert;
	key.objectid = end;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	/* Anything other than an extent key here must be the info item. */
	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto insert;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	ASSERT((found_start < start && found_end <= start) ||
	       (found_start >= end && found_end > end));

	/*
	 * Delete the neighbor on the right and absorb it into the new key
	 * (cases 3 and 4).
	 */
	if (found_start == end) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			return ret;
		new_key.offset += key.offset;
		new_extents--;
	}
	btrfs_release_path(path);

insert:
	/* Insert the new key (cases 1-4). */
	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
	if (ret)
		return ret;

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}
988
989 EXPORT_FOR_TESTS
__btrfs_add_to_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)990 int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
991 struct btrfs_block_group *block_group,
992 struct btrfs_path *path, u64 start, u64 size)
993 {
994 int ret;
995
996 ret = __add_block_group_free_space(trans, block_group, path);
997 if (ret)
998 return ret;
999
1000 ret = using_bitmaps(block_group, path);
1001 if (ret < 0)
1002 return ret;
1003
1004 if (ret)
1005 return modify_free_space_bitmap(trans, block_group, path,
1006 start, size, false);
1007
1008 return add_free_space_extent(trans, block_group, path, start, size);
1009 }
1010
int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
				 u64 start, u64 size)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *bg;
	BTRFS_PATH_AUTO_FREE(path);
	int ret;

	/* Nothing to do if this filesystem has no free space tree. */
	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		btrfs_abort_transaction(trans, -ENOMEM);
		return -ENOMEM;
	}

	bg = btrfs_lookup_block_group(fs_info, start);
	if (unlikely(!bg)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		btrfs_abort_transaction(trans, -ENOENT);
		return -ENOENT;
	}

	/* The free space items of a block group are updated under its lock. */
	mutex_lock(&bg->free_space_lock);
	ret = __btrfs_add_to_free_space_tree(trans, bg, path, start, size);
	mutex_unlock(&bg->free_space_lock);

	btrfs_put_block_group(bg);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}
1046
1047 /*
1048 * Populate the free space tree by walking the extent tree. Operations on the
1049 * extent tree that happen as a result of writes to the free space tree will go
1050 * through the normal add/remove hooks.
1051 */
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group)
{
	struct btrfs_root *extent_root;
	BTRFS_PATH_AUTO_FREE(path);
	BTRFS_PATH_AUTO_FREE(path2);
	struct btrfs_key key;
	u64 start, end;
	int ret;

	/* path walks the extent tree, path2 is used for free space tree inserts. */
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path2 = btrfs_alloc_path();
	if (!path2)
		return -ENOMEM;

	/* We scan the extent tree forward item by item, so readahead helps. */
	path->reada = READA_FORWARD;

	ret = add_new_free_space_info(trans, block_group, path2);
	if (ret)
		return ret;

	extent_root = btrfs_extent_root(trans->fs_info, block_group->start);
	if (unlikely(!extent_root)) {
		btrfs_err(trans->fs_info,
			  "missing extent root for block group at offset %llu",
			  block_group->start);
		return -EUCLEAN;
	}

	mutex_lock(&block_group->free_space_lock);

	/*
	 * Iterate through all of the extent and metadata items in this block
	 * group, adding the free space between them and the free space at the
	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
	 * contained in.
	 */
	key.objectid = block_group->start;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
	if (ret < 0)
		goto out_locked;
	/*
	 * If ret is 1 (no key found), it means this is an empty block group,
	 * without any extents allocated from it and there's no block group
	 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
	 * because we are using the block group tree feature (so block group
	 * items are stored in the block group tree) or this is a new block
	 * group created in the current transaction and its block group item
	 * was not yet inserted in the extent tree (that happens in
	 * btrfs_create_pending_block_groups() -> insert_block_group_item()).
	 * It also means there are no extents allocated for block groups with a
	 * start offset beyond this block group's end offset (this is the last,
	 * highest, block group).
	 */
	start = block_group->start;
	end = btrfs_block_group_end(block_group);
	while (ret == 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			/* Past the end of this block group: we are done. */
			if (key.objectid >= end)
				break;

			/* Record the gap before this allocated extent as free. */
			if (start < key.objectid) {
				ret = __btrfs_add_to_free_space_tree(trans,
								     block_group,
								     path2, start,
								     key.objectid -
								     start);
				if (ret)
					goto out_locked;
			}
			start = key.objectid;
			/* METADATA_ITEM keys encode level in offset, not length. */
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				start += trans->fs_info->nodesize;
			else
				start += key.offset;
		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			if (key.objectid != block_group->start)
				break;
		}

		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			goto out_locked;
	}
	/* Free space between the last extent and the end of the block group. */
	if (start < end) {
		ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
						     start, end - start);
		if (ret)
			goto out_locked;
	}

	ret = 0;
out_locked:
	mutex_unlock(&block_group->free_space_lock);

	return ret;
}
1159
/*
 * Create the free space tree root and populate it for every block group by
 * walking the extent tree. Aborts the transaction on any failure.
 */
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *free_space_root;
	struct btrfs_block_group *block_group;
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/*
	 * Mark the tree as being created and as untrusted; the untrusted bit
	 * is only cleared after the transaction that built it has committed.
	 */
	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	free_space_root = btrfs_create_tree(trans,
					    BTRFS_FREE_SPACE_TREE_OBJECTID);
	if (IS_ERR(free_space_root)) {
		ret = PTR_ERR(free_space_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out_clear;
	}
	ret = btrfs_global_root_insert(free_space_root);
	if (unlikely(ret)) {
		btrfs_put_root(free_space_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out_clear;
	}

	/* Build free space items for every known block group. */
	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);
		ret = populate_free_space_tree(trans, block_group);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out_clear;
		}
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	ret = btrfs_commit_transaction(trans);

	/*
	 * Now that we've committed the transaction any reading of our commit
	 * root will be safe, so we can cache from the free space tree now.
	 */
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;

out_clear:
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
}
1221
clear_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root)1222 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1223 struct btrfs_root *root)
1224 {
1225 BTRFS_PATH_AUTO_FREE(path);
1226 struct btrfs_key key;
1227 struct rb_node *node;
1228 int nr;
1229 int ret;
1230
1231 path = btrfs_alloc_path();
1232 if (!path)
1233 return -ENOMEM;
1234
1235 key.objectid = 0;
1236 key.type = 0;
1237 key.offset = 0;
1238
1239 while (1) {
1240 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1241 if (ret < 0)
1242 return ret;
1243
1244 nr = btrfs_header_nritems(path->nodes[0]);
1245 if (!nr)
1246 break;
1247
1248 path->slots[0] = 0;
1249 ret = btrfs_del_items(trans, root, path, 0, nr);
1250 if (ret)
1251 return ret;
1252
1253 btrfs_release_path(path);
1254 }
1255
1256 node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
1257 while (node) {
1258 struct btrfs_block_group *bg;
1259
1260 bg = rb_entry(node, struct btrfs_block_group, cache_node);
1261 clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
1262 node = rb_next(node);
1263 cond_resched();
1264 }
1265
1266 return 0;
1267 }
1268
/*
 * Remove the free space tree entirely: empty it, delete its root item, drop
 * it from the global roots, and free its root node.
 */
int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* Drop the compat_ro bits before touching the tree itself. */
	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);

	ret = clear_free_space_tree(trans, free_space_root);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	ret = btrfs_del_root(trans, &free_space_root->root_key);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	btrfs_global_root_delete(free_space_root);

	/* Make sure the deleted root is no longer tracked as dirty. */
	spin_lock(&fs_info->trans_lock);
	list_del(&free_space_root->dirty_list);
	spin_unlock(&fs_info->trans_lock);

	/* Clear and free the (now empty) root node of the deleted tree. */
	btrfs_tree_lock(free_space_root->node);
	btrfs_clear_buffer_dirty(trans, free_space_root->node);
	btrfs_tree_unlock(free_space_root->node);
	ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
				    free_space_root->node, 0, 1);
	btrfs_put_root(free_space_root);
	if (unlikely(ret < 0)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	return btrfs_commit_transaction(trans);
}
1322
/*
 * Rebuild the free space tree in place: empty it and re-populate it from the
 * extent tree, possibly spanning several transactions for large filesystems.
 */
int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(free_space_root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* Treat the tree as under construction and untrusted until we commit. */
	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);

	ret = clear_free_space_tree(trans, free_space_root);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		struct btrfs_block_group *block_group;

		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);

		/*
		 * Skip block groups that already got their items via the
		 * add hooks while we were rebuilding; see the comment in
		 * __add_block_group_free_space().
		 */
		if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
			     &block_group->runtime_flags))
			goto next;

		ret = populate_free_space_tree(trans, block_group);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			return ret;
		}
next:
		/* Break the rebuild across transactions if this one got big. */
		if (btrfs_should_end_transaction(trans)) {
			btrfs_end_transaction(trans);
			trans = btrfs_start_transaction(free_space_root, 1);
			if (IS_ERR(trans))
				return PTR_ERR(trans);
		}
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);

	ret = btrfs_commit_transaction(trans);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
}
1384
/*
 * Insert the free space info item and the initial (fully free) extent item
 * for a block group, the first time anything touches its free space.
 * Called with block_group->free_space_lock held by our callers.
 */
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path)
{
	bool own_path = false;
	int ret;

	/* Only the first caller to clear the bit does the insertion. */
	if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
				&block_group->runtime_flags))
		return 0;

	/*
	 * While rebuilding the free space tree we may allocate new metadata
	 * block groups while modifying the free space tree.
	 *
	 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
	 * can use multiple transactions, every time btrfs_end_transaction() is
	 * called at btrfs_rebuild_free_space_tree() we finish the creation of
	 * new block groups by calling btrfs_create_pending_block_groups(), and
	 * that in turn calls us, through btrfs_add_block_group_free_space(),
	 * to add a free space info item and a free space extent item for the
	 * block group.
	 *
	 * Then later btrfs_rebuild_free_space_tree() may find such new block
	 * groups and processes them with populate_free_space_tree(), which can
	 * fail with EEXIST since there are already items for the block group in
	 * the free space tree. Notice that we say "may find" because a new
	 * block group may be added to the block groups rbtree in a node before
	 * or after the block group currently being processed by the rebuild
	 * process. So signal the rebuild process to skip such new block groups
	 * if it finds them.
	 */
	set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);

	/* Callers may pass a NULL path; allocate a private one in that case. */
	if (!path) {
		path = btrfs_alloc_path();
		if (unlikely(!path)) {
			btrfs_abort_transaction(trans, -ENOMEM);
			return -ENOMEM;
		}
		own_path = true;
	}

	ret = add_new_free_space_info(trans, block_group, path);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* The whole block group starts out as one free extent. */
	ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
					     block_group->start, block_group->length);
	if (ret)
		btrfs_abort_transaction(trans, ret);

out:
	if (own_path)
		btrfs_free_path(path);

	return ret;
}
1445
btrfs_add_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1446 int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
1447 struct btrfs_block_group *block_group)
1448 {
1449 int ret;
1450
1451 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1452 return 0;
1453
1454 mutex_lock(&block_group->free_space_lock);
1455 ret = __add_block_group_free_space(trans, block_group, NULL);
1456 mutex_unlock(&block_group->free_space_lock);
1457 return ret;
1458 }
1459
/*
 * Delete all free space tree items belonging to a block group that is being
 * removed: its extent/bitmap items and its free space info item.
 */
int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	BTRFS_PATH_AUTO_FREE(path);
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	u64 start, end;
	int done = 0, nr;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
		/* We never added this block group to the free space tree. */
		return 0;
	}

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	start = block_group->start;
	end = btrfs_block_group_end(block_group);

	/* Search backwards from the last possible key in this block group. */
	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			return ret;
		}

		/*
		 * Walk the leaf right-to-left, counting contiguous items that
		 * belong to this block group so they can be deleted in one
		 * batch. Finding the FREE_SPACE_INFO item (keyed at the block
		 * group's start) means we have reached the first item.
		 */
		leaf = path->nodes[0];
		nr = 0;
		path->slots[0]++;
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				nr++;
				path->slots[0]--;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);
				nr++;
				path->slots[0]--;
			} else {
				/* No other key types may live in this range. */
				ASSERT(0);
			}
		}

		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			return ret;
		}
		btrfs_release_path(path);
	}

	return 0;
}
1535
/*
 * Load a bitmap-format block group's free space into the in-memory cache by
 * scanning its bitmap items bit by bit. A 0 -> 1 transition starts a free
 * extent, a 1 -> 0 transition ends it. The path must point at the block
 * group's free space info item on entry (see btrfs_load_free_space_tree()).
 */
static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	bool prev_bit_set = false;
	/* Initialize to silence GCC. */
	u64 extent_start = 0;
	const u64 end = btrfs_block_group_end(block_group);
	u64 offset;
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	root = btrfs_free_space_root(block_group);

	while (1) {
		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		/* The next block group's info item ends our range. */
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		/* Walk this bitmap item one sector at a time. */
		offset = key.objectid;
		while (offset < key.objectid + key.offset) {
			bool bit_set;

			bit_set = btrfs_free_space_test_bit(block_group, path, offset);
			if (!prev_bit_set && bit_set) {
				/* Rising edge: a free extent starts here. */
				extent_start = offset;
			} else if (prev_bit_set && !bit_set) {
				/* Falling edge: the free extent ends here. */
				u64 space_added;

				ret = btrfs_add_new_free_space(block_group,
							       extent_start,
							       offset,
							       &space_added);
				if (ret)
					return ret;
				total_found += space_added;
				/* Periodically wake up waiters on the cache. */
				if (total_found > CACHING_CTL_WAKE_UP) {
					total_found = 0;
					wake_up(&caching_ctl->wait);
				}
				extent_count++;
			}
			prev_bit_set = bit_set;
			offset += fs_info->sectorsize;
		}
	}
	/* A trailing set bit means free space runs to the block group end. */
	if (prev_bit_set) {
		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
		if (ret)
			return ret;
		extent_count++;
	}

	/* Cross-check against the count stored in the free space info item. */
	if (unlikely(extent_count != expected_extent_count)) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		DEBUG_WARN();
		return -EIO;
	}

	return 0;
}
1615
/*
 * Load an extent-format block group's free space into the in-memory cache.
 * The path must point at the block group's free space info item on entry,
 * so iteration starts at its first free space extent item.
 */
static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *bg = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = bg->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(bg);
	const u64 end = btrfs_block_group_end(bg);
	struct btrfs_key key;
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	for (;;) {
		u64 added;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret > 0)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		/* The next block group's info item ends our range. */
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		ret = btrfs_add_new_free_space(bg, key.objectid,
					       key.objectid + key.offset,
					       &added);
		if (ret)
			return ret;

		extent_count++;
		total_found += added;
		/* Periodically wake up anyone waiting on the caching. */
		if (total_found > CACHING_CTL_WAKE_UP) {
			total_found = 0;
			wake_up(&caching_ctl->wait);
		}
	}

	/* Cross-check against the count stored in the free space info item. */
	if (unlikely(extent_count != expected_extent_count)) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  bg->start, extent_count,
			  expected_extent_count);
		DEBUG_WARN();
		return -EIO;
	}

	return 0;
}
1672
btrfs_load_free_space_tree(struct btrfs_caching_control * caching_ctl)1673 int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1674 {
1675 struct btrfs_block_group *block_group;
1676 struct btrfs_free_space_info *info;
1677 BTRFS_PATH_AUTO_FREE(path);
1678 u32 extent_count, flags;
1679
1680 block_group = caching_ctl->block_group;
1681
1682 path = btrfs_alloc_path();
1683 if (!path)
1684 return -ENOMEM;
1685
1686 /*
1687 * Just like caching_thread() doesn't want to deadlock on the extent
1688 * tree, we don't want to deadlock on the free space tree.
1689 */
1690 path->skip_locking = true;
1691 path->search_commit_root = true;
1692 path->reada = READA_FORWARD;
1693
1694 info = btrfs_search_free_space_info(NULL, block_group, path, 0);
1695 if (IS_ERR(info))
1696 return PTR_ERR(info);
1697
1698 extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1699 flags = btrfs_free_space_flags(path->nodes[0], info);
1700
1701 /*
1702 * We left path pointing to the free space info item, so now
1703 * load_free_space_foo can just iterate through the free space tree from
1704 * there.
1705 */
1706 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1707 return load_free_space_bitmaps(caching_ctl, path, extent_count);
1708 else
1709 return load_free_space_extents(caching_ctl, path, extent_count);
1710 }
1711
/*
 * Delete every free space tree item whose objectid is below the first block
 * group's bytenr, one leaf-sized batch per transaction search.
 */
static int delete_orphan_free_space_entries(struct btrfs_root *fst_root,
					    struct btrfs_path *path,
					    u64 first_bg_bytenr)
{
	struct btrfs_trans_handle *trans;
	int ret;

	trans = btrfs_start_transaction(fst_root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	while (true) {
		struct btrfs_key key = { 0 };
		int i;

		/* Key {0,0,0} always lands us on the leftmost leaf. */
		ret = btrfs_search_slot(trans, fst_root, &key, path, -1, 1);
		if (ret < 0)
			break;
		/* There is never an exact match for an all-zero key. */
		ASSERT(ret > 0);
		ret = 0;
		/* Count leading items in this leaf that are still orphans. */
		for (i = 0; i < btrfs_header_nritems(path->nodes[0]); i++) {
			btrfs_item_key_to_cpu(path->nodes[0], &key, i);
			if (key.objectid >= first_bg_bytenr) {
				/*
				 * Only break the for() loop and continue to
				 * delete items.
				 */
				break;
			}
		}
		/* No items to delete, finished. */
		if (i == 0)
			break;

		ret = btrfs_del_items(trans, fst_root, path, 0, i);
		if (ret < 0)
			break;
		btrfs_release_path(path);
	}
	btrfs_release_path(path);
	btrfs_end_transaction(trans);
	if (ret == 0)
		btrfs_info(fst_root->fs_info, "deleted orphan free space tree entries");
	return ret;
}
1757
1758 /* Remove any free space entry before the first block group. */
/* Remove any free space entry before the first block group. */
int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info)
{
	/* Stack-allocated path, released (not freed) automatically. */
	BTRFS_PATH_AUTO_RELEASE(path);
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *root;
	struct btrfs_block_group *bg;
	u64 first_bg_bytenr;
	int ret;

	/*
	 * Extent tree v2 has multiple global roots based on the block group.
	 * This means we cannot easily grab the global free space tree and locate
	 * orphan items. Furthermore this is still experimental, all users
	 * should use the latest btrfs-progs anyway.
	 */
	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
		return 0;
	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		return 0;
	root = btrfs_global_root(fs_info, &key);
	if (!root)
		return 0;

	/* Reuse the key to search for the very first item in the tree. */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;

	bg = btrfs_lookup_first_block_group(fs_info, 0);
	if (unlikely(!bg)) {
		btrfs_err(fs_info, "no block group found");
		return -EUCLEAN;
	}
	first_bg_bytenr = bg->start;
	btrfs_put_block_group(bg);

	/* Read-only peek: only delete if the first item is an orphan. */
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0)
		return ret;
	/* There should not be an all-zero key in fst. */
	ASSERT(ret > 0);

	/* Empty free space tree. */
	if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
		return 0;

	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
	if (key.objectid >= first_bg_bytenr)
		return 0;
	btrfs_release_path(&path);
	return delete_orphan_free_space_entries(root, &path, first_bg_bytenr);
}
1814