1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2015 Facebook. All rights reserved.
4 */
5
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
8 #include "messages.h"
9 #include "ctree.h"
10 #include "disk-io.h"
11 #include "locking.h"
12 #include "free-space-tree.h"
13 #include "transaction.h"
14 #include "block-group.h"
15 #include "fs.h"
16 #include "accessors.h"
17 #include "extent-tree.h"
18 #include "root-tree.h"
19
20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
21 struct btrfs_block_group *block_group,
22 struct btrfs_path *path);
23
btrfs_free_space_root(struct btrfs_block_group * block_group)24 static struct btrfs_root *btrfs_free_space_root(
25 struct btrfs_block_group *block_group)
26 {
27 struct btrfs_key key = {
28 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
29 .type = BTRFS_ROOT_ITEM_KEY,
30 .offset = 0,
31 };
32
33 if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
34 key.offset = block_group->global_root_id;
35 return btrfs_global_root(block_group->fs_info, &key);
36 }
37
btrfs_set_free_space_tree_thresholds(struct btrfs_block_group * cache)38 void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache)
39 {
40 u32 bitmap_range;
41 size_t bitmap_size;
42 u64 num_bitmaps, total_bitmap_size;
43
44 if (WARN_ON(cache->length == 0))
45 btrfs_warn(cache->fs_info, "block group %llu length is zero",
46 cache->start);
47
48 /*
49 * We convert to bitmaps when the disk space required for using extents
50 * exceeds that required for using bitmaps.
51 */
52 bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
53 num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
54 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
55 total_bitmap_size = num_bitmaps * bitmap_size;
56 cache->bitmap_high_thresh = div_u64(total_bitmap_size,
57 sizeof(struct btrfs_item));
58
59 /*
60 * We allow for a small buffer between the high threshold and low
61 * threshold to avoid thrashing back and forth between the two formats.
62 */
63 if (cache->bitmap_high_thresh > 100)
64 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
65 else
66 cache->bitmap_low_thresh = 0;
67 }
68
add_new_free_space_info(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)69 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
70 struct btrfs_block_group *block_group,
71 struct btrfs_path *path)
72 {
73 struct btrfs_root *root = btrfs_free_space_root(block_group);
74 struct btrfs_free_space_info *info;
75 struct btrfs_key key;
76 struct extent_buffer *leaf;
77 int ret;
78
79 key.objectid = block_group->start;
80 key.type = BTRFS_FREE_SPACE_INFO_KEY;
81 key.offset = block_group->length;
82
83 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
84 if (ret)
85 return ret;
86
87 leaf = path->nodes[0];
88 info = btrfs_item_ptr(leaf, path->slots[0],
89 struct btrfs_free_space_info);
90 btrfs_set_free_space_extent_count(leaf, info, 0);
91 btrfs_set_free_space_flags(leaf, info, 0);
92 btrfs_release_path(path);
93 return 0;
94 }
95
96 EXPORT_FOR_TESTS
btrfs_search_free_space_info(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,int cow)97 struct btrfs_free_space_info *btrfs_search_free_space_info(
98 struct btrfs_trans_handle *trans,
99 struct btrfs_block_group *block_group,
100 struct btrfs_path *path, int cow)
101 {
102 struct btrfs_fs_info *fs_info = block_group->fs_info;
103 struct btrfs_root *root = btrfs_free_space_root(block_group);
104 struct btrfs_key key;
105 int ret;
106
107 key.objectid = block_group->start;
108 key.type = BTRFS_FREE_SPACE_INFO_KEY;
109 key.offset = block_group->length;
110
111 ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
112 if (ret < 0)
113 return ERR_PTR(ret);
114 if (ret != 0) {
115 btrfs_warn(fs_info, "missing free space info for %llu",
116 block_group->start);
117 DEBUG_WARN();
118 return ERR_PTR(-ENOENT);
119 }
120
121 return btrfs_item_ptr(path->nodes[0], path->slots[0],
122 struct btrfs_free_space_info);
123 }
124
125 /*
126 * btrfs_search_slot() but we're looking for the greatest key less than the
127 * passed key.
128 */
btrfs_search_prev_slot(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_key * key,struct btrfs_path * p,int ins_len,int cow)129 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
130 struct btrfs_root *root,
131 struct btrfs_key *key, struct btrfs_path *p,
132 int ins_len, int cow)
133 {
134 int ret;
135
136 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
137 if (ret < 0)
138 return ret;
139
140 if (unlikely(ret == 0)) {
141 DEBUG_WARN();
142 return -EIO;
143 }
144
145 if (unlikely(p->slots[0] == 0)) {
146 DEBUG_WARN("no previous slot found");
147 return -EIO;
148 }
149 p->slots[0]--;
150
151 return 0;
152 }
153
free_space_bitmap_size(const struct btrfs_fs_info * fs_info,u64 size)154 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
155 u64 size)
156 {
157 return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
158 }
159
/*
 * Allocate a zeroed bitmap of @bitmap_size bytes, rounded up to whole
 * unsigned longs so the generic bitmap helpers can operate on it.
 * Returns NULL on allocation failure.
 */
static unsigned long *alloc_bitmap(u32 bitmap_size)
{
	unsigned long *bitmap;
	unsigned int nofs_flag;
	const u32 alloc_size = round_up(bitmap_size, sizeof(unsigned long));

	/*
	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
	 * into the filesystem here: all callers hold a transaction handle
	 * open, so a GFP_KERNEL allocation recursing into the filesystem and
	 * triggering a transaction commit would deadlock.
	 */
	nofs_flag = memalloc_nofs_save();
	bitmap = kvzalloc(alloc_size, GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	return bitmap;
}
177
/*
 * Set @len bits starting at bit @start in the little-endian bitmap @map.
 * Operates a byte at a time so the on-disk (LE) layout is preserved
 * regardless of host endianness.
 */
static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
{
	/* Byte containing the first bit to set. */
	u8 *p = ((u8 *)map) + BIT_BYTE(start);
	/* Bit index one past the last bit to set. */
	const unsigned int size = start + len;
	/* Bits to set in the first (possibly partial) byte. */
	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);

	/* Fill every byte that is completely covered by the range. */
	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		p++;
	}
	/* Handle the trailing partial byte, if any bits remain. */
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		*p |= mask_to_set;
	}
}
197
198 EXPORT_FOR_TESTS
btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)199 int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
200 struct btrfs_block_group *block_group,
201 struct btrfs_path *path)
202 {
203 struct btrfs_fs_info *fs_info = trans->fs_info;
204 struct btrfs_root *root = btrfs_free_space_root(block_group);
205 struct btrfs_free_space_info *info;
206 struct btrfs_key key, found_key;
207 struct extent_buffer *leaf;
208 unsigned long *bitmap;
209 char *bitmap_cursor;
210 u64 start, end;
211 u64 bitmap_range, i;
212 u32 bitmap_size, flags, expected_extent_count;
213 u32 extent_count = 0;
214 int done = 0, nr;
215 int ret;
216
217 bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
218 bitmap = alloc_bitmap(bitmap_size);
219 if (unlikely(!bitmap))
220 return 0;
221
222 start = block_group->start;
223 end = block_group->start + block_group->length;
224
225 key.objectid = end - 1;
226 key.type = (u8)-1;
227 key.offset = (u64)-1;
228
229 while (!done) {
230 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
231 if (unlikely(ret)) {
232 btrfs_abort_transaction(trans, ret);
233 goto out;
234 }
235
236 leaf = path->nodes[0];
237 nr = 0;
238 path->slots[0]++;
239 while (path->slots[0] > 0) {
240 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
241
242 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
243 ASSERT(found_key.objectid == block_group->start);
244 ASSERT(found_key.offset == block_group->length);
245 done = 1;
246 break;
247 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
248 u64 first, last;
249
250 ASSERT(found_key.objectid >= start);
251 ASSERT(found_key.objectid < end);
252 ASSERT(found_key.objectid + found_key.offset <= end);
253
254 first = div_u64(found_key.objectid - start,
255 fs_info->sectorsize);
256 last = div_u64(found_key.objectid + found_key.offset - start,
257 fs_info->sectorsize);
258 le_bitmap_set(bitmap, first, last - first);
259
260 extent_count++;
261 nr++;
262 path->slots[0]--;
263 } else {
264 ASSERT(0);
265 }
266 }
267
268 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
269 if (unlikely(ret)) {
270 btrfs_abort_transaction(trans, ret);
271 goto out;
272 }
273 btrfs_release_path(path);
274 }
275
276 info = btrfs_search_free_space_info(trans, block_group, path, 1);
277 if (IS_ERR(info)) {
278 ret = PTR_ERR(info);
279 btrfs_abort_transaction(trans, ret);
280 goto out;
281 }
282 leaf = path->nodes[0];
283 flags = btrfs_free_space_flags(leaf, info);
284 flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
285 block_group->using_free_space_bitmaps = true;
286 block_group->using_free_space_bitmaps_cached = true;
287 btrfs_set_free_space_flags(leaf, info, flags);
288 expected_extent_count = btrfs_free_space_extent_count(leaf, info);
289 btrfs_release_path(path);
290
291 if (unlikely(extent_count != expected_extent_count)) {
292 btrfs_err(fs_info,
293 "incorrect extent count for %llu; counted %u, expected %u",
294 block_group->start, extent_count,
295 expected_extent_count);
296 ret = -EIO;
297 btrfs_abort_transaction(trans, ret);
298 goto out;
299 }
300
301 bitmap_cursor = (char *)bitmap;
302 bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
303 i = start;
304 while (i < end) {
305 unsigned long ptr;
306 u64 extent_size;
307 u32 data_size;
308
309 extent_size = min(end - i, bitmap_range);
310 data_size = free_space_bitmap_size(fs_info, extent_size);
311
312 key.objectid = i;
313 key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
314 key.offset = extent_size;
315
316 ret = btrfs_insert_empty_item(trans, root, path, &key,
317 data_size);
318 if (unlikely(ret)) {
319 btrfs_abort_transaction(trans, ret);
320 goto out;
321 }
322
323 leaf = path->nodes[0];
324 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
325 write_extent_buffer(leaf, bitmap_cursor, ptr,
326 data_size);
327 btrfs_release_path(path);
328
329 i += extent_size;
330 bitmap_cursor += data_size;
331 }
332
333 ret = 0;
334 out:
335 kvfree(bitmap);
336 return ret;
337 }
338
339 EXPORT_FOR_TESTS
btrfs_convert_free_space_to_extents(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)340 int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
341 struct btrfs_block_group *block_group,
342 struct btrfs_path *path)
343 {
344 struct btrfs_fs_info *fs_info = trans->fs_info;
345 struct btrfs_root *root = btrfs_free_space_root(block_group);
346 struct btrfs_free_space_info *info;
347 struct btrfs_key key, found_key;
348 struct extent_buffer *leaf;
349 unsigned long *bitmap;
350 u64 start, end;
351 u32 bitmap_size, flags, expected_extent_count;
352 unsigned long nrbits, start_bit, end_bit;
353 u32 extent_count = 0;
354 int done = 0, nr;
355 int ret;
356
357 bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
358 bitmap = alloc_bitmap(bitmap_size);
359 if (unlikely(!bitmap))
360 return 0;
361
362 start = block_group->start;
363 end = block_group->start + block_group->length;
364
365 key.objectid = end - 1;
366 key.type = (u8)-1;
367 key.offset = (u64)-1;
368
369 while (!done) {
370 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
371 if (unlikely(ret)) {
372 btrfs_abort_transaction(trans, ret);
373 goto out;
374 }
375
376 leaf = path->nodes[0];
377 nr = 0;
378 path->slots[0]++;
379 while (path->slots[0] > 0) {
380 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
381
382 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
383 ASSERT(found_key.objectid == block_group->start);
384 ASSERT(found_key.offset == block_group->length);
385 done = 1;
386 break;
387 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
388 unsigned long ptr;
389 char *bitmap_cursor;
390 u32 bitmap_pos, data_size;
391
392 ASSERT(found_key.objectid >= start);
393 ASSERT(found_key.objectid < end);
394 ASSERT(found_key.objectid + found_key.offset <= end);
395
396 bitmap_pos = div_u64(found_key.objectid - start,
397 fs_info->sectorsize *
398 BITS_PER_BYTE);
399 bitmap_cursor = ((char *)bitmap) + bitmap_pos;
400 data_size = free_space_bitmap_size(fs_info,
401 found_key.offset);
402
403 path->slots[0]--;
404 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
405 read_extent_buffer(leaf, bitmap_cursor, ptr,
406 data_size);
407
408 nr++;
409 } else {
410 ASSERT(0);
411 }
412 }
413
414 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
415 if (unlikely(ret)) {
416 btrfs_abort_transaction(trans, ret);
417 goto out;
418 }
419 btrfs_release_path(path);
420 }
421
422 info = btrfs_search_free_space_info(trans, block_group, path, 1);
423 if (IS_ERR(info)) {
424 ret = PTR_ERR(info);
425 btrfs_abort_transaction(trans, ret);
426 goto out;
427 }
428 leaf = path->nodes[0];
429 flags = btrfs_free_space_flags(leaf, info);
430 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
431 block_group->using_free_space_bitmaps = false;
432 block_group->using_free_space_bitmaps_cached = true;
433 btrfs_set_free_space_flags(leaf, info, flags);
434 expected_extent_count = btrfs_free_space_extent_count(leaf, info);
435 btrfs_release_path(path);
436
437 nrbits = block_group->length >> fs_info->sectorsize_bits;
438 start_bit = find_next_bit_le(bitmap, nrbits, 0);
439
440 while (start_bit < nrbits) {
441 end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
442 ASSERT(start_bit < end_bit);
443
444 key.objectid = start + start_bit * fs_info->sectorsize;
445 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
446 key.offset = (end_bit - start_bit) * fs_info->sectorsize;
447
448 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
449 if (unlikely(ret)) {
450 btrfs_abort_transaction(trans, ret);
451 goto out;
452 }
453 btrfs_release_path(path);
454
455 extent_count++;
456
457 start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
458 }
459
460 if (unlikely(extent_count != expected_extent_count)) {
461 btrfs_err(fs_info,
462 "incorrect extent count for %llu; counted %u, expected %u",
463 block_group->start, extent_count,
464 expected_extent_count);
465 ret = -EIO;
466 btrfs_abort_transaction(trans, ret);
467 goto out;
468 }
469
470 ret = 0;
471 out:
472 kvfree(bitmap);
473 return ret;
474 }
475
update_free_space_extent_count(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,int new_extents)476 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
477 struct btrfs_block_group *block_group,
478 struct btrfs_path *path,
479 int new_extents)
480 {
481 struct btrfs_free_space_info *info;
482 u32 flags;
483 u32 extent_count;
484 int ret = 0;
485
486 if (new_extents == 0)
487 return 0;
488
489 info = btrfs_search_free_space_info(trans, block_group, path, 1);
490 if (IS_ERR(info))
491 return PTR_ERR(info);
492
493 flags = btrfs_free_space_flags(path->nodes[0], info);
494 extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
495
496 extent_count += new_extents;
497 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
498 btrfs_release_path(path);
499
500 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
501 extent_count > block_group->bitmap_high_thresh) {
502 ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path);
503 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
504 extent_count < block_group->bitmap_low_thresh) {
505 ret = btrfs_convert_free_space_to_extents(trans, block_group, path);
506 }
507
508 return ret;
509 }
510
511 EXPORT_FOR_TESTS
btrfs_free_space_test_bit(struct btrfs_block_group * block_group,struct btrfs_path * path,u64 offset)512 bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
513 struct btrfs_path *path, u64 offset)
514 {
515 struct extent_buffer *leaf;
516 struct btrfs_key key;
517 u64 found_start, found_end;
518 unsigned long ptr, i;
519
520 leaf = path->nodes[0];
521 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
522 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
523
524 found_start = key.objectid;
525 found_end = key.objectid + key.offset;
526 ASSERT(offset >= found_start && offset < found_end);
527
528 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
529 i = div_u64(offset - found_start,
530 block_group->fs_info->sectorsize);
531 return extent_buffer_test_bit(leaf, ptr, i);
532 }
533
/*
 * Set or clear the bits covering [*start, *start + *size) within the bitmap
 * item that @path currently points at. On return *start/*size are advanced
 * past the portion handled here; the caller iterates over successive bitmap
 * items until *size reaches zero.
 */
static void free_space_modify_bits(struct btrfs_trans_handle *trans,
				   struct btrfs_block_group *block_group,
				   struct btrfs_path *path, u64 *start, u64 *size,
				   bool set_bits)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 end = *start + *size;
	u64 found_start, found_end;
	unsigned long ptr, first, last;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	/* The range must begin inside this bitmap item. */
	ASSERT(*start >= found_start && *start < found_end);
	ASSERT(end > found_start);

	/* Clamp to the range covered by this bitmap item. */
	if (end > found_end)
		end = found_end;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	/* Convert byte offsets into bit indices within this bitmap. */
	first = (*start - found_start) >> fs_info->sectorsize_bits;
	last = (end - found_start) >> fs_info->sectorsize_bits;
	if (set_bits)
		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
	else
		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
	btrfs_mark_buffer_dirty(trans, leaf);

	/* Advance the caller's cursor past the region just handled. */
	*size -= end - *start;
	*start = end;
}
570
571 /*
572 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
573 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
574 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
575 * looking for.
576 */
free_space_next_bitmap(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * p)577 static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
578 struct btrfs_root *root, struct btrfs_path *p)
579 {
580 struct btrfs_key key;
581
582 if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
583 p->slots[0]++;
584 return 0;
585 }
586
587 btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
588 btrfs_release_path(p);
589
590 key.objectid += key.offset;
591 key.type = (u8)-1;
592 key.offset = (u64)-1;
593
594 return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
595 }
596
/*
 * Add or remove the free space [start, start + size) in a bitmap-format
 * block group. If @remove is true we are removing free space (clearing
 * bits); if false we are adding free space (setting bits). The bits
 * immediately before and after the range are sampled to work out the net
 * change in the number of logical free space extents, which is then fed to
 * update_free_space_extent_count().
 */
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size, bool remove)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	u64 end = start + size;
	u64 cur_start, cur_size;
	bool prev_bit_set = false;
	bool next_bit_set = false;
	int new_extents;
	int ret;

	/*
	 * Read the bit for the block immediately before the extent of space if
	 * that block is within the block group.
	 */
	if (start > block_group->start) {
		u64 prev_block = start - block_group->fs_info->sectorsize;

		key.objectid = prev_block;
		key.type = (u8)-1;
		key.offset = (u64)-1;

		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
		if (ret)
			return ret;

		prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);

		/* The previous block may have been in the previous bitmap. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (start >= key.objectid + key.offset) {
			ret = free_space_next_bitmap(trans, root, path);
			if (ret)
				return ret;
		}
	} else {
		/* Range starts at the block group: just find its bitmap. */
		key.objectid = start;
		key.type = (u8)-1;
		key.offset = (u64)-1;

		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
		if (ret)
			return ret;
	}

	/*
	 * Iterate over all of the bitmaps overlapped by the extent of space,
	 * clearing/setting bits as required.
	 */
	cur_start = start;
	cur_size = size;
	while (1) {
		free_space_modify_bits(trans, block_group, path, &cur_start,
				       &cur_size, !remove);
		if (cur_size == 0)
			break;
		ret = free_space_next_bitmap(trans, root, path);
		if (ret)
			return ret;
	}

	/*
	 * Read the bit for the block immediately after the extent of space if
	 * that block is within the block group.
	 */
	if (end < block_group->start + block_group->length) {
		/* The next block may be in the next bitmap. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (end >= key.objectid + key.offset) {
			ret = free_space_next_bitmap(trans, root, path);
			if (ret)
				return ret;
		}

		next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
	}

	/* Work out the net change in the logical extent count. */
	if (remove) {
		new_extents = -1;
		if (prev_bit_set) {
			/* Leftover on the left. */
			new_extents++;
		}
		if (next_bit_set) {
			/* Leftover on the right. */
			new_extents++;
		}
	} else {
		new_extents = 1;
		if (prev_bit_set) {
			/* Merging with neighbor on the left. */
			new_extents--;
		}
		if (next_bit_set) {
			/* Merging with neighbor on the right. */
			new_extents--;
		}
	}

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}
707
/*
 * Remove the free space [start, size) from an extent-format block group by
 * deleting the FREE_SPACE_EXTENT item that contains it and re-inserting
 * keys for any leftover space on either side.
 */
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	u64 found_start, found_end;
	u64 end = start + size;
	int new_extents = -1;
	int ret;

	key.objectid = start;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	/* The removed range must lie entirely within the found extent. */
	ASSERT(start >= found_start && end <= found_end);

	/*
	 * Okay, now that we've found the free space extent which contains the
	 * free space that we are removing, there are four cases:
	 *
	 * 1. We're using the whole extent: delete the key we found and
	 * decrement the free space extent count.
	 * 2. We are using part of the extent starting at the beginning: delete
	 * the key we found and insert a new key representing the leftover at
	 * the end. There is no net change in the number of extents.
	 * 3. We are using part of the extent ending at the end: delete the key
	 * we found and insert a new key representing the leftover at the
	 * beginning. There is no net change in the number of extents.
	 * 4. We are using part of the extent in the middle: delete the key we
	 * found and insert two new keys representing the leftovers on each
	 * side. Where we used to have one extent, we now have two, so increment
	 * the extent count. We may need to convert the block group to bitmaps
	 * as a result.
	 */

	/* Delete the existing key (cases 1-4). */
	ret = btrfs_del_item(trans, root, path);
	if (ret)
		return ret;

	/* Add a key for leftovers at the beginning (cases 3 and 4). */
	if (start > found_start) {
		key.objectid = found_start;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = start - found_start;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			return ret;
		new_extents++;
	}

	/* Add a key for leftovers at the end (cases 2 and 4). */
	if (end < found_end) {
		key.objectid = end;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = found_end - end;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			return ret;
		new_extents++;
	}

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}
789
using_bitmaps(struct btrfs_block_group * bg,struct btrfs_path * path)790 static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
791 {
792 struct btrfs_free_space_info *info;
793 u32 flags;
794
795 if (bg->using_free_space_bitmaps_cached)
796 return bg->using_free_space_bitmaps;
797
798 info = btrfs_search_free_space_info(NULL, bg, path, 0);
799 if (IS_ERR(info))
800 return PTR_ERR(info);
801 flags = btrfs_free_space_flags(path->nodes[0], info);
802 btrfs_release_path(path);
803
804 bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS);
805 bg->using_free_space_bitmaps_cached = true;
806
807 return bg->using_free_space_bitmaps;
808 }
809
810 EXPORT_FOR_TESTS
__btrfs_remove_from_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)811 int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
812 struct btrfs_block_group *block_group,
813 struct btrfs_path *path, u64 start, u64 size)
814 {
815 int ret;
816
817 ret = __add_block_group_free_space(trans, block_group, path);
818 if (ret)
819 return ret;
820
821 ret = using_bitmaps(block_group, path);
822 if (ret < 0)
823 return ret;
824
825 if (ret)
826 return modify_free_space_bitmap(trans, block_group, path,
827 start, size, true);
828
829 return remove_free_space_extent(trans, block_group, path, start, size);
830 }
831
/*
 * Public entry point: remove [start, start + size) from the free space
 * tree. A no-op when the free space tree feature is not enabled; any
 * failure aborts the transaction.
 */
int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
				      u64 start, u64 size)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *block_group;
	BTRFS_PATH_AUTO_FREE(path);
	int ret;

	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	block_group = btrfs_lookup_block_group(fs_info, start);
	if (unlikely(!block_group)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	/* Serialize against other free space tree updates for this group. */
	mutex_lock(&block_group->free_space_lock);
	ret = __btrfs_remove_from_free_space_tree(trans, block_group, path,
						  start, size);
	mutex_unlock(&block_group->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	btrfs_put_block_group(block_group);

	return ret;
}
867
/*
 * Add the free space [start, start + size) to an extent-format block group,
 * merging with any immediately adjacent free space extents on either side.
 */
static int add_free_space_extent(struct btrfs_trans_handle *trans,
				 struct btrfs_block_group *block_group,
				 struct btrfs_path *path,
				 u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key, new_key;
	u64 found_start, found_end;
	u64 end = start + size;
	int new_extents = 1;
	int ret;

	/*
	 * We are adding a new extent of free space, but we need to merge
	 * extents. There are four cases here:
	 *
	 * 1. The new extent does not have any immediate neighbors to merge
	 * with: add the new key and increment the free space extent count. We
	 * may need to convert the block group to bitmaps as a result.
	 * 2. The new extent has an immediate neighbor before it: remove the
	 * previous key and insert a new key combining both of them. There is no
	 * net change in the number of extents.
	 * 3. The new extent has an immediate neighbor after it: remove the next
	 * key and insert a new key combining both of them. There is no net
	 * change in the number of extents.
	 * 4. The new extent has immediate neighbors on both sides: remove both
	 * of the keys and insert a new key combining all of them. Where we used
	 * to have two extents, we now have one, so decrement the extent count.
	 */

	new_key.objectid = start;
	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
	new_key.offset = size;

	/* Search for a neighbor on the left. */
	if (start == block_group->start)
		goto right;
	key.objectid = start - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	/* Landing on the info item means there is no extent to our left. */
	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto right;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	/* The left neighbor must end at or before our start (no overlap). */
	ASSERT(found_start < start && found_end <= start);

	/*
	 * Delete the neighbor on the left and absorb it into the new key (cases
	 * 2 and 4).
	 */
	if (found_end == start) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			return ret;
		new_key.objectid = found_start;
		new_key.offset += key.offset;
		new_extents--;
	}
	btrfs_release_path(path);

right:
	/* Search for a neighbor on the right. */
	if (end == block_group->start + block_group->length)
		goto insert;
	key.objectid = end;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	/* Landing on the info item means there is no extent to our right. */
	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto insert;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	/* Either entirely to our left or entirely to our right. */
	ASSERT((found_start < start && found_end <= start) ||
	       (found_start >= end && found_end > end));

	/*
	 * Delete the neighbor on the right and absorb it into the new key
	 * (cases 3 and 4).
	 */
	if (found_start == end) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			return ret;
		new_key.offset += key.offset;
		new_extents--;
	}
	btrfs_release_path(path);

insert:
	/* Insert the new key (cases 1-4). */
	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
	if (ret)
		return ret;

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}
990
991 EXPORT_FOR_TESTS
__btrfs_add_to_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)992 int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
993 struct btrfs_block_group *block_group,
994 struct btrfs_path *path, u64 start, u64 size)
995 {
996 int ret;
997
998 ret = __add_block_group_free_space(trans, block_group, path);
999 if (ret)
1000 return ret;
1001
1002 ret = using_bitmaps(block_group, path);
1003 if (ret < 0)
1004 return ret;
1005
1006 if (ret)
1007 return modify_free_space_bitmap(trans, block_group, path,
1008 start, size, false);
1009
1010 return add_free_space_extent(trans, block_group, path, start, size);
1011 }
1012
/*
 * Record @size bytes starting at @start as free space in the free space tree.
 *
 * Looks up the block group containing @start, takes its free_space_lock and
 * delegates to __btrfs_add_to_free_space_tree().  Any failure aborts the
 * transaction.  Returns 0 on success (or if there is no free space tree) and
 * a negative errno on failure.
 */
int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
				 u64 start, u64 size)
{
	struct btrfs_block_group *block_group;
	BTRFS_PATH_AUTO_FREE(path);	/* freed automatically on all return paths */
	int ret;

	/* Nothing to do if the filesystem has no free space tree. */
	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	block_group = btrfs_lookup_block_group(trans->fs_info, start);
	if (unlikely(!block_group)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	/* Serialize against other modifications of this block group's free space. */
	mutex_lock(&block_group->free_space_lock);
	ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size);
	mutex_unlock(&block_group->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	btrfs_put_block_group(block_group);

	return ret;
}
1048
1049 /*
1050 * Populate the free space tree by walking the extent tree. Operations on the
1051 * extent tree that happen as a result of writes to the free space tree will go
1052 * through the normal add/remove hooks.
1053 */
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group)
{
	struct btrfs_root *extent_root;
	BTRFS_PATH_AUTO_FREE(path);	/* walks the extent tree */
	BTRFS_PATH_AUTO_FREE(path2);	/* writes to the free space tree */
	struct btrfs_key key;
	u64 start, end;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path2 = btrfs_alloc_path();
	if (!path2)
		return -ENOMEM;

	path->reada = READA_FORWARD;

	/* Insert the free space info item before taking the free space lock. */
	ret = add_new_free_space_info(trans, block_group, path2);
	if (ret)
		return ret;

	mutex_lock(&block_group->free_space_lock);

	/*
	 * Iterate through all of the extent and metadata items in this block
	 * group, adding the free space between them and the free space at the
	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
	 * contained in.
	 */
	key.objectid = block_group->start;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
	if (ret < 0)
		goto out_locked;
	/*
	 * If ret is 1 (no key found), it means this is an empty block group,
	 * without any extents allocated from it and there's no block group
	 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
	 * because we are using the block group tree feature (so block group
	 * items are stored in the block group tree) or this is a new block
	 * group created in the current transaction and its block group item
	 * was not yet inserted in the extent tree (that happens in
	 * btrfs_create_pending_block_groups() -> insert_block_group_item()).
	 * It also means there are no extents allocated for block groups with a
	 * start offset beyond this block group's end offset (this is the last,
	 * highest, block group).
	 */
	start = block_group->start;
	end = block_group->start + block_group->length;
	while (ret == 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			/* Past the end of this block group: we're done. */
			if (key.objectid >= end)
				break;

			/* Gap before this allocated extent is free space. */
			if (start < key.objectid) {
				ret = __btrfs_add_to_free_space_tree(trans,
								     block_group,
								     path2, start,
								     key.objectid -
								     start);
				if (ret)
					goto out_locked;
			}
			start = key.objectid;
			/* METADATA_ITEM's offset is a tree level, not a size. */
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				start += trans->fs_info->nodesize;
			else
				start += key.offset;
		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			/* The next block group's item: stop here. */
			if (key.objectid != block_group->start)
				break;
		}

		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			goto out_locked;
	}
	/* Trailing free space after the last allocated extent. */
	if (start < end) {
		ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
						     start, end - start);
		if (ret)
			goto out_locked;
	}

	ret = 0;
out_locked:
	mutex_unlock(&block_group->free_space_lock);

	return ret;
}
1154
/*
 * Create the free space tree root and populate it with the free space of all
 * existing block groups.
 *
 * The CREATING bit is set while the tree is being built; the UNTRUSTED bit
 * keeps the tree from being used for caching until the creating transaction
 * has committed.  Returns 0 on success or a negative errno.
 */
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *free_space_root;
	struct btrfs_block_group *block_group;
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	free_space_root = btrfs_create_tree(trans,
					    BTRFS_FREE_SPACE_TREE_OBJECTID);
	if (IS_ERR(free_space_root)) {
		ret = PTR_ERR(free_space_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out_clear;
	}
	ret = btrfs_global_root_insert(free_space_root);
	if (unlikely(ret)) {
		/* Drop the reference taken by btrfs_create_tree(). */
		btrfs_put_root(free_space_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out_clear;
	}

	/* Add free space items for every block group on the filesystem. */
	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);
		ret = populate_free_space_tree(trans, block_group);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out_clear;
		}
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	ret = btrfs_commit_transaction(trans);

	/*
	 * Now that we've committed the transaction any reading of our commit
	 * root will be safe, so we can cache from the free space tree now.
	 */
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;

out_clear:
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
}
1216
clear_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root)1217 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1218 struct btrfs_root *root)
1219 {
1220 BTRFS_PATH_AUTO_FREE(path);
1221 struct btrfs_key key;
1222 struct rb_node *node;
1223 int nr;
1224 int ret;
1225
1226 path = btrfs_alloc_path();
1227 if (!path)
1228 return -ENOMEM;
1229
1230 key.objectid = 0;
1231 key.type = 0;
1232 key.offset = 0;
1233
1234 while (1) {
1235 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1236 if (ret < 0)
1237 return ret;
1238
1239 nr = btrfs_header_nritems(path->nodes[0]);
1240 if (!nr)
1241 break;
1242
1243 path->slots[0] = 0;
1244 ret = btrfs_del_items(trans, root, path, 0, nr);
1245 if (ret)
1246 return ret;
1247
1248 btrfs_release_path(path);
1249 }
1250
1251 node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
1252 while (node) {
1253 struct btrfs_block_group *bg;
1254
1255 bg = rb_entry(node, struct btrfs_block_group, cache_node);
1256 clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
1257 node = rb_next(node);
1258 cond_resched();
1259 }
1260
1261 return 0;
1262 }
1263
/*
 * Delete the free space tree: clear its compat_ro bits, remove all of its
 * items, delete its root item and free its root node.
 *
 * Any failure aborts the transaction.  Returns 0 on success or a negative
 * errno.
 */
int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* Stop new users of the tree before tearing it down. */
	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);

	ret = clear_free_space_tree(trans, free_space_root);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	ret = btrfs_del_root(trans, &free_space_root->root_key);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	btrfs_global_root_delete(free_space_root);

	/* Remove the root from the transaction's dirty root list. */
	spin_lock(&fs_info->trans_lock);
	list_del(&free_space_root->dirty_list);
	spin_unlock(&fs_info->trans_lock);

	/* Undirty and free the now unused root node. */
	btrfs_tree_lock(free_space_root->node);
	btrfs_clear_buffer_dirty(trans, free_space_root->node);
	btrfs_tree_unlock(free_space_root->node);
	ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
				    free_space_root->node, 0, 1);
	btrfs_put_root(free_space_root);
	if (unlikely(ret < 0)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	return btrfs_commit_transaction(trans);
}
1317
/*
 * Rebuild the free space tree from scratch: clear all of its items and then
 * repopulate them from the extent tree, possibly spanning multiple
 * transactions.
 *
 * Any failure aborts the current transaction.  Returns 0 on success or a
 * negative errno.
 */
int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(free_space_root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);

	ret = clear_free_space_tree(trans, free_space_root);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		struct btrfs_block_group *block_group;

		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);

		/*
		 * Skip block groups whose free space items were already added
		 * during this rebuild (see __add_block_group_free_space()),
		 * otherwise populating them again would fail with EEXIST.
		 */
		if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
			     &block_group->runtime_flags))
			goto next;

		ret = populate_free_space_tree(trans, block_group);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			return ret;
		}
next:
		/* Avoid oversized transactions: end and restart when needed. */
		if (btrfs_should_end_transaction(trans)) {
			btrfs_end_transaction(trans);
			trans = btrfs_start_transaction(free_space_root, 1);
			if (IS_ERR(trans))
				return PTR_ERR(trans);
		}
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);

	/* The tree is only trustworthy after the rebuild commits. */
	ret = btrfs_commit_transaction(trans);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
}
1379
/*
 * Add the initial free space items for a block group flagged with
 * BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE: a free space info item plus free space
 * covering the whole block group range.
 *
 * @path may be NULL, in which case a temporary path is allocated and freed
 * here.  Any failure aborts @trans.  Returns 0 on success (or if the flag
 * was not set) and a negative errno on failure.
 */
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path)
{
	bool own_path = false;
	int ret;

	/* Only the first caller after the flag was set does the work. */
	if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
				&block_group->runtime_flags))
		return 0;

	/*
	 * While rebuilding the free space tree we may allocate new metadata
	 * block groups while modifying the free space tree.
	 *
	 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
	 * can use multiple transactions, every time btrfs_end_transaction() is
	 * called at btrfs_rebuild_free_space_tree() we finish the creation of
	 * new block groups by calling btrfs_create_pending_block_groups(), and
	 * that in turn calls us, through add_block_group_free_space(), to add
	 * a free space info item and a free space extent item for the block
	 * group.
	 *
	 * Then later btrfs_rebuild_free_space_tree() may find such new block
	 * groups and processes them with populate_free_space_tree(), which can
	 * fail with EEXIST since there are already items for the block group in
	 * the free space tree. Notice that we say "may find" because a new
	 * block group may be added to the block groups rbtree in a node before
	 * or after the block group currently being processed by the rebuild
	 * process. So signal the rebuild process to skip such new block groups
	 * if it finds them.
	 */
	set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);

	if (!path) {
		path = btrfs_alloc_path();
		if (unlikely(!path)) {
			btrfs_abort_transaction(trans, -ENOMEM);
			return -ENOMEM;
		}
		own_path = true;
	}

	ret = add_new_free_space_info(trans, block_group, path);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* A new block group is entirely free: add its whole range. */
	ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
					     block_group->start, block_group->length);
	if (ret)
		btrfs_abort_transaction(trans, ret);

out:
	if (own_path)
		btrfs_free_path(path);

	return ret;
}
1440
btrfs_add_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1441 int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
1442 struct btrfs_block_group *block_group)
1443 {
1444 int ret;
1445
1446 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1447 return 0;
1448
1449 mutex_lock(&block_group->free_space_lock);
1450 ret = __add_block_group_free_space(trans, block_group, NULL);
1451 mutex_unlock(&block_group->free_space_lock);
1452 return ret;
1453 }
1454
btrfs_remove_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1455 int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
1456 struct btrfs_block_group *block_group)
1457 {
1458 struct btrfs_root *root = btrfs_free_space_root(block_group);
1459 BTRFS_PATH_AUTO_FREE(path);
1460 struct btrfs_key key, found_key;
1461 struct extent_buffer *leaf;
1462 u64 start, end;
1463 int done = 0, nr;
1464 int ret;
1465
1466 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1467 return 0;
1468
1469 if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1470 /* We never added this block group to the free space tree. */
1471 return 0;
1472 }
1473
1474 path = btrfs_alloc_path();
1475 if (unlikely(!path)) {
1476 ret = -ENOMEM;
1477 btrfs_abort_transaction(trans, ret);
1478 return ret;
1479 }
1480
1481 start = block_group->start;
1482 end = block_group->start + block_group->length;
1483
1484 key.objectid = end - 1;
1485 key.type = (u8)-1;
1486 key.offset = (u64)-1;
1487
1488 while (!done) {
1489 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1490 if (unlikely(ret)) {
1491 btrfs_abort_transaction(trans, ret);
1492 return ret;
1493 }
1494
1495 leaf = path->nodes[0];
1496 nr = 0;
1497 path->slots[0]++;
1498 while (path->slots[0] > 0) {
1499 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1500
1501 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1502 ASSERT(found_key.objectid == block_group->start);
1503 ASSERT(found_key.offset == block_group->length);
1504 done = 1;
1505 nr++;
1506 path->slots[0]--;
1507 break;
1508 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1509 found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1510 ASSERT(found_key.objectid >= start);
1511 ASSERT(found_key.objectid < end);
1512 ASSERT(found_key.objectid + found_key.offset <= end);
1513 nr++;
1514 path->slots[0]--;
1515 } else {
1516 ASSERT(0);
1517 }
1518 }
1519
1520 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1521 if (unlikely(ret)) {
1522 btrfs_abort_transaction(trans, ret);
1523 return ret;
1524 }
1525 btrfs_release_path(path);
1526 }
1527
1528 ret = 0;
1529
1530 return ret;
1531 }
1532
/*
 * Load free space from a block group stored in bitmap format, converting
 * contiguous runs of set bits into free space ranges in the in-memory cache.
 *
 * @caching_ctl: caching control for the block group being loaded
 * @path: positioned at the block group's free space info item on entry
 * @expected_extent_count: extent count recorded in the free space info item
 *
 * Returns 0 on success, -EIO if the number of extents found does not match
 * @expected_extent_count, or another negative errno on failure.
 */
static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	bool prev_bit_set = false;
	/* Initialize to silence GCC. */
	u64 extent_start = 0;
	u64 end, offset;
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	root = btrfs_free_space_root(block_group);

	end = block_group->start + block_group->length;

	while (1) {
		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		/* An info item means we reached the next block group. */
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		/* Scan this bitmap item one sector at a time. */
		offset = key.objectid;
		while (offset < key.objectid + key.offset) {
			bool bit_set;

			bit_set = btrfs_free_space_test_bit(block_group, path, offset);
			if (!prev_bit_set && bit_set) {
				/* 0 -> 1 transition: a free range starts here. */
				extent_start = offset;
			} else if (prev_bit_set && !bit_set) {
				/* 1 -> 0 transition: the free range ended. */
				u64 space_added;

				ret = btrfs_add_new_free_space(block_group,
							       extent_start,
							       offset,
							       &space_added);
				if (ret)
					return ret;
				total_found += space_added;
				/* Periodically wake up waiters on the caching control. */
				if (total_found > CACHING_CTL_WAKE_UP) {
					total_found = 0;
					wake_up(&caching_ctl->wait);
				}
				extent_count++;
			}
			prev_bit_set = bit_set;
			offset += fs_info->sectorsize;
		}
	}
	/* Close out a free range that runs to the end of the block group. */
	if (prev_bit_set) {
		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
		if (ret)
			return ret;
		extent_count++;
	}

	if (unlikely(extent_count != expected_extent_count)) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		DEBUG_WARN();
		return -EIO;
	}

	return 0;
}
1615
/*
 * Load free space from a block group stored in extent format, adding each
 * free space extent item to the in-memory cache.
 *
 * @caching_ctl: caching control for the block group being loaded
 * @path: positioned at the block group's free space info item on entry
 * @expected_extent_count: extent count recorded in the free space info item
 *
 * Returns 0 on success, -EIO if the number of extents found does not match
 * @expected_extent_count, or another negative errno on failure.
 */
static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *bg = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = bg->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(bg);
	struct btrfs_key key;
	u64 end = bg->start + bg->length;
	u64 found_bytes = 0;
	u32 nr_extents = 0;
	int ret;

	for (;;) {
		u64 added;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret > 0)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		/* An info item means we reached the next block group. */
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		ret = btrfs_add_new_free_space(bg, key.objectid,
					       key.objectid + key.offset,
					       &added);
		if (ret)
			return ret;
		found_bytes += added;
		/* Periodically wake up waiters on the caching control. */
		if (found_bytes > CACHING_CTL_WAKE_UP) {
			found_bytes = 0;
			wake_up(&caching_ctl->wait);
		}
		nr_extents++;
	}

	if (unlikely(nr_extents != expected_extent_count)) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  bg->start, nr_extents, expected_extent_count);
		DEBUG_WARN();
		return -EIO;
	}

	return 0;
}
1676
btrfs_load_free_space_tree(struct btrfs_caching_control * caching_ctl)1677 int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1678 {
1679 struct btrfs_block_group *block_group;
1680 struct btrfs_free_space_info *info;
1681 BTRFS_PATH_AUTO_FREE(path);
1682 u32 extent_count, flags;
1683
1684 block_group = caching_ctl->block_group;
1685
1686 path = btrfs_alloc_path();
1687 if (!path)
1688 return -ENOMEM;
1689
1690 /*
1691 * Just like caching_thread() doesn't want to deadlock on the extent
1692 * tree, we don't want to deadlock on the free space tree.
1693 */
1694 path->skip_locking = true;
1695 path->search_commit_root = true;
1696 path->reada = READA_FORWARD;
1697
1698 info = btrfs_search_free_space_info(NULL, block_group, path, 0);
1699 if (IS_ERR(info))
1700 return PTR_ERR(info);
1701
1702 extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1703 flags = btrfs_free_space_flags(path->nodes[0], info);
1704
1705 /*
1706 * We left path pointing to the free space info item, so now
1707 * load_free_space_foo can just iterate through the free space tree from
1708 * there.
1709 */
1710 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1711 return load_free_space_bitmaps(caching_ctl, path, extent_count);
1712 else
1713 return load_free_space_extents(caching_ctl, path, extent_count);
1714 }
1715