1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2015 Facebook. All rights reserved.
4 */
5
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
8 #include "messages.h"
9 #include "ctree.h"
10 #include "disk-io.h"
11 #include "locking.h"
12 #include "free-space-tree.h"
13 #include "transaction.h"
14 #include "block-group.h"
15 #include "fs.h"
16 #include "accessors.h"
17 #include "extent-tree.h"
18 #include "root-tree.h"
19
20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
21 struct btrfs_block_group *block_group,
22 struct btrfs_path *path);
23
btrfs_free_space_root(struct btrfs_block_group * block_group)24 struct btrfs_root *btrfs_free_space_root(struct btrfs_block_group *block_group)
25 {
26 struct btrfs_key key = {
27 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
28 .type = BTRFS_ROOT_ITEM_KEY,
29 .offset = 0,
30 };
31
32 if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
33 key.offset = block_group->global_root_id;
34 return btrfs_global_root(block_group->fs_info, &key);
35 }
36
btrfs_set_free_space_tree_thresholds(struct btrfs_block_group * cache)37 void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache)
38 {
39 u32 bitmap_range;
40 size_t bitmap_size;
41 u64 num_bitmaps, total_bitmap_size;
42
43 if (WARN_ON(cache->length == 0))
44 btrfs_warn(cache->fs_info, "block group %llu length is zero",
45 cache->start);
46
47 /*
48 * We convert to bitmaps when the disk space required for using extents
49 * exceeds that required for using bitmaps.
50 */
51 bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
52 num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
53 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
54 total_bitmap_size = num_bitmaps * bitmap_size;
55 cache->bitmap_high_thresh = div_u64(total_bitmap_size,
56 sizeof(struct btrfs_item));
57
58 /*
59 * We allow for a small buffer between the high threshold and low
60 * threshold to avoid thrashing back and forth between the two formats.
61 */
62 if (cache->bitmap_high_thresh > 100)
63 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
64 else
65 cache->bitmap_low_thresh = 0;
66 }
67
add_new_free_space_info(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)68 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
69 struct btrfs_block_group *block_group,
70 struct btrfs_path *path)
71 {
72 struct btrfs_root *root = btrfs_free_space_root(block_group);
73 struct btrfs_free_space_info *info;
74 struct btrfs_key key;
75 struct extent_buffer *leaf;
76 int ret;
77
78 key.objectid = block_group->start;
79 key.type = BTRFS_FREE_SPACE_INFO_KEY;
80 key.offset = block_group->length;
81
82 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
83 if (ret)
84 return ret;
85
86 leaf = path->nodes[0];
87 info = btrfs_item_ptr(leaf, path->slots[0],
88 struct btrfs_free_space_info);
89 btrfs_set_free_space_extent_count(leaf, info, 0);
90 btrfs_set_free_space_flags(leaf, info, 0);
91 btrfs_release_path(path);
92 return 0;
93 }
94
btrfs_search_free_space_info(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,int cow)95 struct btrfs_free_space_info *btrfs_search_free_space_info(
96 struct btrfs_trans_handle *trans,
97 struct btrfs_block_group *block_group,
98 struct btrfs_path *path, int cow)
99 {
100 struct btrfs_fs_info *fs_info = block_group->fs_info;
101 struct btrfs_root *root = btrfs_free_space_root(block_group);
102 struct btrfs_key key;
103 int ret;
104
105 key.objectid = block_group->start;
106 key.type = BTRFS_FREE_SPACE_INFO_KEY;
107 key.offset = block_group->length;
108
109 ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
110 if (ret < 0)
111 return ERR_PTR(ret);
112 if (ret != 0) {
113 btrfs_warn(fs_info, "missing free space info for %llu",
114 block_group->start);
115 DEBUG_WARN();
116 return ERR_PTR(-ENOENT);
117 }
118
119 return btrfs_item_ptr(path->nodes[0], path->slots[0],
120 struct btrfs_free_space_info);
121 }
122
123 /*
124 * btrfs_search_slot() but we're looking for the greatest key less than the
125 * passed key.
126 */
btrfs_search_prev_slot(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_key * key,struct btrfs_path * p,int ins_len,int cow)127 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
128 struct btrfs_root *root,
129 struct btrfs_key *key, struct btrfs_path *p,
130 int ins_len, int cow)
131 {
132 int ret;
133
134 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
135 if (ret < 0)
136 return ret;
137
138 if (unlikely(ret == 0)) {
139 DEBUG_WARN();
140 return -EIO;
141 }
142
143 if (unlikely(p->slots[0] == 0)) {
144 DEBUG_WARN("no previous slot found");
145 return -EIO;
146 }
147 p->slots[0]--;
148
149 return 0;
150 }
151
free_space_bitmap_size(const struct btrfs_fs_info * fs_info,u64 size)152 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
153 u64 size)
154 {
155 return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
156 }
157
alloc_bitmap(u32 bitmap_size)158 static unsigned long *alloc_bitmap(u32 bitmap_size)
159 {
160 unsigned long *ret;
161 unsigned int nofs_flag;
162 u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
163
164 /*
165 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
166 * into the filesystem here. All callers hold a transaction handle
167 * open, so if a GFP_KERNEL allocation recurses into the filesystem
168 * and triggers a transaction commit, we would deadlock.
169 */
170 nofs_flag = memalloc_nofs_save();
171 ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
172 memalloc_nofs_restore(nofs_flag);
173 return ret;
174 }
175
le_bitmap_set(unsigned long * map,unsigned int start,int len)176 static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
177 {
178 u8 *p = ((u8 *)map) + BIT_BYTE(start);
179 const unsigned int size = start + len;
180 int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
181 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
182
183 while (len - bits_to_set >= 0) {
184 *p |= mask_to_set;
185 len -= bits_to_set;
186 bits_to_set = BITS_PER_BYTE;
187 mask_to_set = ~0;
188 p++;
189 }
190 if (len) {
191 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
192 *p |= mask_to_set;
193 }
194 }
195
196 EXPORT_FOR_TESTS
btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)197 int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
198 struct btrfs_block_group *block_group,
199 struct btrfs_path *path)
200 {
201 struct btrfs_fs_info *fs_info = trans->fs_info;
202 struct btrfs_root *root = btrfs_free_space_root(block_group);
203 struct btrfs_free_space_info *info;
204 struct btrfs_key key, found_key;
205 struct extent_buffer *leaf;
206 unsigned long *bitmap;
207 char *bitmap_cursor;
208 u64 start, end;
209 u64 bitmap_range, i;
210 u32 bitmap_size, flags, expected_extent_count;
211 u32 extent_count = 0;
212 int done = 0, nr;
213 int ret;
214
215 bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
216 bitmap = alloc_bitmap(bitmap_size);
217 if (unlikely(!bitmap))
218 return 0;
219
220 start = block_group->start;
221 end = btrfs_block_group_end(block_group);
222
223 key.objectid = end - 1;
224 key.type = (u8)-1;
225 key.offset = (u64)-1;
226
227 while (!done) {
228 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
229 if (unlikely(ret)) {
230 btrfs_abort_transaction(trans, ret);
231 goto out;
232 }
233
234 leaf = path->nodes[0];
235 nr = 0;
236 path->slots[0]++;
237 while (path->slots[0] > 0) {
238 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
239
240 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
241 ASSERT(found_key.objectid == block_group->start);
242 ASSERT(found_key.offset == block_group->length);
243 done = 1;
244 break;
245 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
246 u64 first, last;
247
248 ASSERT(found_key.objectid >= start);
249 ASSERT(found_key.objectid < end);
250 ASSERT(found_key.objectid + found_key.offset <= end);
251
252 first = div_u64(found_key.objectid - start,
253 fs_info->sectorsize);
254 last = div_u64(found_key.objectid + found_key.offset - start,
255 fs_info->sectorsize);
256 le_bitmap_set(bitmap, first, last - first);
257
258 extent_count++;
259 nr++;
260 path->slots[0]--;
261 } else {
262 btrfs_err(fs_info, "unexpected free space tree key type %u",
263 found_key.type);
264 ret = -EUCLEAN;
265 btrfs_abort_transaction(trans, ret);
266 goto out;
267 }
268 }
269
270 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
271 if (unlikely(ret)) {
272 btrfs_abort_transaction(trans, ret);
273 goto out;
274 }
275 btrfs_release_path(path);
276 }
277
278 info = btrfs_search_free_space_info(trans, block_group, path, 1);
279 if (IS_ERR(info)) {
280 ret = PTR_ERR(info);
281 btrfs_abort_transaction(trans, ret);
282 goto out;
283 }
284 leaf = path->nodes[0];
285 flags = btrfs_free_space_flags(leaf, info);
286 flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
287 block_group->using_free_space_bitmaps = true;
288 block_group->using_free_space_bitmaps_cached = true;
289 btrfs_set_free_space_flags(leaf, info, flags);
290 expected_extent_count = btrfs_free_space_extent_count(leaf, info);
291 btrfs_release_path(path);
292
293 if (unlikely(extent_count != expected_extent_count)) {
294 btrfs_err(fs_info,
295 "incorrect extent count for %llu; counted %u, expected %u",
296 block_group->start, extent_count,
297 expected_extent_count);
298 ret = -EIO;
299 btrfs_abort_transaction(trans, ret);
300 goto out;
301 }
302
303 bitmap_cursor = (char *)bitmap;
304 bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
305 i = start;
306 while (i < end) {
307 unsigned long ptr;
308 u64 extent_size;
309 u32 data_size;
310
311 extent_size = min(end - i, bitmap_range);
312 data_size = free_space_bitmap_size(fs_info, extent_size);
313
314 key.objectid = i;
315 key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
316 key.offset = extent_size;
317
318 ret = btrfs_insert_empty_item(trans, root, path, &key,
319 data_size);
320 if (unlikely(ret)) {
321 btrfs_abort_transaction(trans, ret);
322 goto out;
323 }
324
325 leaf = path->nodes[0];
326 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
327 write_extent_buffer(leaf, bitmap_cursor, ptr,
328 data_size);
329 btrfs_release_path(path);
330
331 i += extent_size;
332 bitmap_cursor += data_size;
333 }
334
335 ret = 0;
336 out:
337 kvfree(bitmap);
338 return ret;
339 }
340
341 EXPORT_FOR_TESTS
btrfs_convert_free_space_to_extents(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)342 int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
343 struct btrfs_block_group *block_group,
344 struct btrfs_path *path)
345 {
346 struct btrfs_fs_info *fs_info = trans->fs_info;
347 struct btrfs_root *root = btrfs_free_space_root(block_group);
348 struct btrfs_free_space_info *info;
349 struct btrfs_key key, found_key;
350 struct extent_buffer *leaf;
351 unsigned long *bitmap;
352 u64 start, end;
353 u32 bitmap_size, flags, expected_extent_count;
354 unsigned long nrbits, start_bit, end_bit;
355 u32 extent_count = 0;
356 int done = 0, nr;
357 int ret;
358
359 bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
360 bitmap = alloc_bitmap(bitmap_size);
361 if (unlikely(!bitmap))
362 return 0;
363
364 start = block_group->start;
365 end = btrfs_block_group_end(block_group);
366
367 key.objectid = end - 1;
368 key.type = (u8)-1;
369 key.offset = (u64)-1;
370
371 while (!done) {
372 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
373 if (unlikely(ret)) {
374 btrfs_abort_transaction(trans, ret);
375 goto out;
376 }
377
378 leaf = path->nodes[0];
379 nr = 0;
380 path->slots[0]++;
381 while (path->slots[0] > 0) {
382 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
383
384 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
385 ASSERT(found_key.objectid == block_group->start);
386 ASSERT(found_key.offset == block_group->length);
387 done = 1;
388 break;
389 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
390 unsigned long ptr;
391 char *bitmap_cursor;
392 u32 bitmap_pos, data_size;
393
394 ASSERT(found_key.objectid >= start);
395 ASSERT(found_key.objectid < end);
396 ASSERT(found_key.objectid + found_key.offset <= end);
397
398 bitmap_pos = div_u64(found_key.objectid - start,
399 fs_info->sectorsize *
400 BITS_PER_BYTE);
401 bitmap_cursor = ((char *)bitmap) + bitmap_pos;
402 data_size = free_space_bitmap_size(fs_info,
403 found_key.offset);
404
405 path->slots[0]--;
406 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
407 read_extent_buffer(leaf, bitmap_cursor, ptr,
408 data_size);
409
410 nr++;
411 } else {
412 btrfs_err(fs_info, "unexpected free space tree key type %u",
413 found_key.type);
414 ret = -EUCLEAN;
415 btrfs_abort_transaction(trans, ret);
416 goto out;
417 }
418 }
419
420 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
421 if (unlikely(ret)) {
422 btrfs_abort_transaction(trans, ret);
423 goto out;
424 }
425 btrfs_release_path(path);
426 }
427
428 info = btrfs_search_free_space_info(trans, block_group, path, 1);
429 if (IS_ERR(info)) {
430 ret = PTR_ERR(info);
431 btrfs_abort_transaction(trans, ret);
432 goto out;
433 }
434 leaf = path->nodes[0];
435 flags = btrfs_free_space_flags(leaf, info);
436 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
437 block_group->using_free_space_bitmaps = false;
438 block_group->using_free_space_bitmaps_cached = true;
439 btrfs_set_free_space_flags(leaf, info, flags);
440 expected_extent_count = btrfs_free_space_extent_count(leaf, info);
441 btrfs_release_path(path);
442
443 nrbits = block_group->length >> fs_info->sectorsize_bits;
444 start_bit = find_next_bit_le(bitmap, nrbits, 0);
445
446 while (start_bit < nrbits) {
447 end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
448 ASSERT(start_bit < end_bit);
449
450 key.objectid = start + start_bit * fs_info->sectorsize;
451 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
452 key.offset = (end_bit - start_bit) * fs_info->sectorsize;
453
454 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
455 if (unlikely(ret)) {
456 btrfs_abort_transaction(trans, ret);
457 goto out;
458 }
459 btrfs_release_path(path);
460
461 extent_count++;
462
463 start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
464 }
465
466 if (unlikely(extent_count != expected_extent_count)) {
467 btrfs_err(fs_info,
468 "incorrect extent count for %llu; counted %u, expected %u",
469 block_group->start, extent_count,
470 expected_extent_count);
471 ret = -EIO;
472 btrfs_abort_transaction(trans, ret);
473 goto out;
474 }
475
476 ret = 0;
477 out:
478 kvfree(bitmap);
479 return ret;
480 }
481
update_free_space_extent_count(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,int new_extents)482 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
483 struct btrfs_block_group *block_group,
484 struct btrfs_path *path,
485 int new_extents)
486 {
487 struct btrfs_free_space_info *info;
488 u32 flags;
489 u32 extent_count;
490 int ret = 0;
491
492 if (new_extents == 0)
493 return 0;
494
495 info = btrfs_search_free_space_info(trans, block_group, path, 1);
496 if (IS_ERR(info))
497 return PTR_ERR(info);
498
499 flags = btrfs_free_space_flags(path->nodes[0], info);
500 extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
501
502 extent_count += new_extents;
503 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
504 btrfs_release_path(path);
505
506 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
507 extent_count > block_group->bitmap_high_thresh) {
508 ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path);
509 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
510 extent_count < block_group->bitmap_low_thresh) {
511 ret = btrfs_convert_free_space_to_extents(trans, block_group, path);
512 }
513
514 return ret;
515 }
516
517 EXPORT_FOR_TESTS
btrfs_free_space_test_bit(struct btrfs_block_group * block_group,struct btrfs_path * path,u64 offset)518 bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
519 struct btrfs_path *path, u64 offset)
520 {
521 struct extent_buffer *leaf;
522 struct btrfs_key key;
523 u64 found_start, found_end;
524 unsigned long ptr, i;
525
526 leaf = path->nodes[0];
527 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
528 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
529
530 found_start = key.objectid;
531 found_end = key.objectid + key.offset;
532 ASSERT(offset >= found_start && offset < found_end);
533
534 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
535 i = div_u64(offset - found_start,
536 block_group->fs_info->sectorsize);
537 return extent_buffer_test_bit(leaf, ptr, i);
538 }
539
free_space_modify_bits(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 * start,u64 * size,bool set_bits)540 static void free_space_modify_bits(struct btrfs_trans_handle *trans,
541 struct btrfs_block_group *block_group,
542 struct btrfs_path *path, u64 *start, u64 *size,
543 bool set_bits)
544 {
545 struct btrfs_fs_info *fs_info = block_group->fs_info;
546 struct extent_buffer *leaf;
547 struct btrfs_key key;
548 u64 end = *start + *size;
549 u64 found_start, found_end;
550 unsigned long ptr, first, last;
551
552 leaf = path->nodes[0];
553 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
554 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
555
556 found_start = key.objectid;
557 found_end = key.objectid + key.offset;
558 ASSERT(*start >= found_start && *start < found_end);
559 ASSERT(end > found_start);
560
561 if (end > found_end)
562 end = found_end;
563
564 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
565 first = (*start - found_start) >> fs_info->sectorsize_bits;
566 last = (end - found_start) >> fs_info->sectorsize_bits;
567 if (set_bits)
568 extent_buffer_bitmap_set(leaf, ptr, first, last - first);
569 else
570 extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
571 btrfs_mark_buffer_dirty(trans, leaf);
572
573 *size -= end - *start;
574 *start = end;
575 }
576
577 /*
578 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
579 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
580 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
581 * looking for.
582 */
free_space_next_bitmap(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * p)583 static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
584 struct btrfs_root *root, struct btrfs_path *p)
585 {
586 struct btrfs_key key;
587
588 if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
589 p->slots[0]++;
590 return 0;
591 }
592
593 btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
594 btrfs_release_path(p);
595
596 key.objectid += key.offset;
597 key.type = (u8)-1;
598 key.offset = (u64)-1;
599
600 return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
601 }
602
603 /*
604 * If remove is 1, then we are removing free space, thus clearing bits in the
605 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
606 * the bitmap.
607 */
modify_free_space_bitmap(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size,bool remove)608 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
609 struct btrfs_block_group *block_group,
610 struct btrfs_path *path,
611 u64 start, u64 size, bool remove)
612 {
613 struct btrfs_root *root = btrfs_free_space_root(block_group);
614 struct btrfs_key key;
615 u64 end = start + size;
616 u64 cur_start, cur_size;
617 bool prev_bit_set = false;
618 bool next_bit_set = false;
619 int new_extents;
620 int ret;
621
622 /*
623 * Read the bit for the block immediately before the extent of space if
624 * that block is within the block group.
625 */
626 if (start > block_group->start) {
627 u64 prev_block = start - block_group->fs_info->sectorsize;
628
629 key.objectid = prev_block;
630 key.type = (u8)-1;
631 key.offset = (u64)-1;
632
633 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
634 if (ret)
635 return ret;
636
637 prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);
638
639 /* The previous block may have been in the previous bitmap. */
640 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
641 if (start >= key.objectid + key.offset) {
642 ret = free_space_next_bitmap(trans, root, path);
643 if (ret)
644 return ret;
645 }
646 } else {
647 key.objectid = start;
648 key.type = (u8)-1;
649 key.offset = (u64)-1;
650
651 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
652 if (ret)
653 return ret;
654 }
655
656 /*
657 * Iterate over all of the bitmaps overlapped by the extent of space,
658 * clearing/setting bits as required.
659 */
660 cur_start = start;
661 cur_size = size;
662 while (1) {
663 free_space_modify_bits(trans, block_group, path, &cur_start,
664 &cur_size, !remove);
665 if (cur_size == 0)
666 break;
667 ret = free_space_next_bitmap(trans, root, path);
668 if (ret)
669 return ret;
670 }
671
672 /*
673 * Read the bit for the block immediately after the extent of space if
674 * that block is within the block group.
675 */
676 if (end < btrfs_block_group_end(block_group)) {
677 /* The next block may be in the next bitmap. */
678 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
679 if (end >= key.objectid + key.offset) {
680 ret = free_space_next_bitmap(trans, root, path);
681 if (ret)
682 return ret;
683 }
684
685 next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
686 }
687
688 if (remove) {
689 new_extents = -1;
690 if (prev_bit_set) {
691 /* Leftover on the left. */
692 new_extents++;
693 }
694 if (next_bit_set) {
695 /* Leftover on the right. */
696 new_extents++;
697 }
698 } else {
699 new_extents = 1;
700 if (prev_bit_set) {
701 /* Merging with neighbor on the left. */
702 new_extents--;
703 }
704 if (next_bit_set) {
705 /* Merging with neighbor on the right. */
706 new_extents--;
707 }
708 }
709
710 btrfs_release_path(path);
711 return update_free_space_extent_count(trans, block_group, path, new_extents);
712 }
713
remove_free_space_extent(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)714 static int remove_free_space_extent(struct btrfs_trans_handle *trans,
715 struct btrfs_block_group *block_group,
716 struct btrfs_path *path,
717 u64 start, u64 size)
718 {
719 struct btrfs_root *root = btrfs_free_space_root(block_group);
720 struct btrfs_key key;
721 u64 found_start, found_end;
722 u64 end = start + size;
723 int new_extents = -1;
724 int ret;
725
726 key.objectid = start;
727 key.type = (u8)-1;
728 key.offset = (u64)-1;
729
730 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
731 if (ret)
732 return ret;
733
734 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
735
736 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
737
738 found_start = key.objectid;
739 found_end = key.objectid + key.offset;
740 ASSERT(start >= found_start && end <= found_end);
741
742 /*
743 * Okay, now that we've found the free space extent which contains the
744 * free space that we are removing, there are four cases:
745 *
746 * 1. We're using the whole extent: delete the key we found and
747 * decrement the free space extent count.
748 * 2. We are using part of the extent starting at the beginning: delete
749 * the key we found and insert a new key representing the leftover at
750 * the end. There is no net change in the number of extents.
751 * 3. We are using part of the extent ending at the end: delete the key
752 * we found and insert a new key representing the leftover at the
753 * beginning. There is no net change in the number of extents.
754 * 4. We are using part of the extent in the middle: delete the key we
755 * found and insert two new keys representing the leftovers on each
756 * side. Where we used to have one extent, we now have two, so increment
757 * the extent count. We may need to convert the block group to bitmaps
758 * as a result.
759 */
760
761 /* Delete the existing key (cases 1-4). */
762 ret = btrfs_del_item(trans, root, path);
763 if (ret)
764 return ret;
765
766 /* Add a key for leftovers at the beginning (cases 3 and 4). */
767 if (start > found_start) {
768 key.objectid = found_start;
769 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
770 key.offset = start - found_start;
771
772 btrfs_release_path(path);
773 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
774 if (ret)
775 return ret;
776 new_extents++;
777 }
778
779 /* Add a key for leftovers at the end (cases 2 and 4). */
780 if (end < found_end) {
781 key.objectid = end;
782 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
783 key.offset = found_end - end;
784
785 btrfs_release_path(path);
786 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
787 if (ret)
788 return ret;
789 new_extents++;
790 }
791
792 btrfs_release_path(path);
793 return update_free_space_extent_count(trans, block_group, path, new_extents);
794 }
795
using_bitmaps(struct btrfs_block_group * bg,struct btrfs_path * path)796 static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
797 {
798 struct btrfs_free_space_info *info;
799 u32 flags;
800
801 if (bg->using_free_space_bitmaps_cached)
802 return bg->using_free_space_bitmaps;
803
804 info = btrfs_search_free_space_info(NULL, bg, path, 0);
805 if (IS_ERR(info))
806 return PTR_ERR(info);
807 flags = btrfs_free_space_flags(path->nodes[0], info);
808 btrfs_release_path(path);
809
810 bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS);
811 bg->using_free_space_bitmaps_cached = true;
812
813 return bg->using_free_space_bitmaps;
814 }
815
816 EXPORT_FOR_TESTS
__btrfs_remove_from_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)817 int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
818 struct btrfs_block_group *block_group,
819 struct btrfs_path *path, u64 start, u64 size)
820 {
821 int ret;
822
823 ret = __add_block_group_free_space(trans, block_group, path);
824 if (ret)
825 return ret;
826
827 ret = using_bitmaps(block_group, path);
828 if (ret < 0)
829 return ret;
830
831 if (ret)
832 return modify_free_space_bitmap(trans, block_group, path,
833 start, size, true);
834
835 return remove_free_space_extent(trans, block_group, path, start, size);
836 }
837
btrfs_remove_from_free_space_tree(struct btrfs_trans_handle * trans,u64 start,u64 size)838 int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
839 u64 start, u64 size)
840 {
841 struct btrfs_block_group *block_group;
842 BTRFS_PATH_AUTO_FREE(path);
843 int ret;
844
845 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
846 return 0;
847
848 path = btrfs_alloc_path();
849 if (unlikely(!path)) {
850 ret = -ENOMEM;
851 btrfs_abort_transaction(trans, ret);
852 return ret;
853 }
854
855 block_group = btrfs_lookup_block_group(trans->fs_info, start);
856 if (unlikely(!block_group)) {
857 DEBUG_WARN("no block group found for start=%llu", start);
858 ret = -ENOENT;
859 btrfs_abort_transaction(trans, ret);
860 return ret;
861 }
862
863 mutex_lock(&block_group->free_space_lock);
864 ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size);
865 mutex_unlock(&block_group->free_space_lock);
866 if (ret)
867 btrfs_abort_transaction(trans, ret);
868
869 btrfs_put_block_group(block_group);
870
871 return ret;
872 }
873
add_free_space_extent(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)874 static int add_free_space_extent(struct btrfs_trans_handle *trans,
875 struct btrfs_block_group *block_group,
876 struct btrfs_path *path,
877 u64 start, u64 size)
878 {
879 struct btrfs_root *root = btrfs_free_space_root(block_group);
880 struct btrfs_key key, new_key;
881 u64 found_start, found_end;
882 u64 end = start + size;
883 int new_extents = 1;
884 int ret;
885
886 /*
887 * We are adding a new extent of free space, but we need to merge
888 * extents. There are four cases here:
889 *
890 * 1. The new extent does not have any immediate neighbors to merge
891 * with: add the new key and increment the free space extent count. We
892 * may need to convert the block group to bitmaps as a result.
893 * 2. The new extent has an immediate neighbor before it: remove the
894 * previous key and insert a new key combining both of them. There is no
895 * net change in the number of extents.
896 * 3. The new extent has an immediate neighbor after it: remove the next
897 * key and insert a new key combining both of them. There is no net
898 * change in the number of extents.
899 * 4. The new extent has immediate neighbors on both sides: remove both
900 * of the keys and insert a new key combining all of them. Where we used
901 * to have two extents, we now have one, so decrement the extent count.
902 */
903
904 new_key.objectid = start;
905 new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
906 new_key.offset = size;
907
908 /* Search for a neighbor on the left. */
909 if (start == block_group->start)
910 goto right;
911 key.objectid = start - 1;
912 key.type = (u8)-1;
913 key.offset = (u64)-1;
914
915 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
916 if (ret)
917 return ret;
918
919 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
920
921 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
922 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
923 btrfs_release_path(path);
924 goto right;
925 }
926
927 found_start = key.objectid;
928 found_end = key.objectid + key.offset;
929 ASSERT(found_start >= block_group->start &&
930 found_end > block_group->start);
931 ASSERT(found_start < start && found_end <= start);
932
933 /*
934 * Delete the neighbor on the left and absorb it into the new key (cases
935 * 2 and 4).
936 */
937 if (found_end == start) {
938 ret = btrfs_del_item(trans, root, path);
939 if (ret)
940 return ret;
941 new_key.objectid = found_start;
942 new_key.offset += key.offset;
943 new_extents--;
944 }
945 btrfs_release_path(path);
946
947 right:
948 /* Search for a neighbor on the right. */
949 if (end == btrfs_block_group_end(block_group))
950 goto insert;
951 key.objectid = end;
952 key.type = (u8)-1;
953 key.offset = (u64)-1;
954
955 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
956 if (ret)
957 return ret;
958
959 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
960
961 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
962 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
963 btrfs_release_path(path);
964 goto insert;
965 }
966
967 found_start = key.objectid;
968 found_end = key.objectid + key.offset;
969 ASSERT(found_start >= block_group->start &&
970 found_end > block_group->start);
971 ASSERT((found_start < start && found_end <= start) ||
972 (found_start >= end && found_end > end));
973
974 /*
975 * Delete the neighbor on the right and absorb it into the new key
976 * (cases 3 and 4).
977 */
978 if (found_start == end) {
979 ret = btrfs_del_item(trans, root, path);
980 if (ret)
981 return ret;
982 new_key.offset += key.offset;
983 new_extents--;
984 }
985 btrfs_release_path(path);
986
987 insert:
988 /* Insert the new key (cases 1-4). */
989 ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
990 if (ret)
991 return ret;
992
993 btrfs_release_path(path);
994 return update_free_space_extent_count(trans, block_group, path, new_extents);
995 }
996
997 EXPORT_FOR_TESTS
__btrfs_add_to_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path,u64 start,u64 size)998 int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
999 struct btrfs_block_group *block_group,
1000 struct btrfs_path *path, u64 start, u64 size)
1001 {
1002 int ret;
1003
1004 ret = __add_block_group_free_space(trans, block_group, path);
1005 if (ret)
1006 return ret;
1007
1008 ret = using_bitmaps(block_group, path);
1009 if (ret < 0)
1010 return ret;
1011
1012 if (ret)
1013 return modify_free_space_bitmap(trans, block_group, path,
1014 start, size, false);
1015
1016 return add_free_space_extent(trans, block_group, path, start, size);
1017 }
1018
btrfs_add_to_free_space_tree(struct btrfs_trans_handle * trans,u64 start,u64 size)1019 int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
1020 u64 start, u64 size)
1021 {
1022 struct btrfs_block_group *block_group;
1023 BTRFS_PATH_AUTO_FREE(path);
1024 int ret;
1025
1026 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1027 return 0;
1028
1029 path = btrfs_alloc_path();
1030 if (unlikely(!path)) {
1031 ret = -ENOMEM;
1032 btrfs_abort_transaction(trans, ret);
1033 return ret;
1034 }
1035
1036 block_group = btrfs_lookup_block_group(trans->fs_info, start);
1037 if (unlikely(!block_group)) {
1038 DEBUG_WARN("no block group found for start=%llu", start);
1039 ret = -ENOENT;
1040 btrfs_abort_transaction(trans, ret);
1041 return ret;
1042 }
1043
1044 mutex_lock(&block_group->free_space_lock);
1045 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size);
1046 mutex_unlock(&block_group->free_space_lock);
1047 if (ret)
1048 btrfs_abort_transaction(trans, ret);
1049
1050 btrfs_put_block_group(block_group);
1051
1052 return ret;
1053 }
1054
1055 /*
1056 * Populate the free space tree by walking the extent tree. Operations on the
1057 * extent tree that happen as a result of writes to the free space tree will go
1058 * through the normal add/remove hooks.
1059 */
populate_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1060 static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1061 struct btrfs_block_group *block_group)
1062 {
1063 struct btrfs_root *extent_root;
1064 BTRFS_PATH_AUTO_FREE(path);
1065 BTRFS_PATH_AUTO_FREE(path2);
1066 struct btrfs_key key;
1067 u64 start, end;
1068 int ret;
1069
1070 path = btrfs_alloc_path();
1071 if (!path)
1072 return -ENOMEM;
1073
1074 path2 = btrfs_alloc_path();
1075 if (!path2)
1076 return -ENOMEM;
1077
1078 path->reada = READA_FORWARD;
1079
1080 ret = add_new_free_space_info(trans, block_group, path2);
1081 if (ret)
1082 return ret;
1083
1084 extent_root = btrfs_extent_root(trans->fs_info, block_group->start);
1085 if (unlikely(!extent_root)) {
1086 btrfs_err(trans->fs_info,
1087 "missing extent root for block group at offset %llu",
1088 block_group->start);
1089 return -EUCLEAN;
1090 }
1091
1092 mutex_lock(&block_group->free_space_lock);
1093
1094 /*
1095 * Iterate through all of the extent and metadata items in this block
1096 * group, adding the free space between them and the free space at the
1097 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1098 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1099 * contained in.
1100 */
1101 key.objectid = block_group->start;
1102 key.type = BTRFS_EXTENT_ITEM_KEY;
1103 key.offset = 0;
1104
1105 ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1106 if (ret < 0)
1107 goto out_locked;
1108 /*
1109 * If ret is 1 (no key found), it means this is an empty block group,
1110 * without any extents allocated from it and there's no block group
1111 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
1112 * because we are using the block group tree feature (so block group
1113 * items are stored in the block group tree) or this is a new block
1114 * group created in the current transaction and its block group item
1115 * was not yet inserted in the extent tree (that happens in
1116 * btrfs_create_pending_block_groups() -> insert_block_group_item()).
1117 * It also means there are no extents allocated for block groups with a
1118 * start offset beyond this block group's end offset (this is the last,
1119 * highest, block group).
1120 */
1121 start = block_group->start;
1122 end = btrfs_block_group_end(block_group);
1123 while (ret == 0) {
1124 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1125
1126 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1127 key.type == BTRFS_METADATA_ITEM_KEY) {
1128 if (key.objectid >= end)
1129 break;
1130
1131 if (start < key.objectid) {
1132 ret = __btrfs_add_to_free_space_tree(trans,
1133 block_group,
1134 path2, start,
1135 key.objectid -
1136 start);
1137 if (ret)
1138 goto out_locked;
1139 }
1140 start = key.objectid;
1141 if (key.type == BTRFS_METADATA_ITEM_KEY)
1142 start += trans->fs_info->nodesize;
1143 else
1144 start += key.offset;
1145 } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1146 if (key.objectid != block_group->start)
1147 break;
1148 }
1149
1150 ret = btrfs_next_item(extent_root, path);
1151 if (ret < 0)
1152 goto out_locked;
1153 }
1154 if (start < end) {
1155 ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
1156 start, end - start);
1157 if (ret)
1158 goto out_locked;
1159 }
1160
1161 ret = 0;
1162 out_locked:
1163 mutex_unlock(&block_group->free_space_lock);
1164
1165 return ret;
1166 }
1167
btrfs_create_free_space_tree(struct btrfs_fs_info * fs_info)1168 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1169 {
1170 struct btrfs_trans_handle *trans;
1171 struct btrfs_root *tree_root = fs_info->tree_root;
1172 struct btrfs_root *free_space_root;
1173 struct btrfs_block_group *block_group;
1174 struct rb_node *node;
1175 int ret;
1176
1177 trans = btrfs_start_transaction(tree_root, 0);
1178 if (IS_ERR(trans))
1179 return PTR_ERR(trans);
1180
1181 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1182 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1183 free_space_root = btrfs_create_tree(trans,
1184 BTRFS_FREE_SPACE_TREE_OBJECTID);
1185 if (IS_ERR(free_space_root)) {
1186 ret = PTR_ERR(free_space_root);
1187 btrfs_abort_transaction(trans, ret);
1188 btrfs_end_transaction(trans);
1189 goto out_clear;
1190 }
1191 ret = btrfs_global_root_insert(free_space_root);
1192 if (unlikely(ret)) {
1193 btrfs_put_root(free_space_root);
1194 btrfs_abort_transaction(trans, ret);
1195 btrfs_end_transaction(trans);
1196 goto out_clear;
1197 }
1198
1199 node = rb_first_cached(&fs_info->block_group_cache_tree);
1200 while (node) {
1201 block_group = rb_entry(node, struct btrfs_block_group,
1202 cache_node);
1203 ret = populate_free_space_tree(trans, block_group);
1204 if (unlikely(ret)) {
1205 btrfs_abort_transaction(trans, ret);
1206 btrfs_end_transaction(trans);
1207 goto out_clear;
1208 }
1209 node = rb_next(node);
1210 }
1211
1212 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1213 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1214 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1215 ret = btrfs_commit_transaction(trans);
1216
1217 /*
1218 * Now that we've committed the transaction any reading of our commit
1219 * root will be safe, so we can cache from the free space tree now.
1220 */
1221 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1222 return ret;
1223
1224 out_clear:
1225 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1226 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1227 return ret;
1228 }
1229
clear_free_space_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root)1230 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1231 struct btrfs_root *root)
1232 {
1233 BTRFS_PATH_AUTO_FREE(path);
1234 struct btrfs_key key;
1235 struct rb_node *node;
1236 int nr;
1237 int ret;
1238
1239 path = btrfs_alloc_path();
1240 if (!path)
1241 return -ENOMEM;
1242
1243 key.objectid = 0;
1244 key.type = 0;
1245 key.offset = 0;
1246
1247 while (1) {
1248 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1249 if (ret < 0)
1250 return ret;
1251
1252 nr = btrfs_header_nritems(path->nodes[0]);
1253 if (!nr)
1254 break;
1255
1256 path->slots[0] = 0;
1257 ret = btrfs_del_items(trans, root, path, 0, nr);
1258 if (ret)
1259 return ret;
1260
1261 btrfs_release_path(path);
1262 }
1263
1264 node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
1265 while (node) {
1266 struct btrfs_block_group *bg;
1267
1268 bg = rb_entry(node, struct btrfs_block_group, cache_node);
1269 clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
1270 node = rb_next(node);
1271 cond_resched();
1272 }
1273
1274 return 0;
1275 }
1276
btrfs_delete_free_space_tree(struct btrfs_fs_info * fs_info)1277 int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
1278 {
1279 struct btrfs_trans_handle *trans;
1280 struct btrfs_root *tree_root = fs_info->tree_root;
1281 struct btrfs_key key = {
1282 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1283 .type = BTRFS_ROOT_ITEM_KEY,
1284 .offset = 0,
1285 };
1286 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1287 int ret;
1288
1289 trans = btrfs_start_transaction(tree_root, 0);
1290 if (IS_ERR(trans))
1291 return PTR_ERR(trans);
1292
1293 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1294 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1295
1296 ret = clear_free_space_tree(trans, free_space_root);
1297 if (unlikely(ret)) {
1298 btrfs_abort_transaction(trans, ret);
1299 btrfs_end_transaction(trans);
1300 return ret;
1301 }
1302
1303 ret = btrfs_del_root(trans, &free_space_root->root_key);
1304 if (unlikely(ret)) {
1305 btrfs_abort_transaction(trans, ret);
1306 btrfs_end_transaction(trans);
1307 return ret;
1308 }
1309
1310 btrfs_global_root_delete(free_space_root);
1311
1312 spin_lock(&fs_info->trans_lock);
1313 list_del(&free_space_root->dirty_list);
1314 spin_unlock(&fs_info->trans_lock);
1315
1316 btrfs_tree_lock(free_space_root->node);
1317 btrfs_clear_buffer_dirty(trans, free_space_root->node);
1318 btrfs_tree_unlock(free_space_root->node);
1319 ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1320 free_space_root->node, 0, 1);
1321 btrfs_put_root(free_space_root);
1322 if (unlikely(ret < 0)) {
1323 btrfs_abort_transaction(trans, ret);
1324 btrfs_end_transaction(trans);
1325 return ret;
1326 }
1327
1328 return btrfs_commit_transaction(trans);
1329 }
1330
btrfs_rebuild_free_space_tree(struct btrfs_fs_info * fs_info)1331 int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
1332 {
1333 struct btrfs_trans_handle *trans;
1334 struct btrfs_key key = {
1335 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1336 .type = BTRFS_ROOT_ITEM_KEY,
1337 .offset = 0,
1338 };
1339 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1340 struct rb_node *node;
1341 int ret;
1342
1343 trans = btrfs_start_transaction(free_space_root, 1);
1344 if (IS_ERR(trans))
1345 return PTR_ERR(trans);
1346
1347 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1348 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1349
1350 ret = clear_free_space_tree(trans, free_space_root);
1351 if (unlikely(ret)) {
1352 btrfs_abort_transaction(trans, ret);
1353 btrfs_end_transaction(trans);
1354 return ret;
1355 }
1356
1357 node = rb_first_cached(&fs_info->block_group_cache_tree);
1358 while (node) {
1359 struct btrfs_block_group *block_group;
1360
1361 block_group = rb_entry(node, struct btrfs_block_group,
1362 cache_node);
1363
1364 if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
1365 &block_group->runtime_flags))
1366 goto next;
1367
1368 ret = populate_free_space_tree(trans, block_group);
1369 if (unlikely(ret)) {
1370 btrfs_abort_transaction(trans, ret);
1371 btrfs_end_transaction(trans);
1372 return ret;
1373 }
1374 next:
1375 if (btrfs_should_end_transaction(trans)) {
1376 btrfs_end_transaction(trans);
1377 trans = btrfs_start_transaction(free_space_root, 1);
1378 if (IS_ERR(trans))
1379 return PTR_ERR(trans);
1380 }
1381 node = rb_next(node);
1382 }
1383
1384 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1385 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1386 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1387
1388 ret = btrfs_commit_transaction(trans);
1389 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1390 return ret;
1391 }
1392
__add_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_path * path)1393 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1394 struct btrfs_block_group *block_group,
1395 struct btrfs_path *path)
1396 {
1397 bool own_path = false;
1398 int ret;
1399
1400 if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
1401 &block_group->runtime_flags))
1402 return 0;
1403
1404 /*
1405 * While rebuilding the free space tree we may allocate new metadata
1406 * block groups while modifying the free space tree.
1407 *
1408 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
1409 * can use multiple transactions, every time btrfs_end_transaction() is
1410 * called at btrfs_rebuild_free_space_tree() we finish the creation of
1411 * new block groups by calling btrfs_create_pending_block_groups(), and
1412 * that in turn calls us, through btrfs_add_block_group_free_space(),
1413 * to add a free space info item and a free space extent item for the
1414 * block group.
1415 *
1416 * Then later btrfs_rebuild_free_space_tree() may find such new block
1417 * groups and processes them with populate_free_space_tree(), which can
1418 * fail with EEXIST since there are already items for the block group in
1419 * the free space tree. Notice that we say "may find" because a new
1420 * block group may be added to the block groups rbtree in a node before
1421 * or after the block group currently being processed by the rebuild
1422 * process. So signal the rebuild process to skip such new block groups
1423 * if it finds them.
1424 */
1425 set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
1426
1427 if (!path) {
1428 path = btrfs_alloc_path();
1429 if (unlikely(!path)) {
1430 btrfs_abort_transaction(trans, -ENOMEM);
1431 return -ENOMEM;
1432 }
1433 own_path = true;
1434 }
1435
1436 ret = add_new_free_space_info(trans, block_group, path);
1437 if (unlikely(ret)) {
1438 btrfs_abort_transaction(trans, ret);
1439 goto out;
1440 }
1441
1442 ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
1443 block_group->start, block_group->length);
1444 if (ret)
1445 btrfs_abort_transaction(trans, ret);
1446
1447 out:
1448 if (own_path)
1449 btrfs_free_path(path);
1450
1451 return ret;
1452 }
1453
btrfs_add_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1454 int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
1455 struct btrfs_block_group *block_group)
1456 {
1457 int ret;
1458
1459 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1460 return 0;
1461
1462 mutex_lock(&block_group->free_space_lock);
1463 ret = __add_block_group_free_space(trans, block_group, NULL);
1464 mutex_unlock(&block_group->free_space_lock);
1465 return ret;
1466 }
1467
btrfs_remove_block_group_free_space(struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group)1468 int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
1469 struct btrfs_block_group *block_group)
1470 {
1471 struct btrfs_root *root = btrfs_free_space_root(block_group);
1472 BTRFS_PATH_AUTO_FREE(path);
1473 struct btrfs_key key, found_key;
1474 struct extent_buffer *leaf;
1475 u64 start, end;
1476 int done = 0, nr;
1477 int ret;
1478
1479 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1480 return 0;
1481
1482 if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
1483 /* We never added this block group to the free space tree. */
1484 return 0;
1485 }
1486
1487 path = btrfs_alloc_path();
1488 if (unlikely(!path)) {
1489 ret = -ENOMEM;
1490 btrfs_abort_transaction(trans, ret);
1491 return ret;
1492 }
1493
1494 start = block_group->start;
1495 end = btrfs_block_group_end(block_group);
1496
1497 key.objectid = end - 1;
1498 key.type = (u8)-1;
1499 key.offset = (u64)-1;
1500
1501 while (!done) {
1502 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1503 if (unlikely(ret)) {
1504 btrfs_abort_transaction(trans, ret);
1505 return ret;
1506 }
1507
1508 leaf = path->nodes[0];
1509 nr = 0;
1510 path->slots[0]++;
1511 while (path->slots[0] > 0) {
1512 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1513
1514 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1515 ASSERT(found_key.objectid == block_group->start);
1516 ASSERT(found_key.offset == block_group->length);
1517 done = 1;
1518 nr++;
1519 path->slots[0]--;
1520 break;
1521 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1522 found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1523 ASSERT(found_key.objectid >= start);
1524 ASSERT(found_key.objectid < end);
1525 ASSERT(found_key.objectid + found_key.offset <= end);
1526 nr++;
1527 path->slots[0]--;
1528 } else {
1529 btrfs_err(trans->fs_info, "unexpected free space tree key type %u",
1530 found_key.type);
1531 ret = -EUCLEAN;
1532 btrfs_abort_transaction(trans, ret);
1533 return ret;
1534 }
1535 }
1536
1537 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1538 if (unlikely(ret)) {
1539 btrfs_abort_transaction(trans, ret);
1540 return ret;
1541 }
1542 btrfs_release_path(path);
1543 }
1544
1545 return 0;
1546 }
1547
load_free_space_bitmaps(struct btrfs_caching_control * caching_ctl,struct btrfs_path * path,u32 expected_extent_count)1548 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1549 struct btrfs_path *path,
1550 u32 expected_extent_count)
1551 {
1552 struct btrfs_block_group *block_group = caching_ctl->block_group;
1553 struct btrfs_fs_info *fs_info = block_group->fs_info;
1554 struct btrfs_root *root;
1555 struct btrfs_key key;
1556 bool prev_bit_set = false;
1557 /* Initialize to silence GCC. */
1558 u64 extent_start = 0;
1559 const u64 end = btrfs_block_group_end(block_group);
1560 u64 offset;
1561 u64 total_found = 0;
1562 u32 extent_count = 0;
1563 int ret;
1564
1565 root = btrfs_free_space_root(block_group);
1566
1567 while (1) {
1568 ret = btrfs_next_item(root, path);
1569 if (ret < 0)
1570 return ret;
1571 if (ret)
1572 break;
1573
1574 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1575
1576 if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1577 break;
1578
1579 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1580 ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1581
1582 offset = key.objectid;
1583 while (offset < key.objectid + key.offset) {
1584 bool bit_set;
1585
1586 bit_set = btrfs_free_space_test_bit(block_group, path, offset);
1587 if (!prev_bit_set && bit_set) {
1588 extent_start = offset;
1589 } else if (prev_bit_set && !bit_set) {
1590 u64 space_added;
1591
1592 ret = btrfs_add_new_free_space(block_group,
1593 extent_start,
1594 offset,
1595 &space_added);
1596 if (ret)
1597 return ret;
1598 total_found += space_added;
1599 if (total_found > CACHING_CTL_WAKE_UP) {
1600 total_found = 0;
1601 wake_up(&caching_ctl->wait);
1602 }
1603 extent_count++;
1604 }
1605 prev_bit_set = bit_set;
1606 offset += fs_info->sectorsize;
1607 }
1608 }
1609 if (prev_bit_set) {
1610 ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
1611 if (ret)
1612 return ret;
1613 extent_count++;
1614 }
1615
1616 if (unlikely(extent_count != expected_extent_count)) {
1617 btrfs_err(fs_info,
1618 "incorrect extent count for %llu; counted %u, expected %u",
1619 block_group->start, extent_count,
1620 expected_extent_count);
1621 DEBUG_WARN();
1622 return -EIO;
1623 }
1624
1625 return 0;
1626 }
1627
load_free_space_extents(struct btrfs_caching_control * caching_ctl,struct btrfs_path * path,u32 expected_extent_count)1628 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1629 struct btrfs_path *path,
1630 u32 expected_extent_count)
1631 {
1632 struct btrfs_block_group *block_group = caching_ctl->block_group;
1633 struct btrfs_fs_info *fs_info = block_group->fs_info;
1634 struct btrfs_root *root;
1635 struct btrfs_key key;
1636 const u64 end = btrfs_block_group_end(block_group);
1637 u64 total_found = 0;
1638 u32 extent_count = 0;
1639 int ret;
1640
1641 root = btrfs_free_space_root(block_group);
1642
1643 while (1) {
1644 u64 space_added;
1645
1646 ret = btrfs_next_item(root, path);
1647 if (ret < 0)
1648 return ret;
1649 if (ret)
1650 break;
1651
1652 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1653
1654 if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1655 break;
1656
1657 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1658 ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1659
1660 ret = btrfs_add_new_free_space(block_group, key.objectid,
1661 key.objectid + key.offset,
1662 &space_added);
1663 if (ret)
1664 return ret;
1665 total_found += space_added;
1666 if (total_found > CACHING_CTL_WAKE_UP) {
1667 total_found = 0;
1668 wake_up(&caching_ctl->wait);
1669 }
1670 extent_count++;
1671 }
1672
1673 if (unlikely(extent_count != expected_extent_count)) {
1674 btrfs_err(fs_info,
1675 "incorrect extent count for %llu; counted %u, expected %u",
1676 block_group->start, extent_count,
1677 expected_extent_count);
1678 DEBUG_WARN();
1679 return -EIO;
1680 }
1681
1682 return 0;
1683 }
1684
btrfs_load_free_space_tree(struct btrfs_caching_control * caching_ctl)1685 int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1686 {
1687 struct btrfs_block_group *block_group;
1688 struct btrfs_free_space_info *info;
1689 BTRFS_PATH_AUTO_FREE(path);
1690 u32 extent_count, flags;
1691
1692 block_group = caching_ctl->block_group;
1693
1694 path = btrfs_alloc_path();
1695 if (!path)
1696 return -ENOMEM;
1697
1698 /*
1699 * Just like caching_thread() doesn't want to deadlock on the extent
1700 * tree, we don't want to deadlock on the free space tree.
1701 */
1702 path->skip_locking = true;
1703 path->search_commit_root = true;
1704 path->reada = READA_FORWARD;
1705
1706 info = btrfs_search_free_space_info(NULL, block_group, path, 0);
1707 if (IS_ERR(info))
1708 return PTR_ERR(info);
1709
1710 extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1711 flags = btrfs_free_space_flags(path->nodes[0], info);
1712
1713 /*
1714 * We left path pointing to the free space info item, so now
1715 * load_free_space_foo can just iterate through the free space tree from
1716 * there.
1717 */
1718 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1719 return load_free_space_bitmaps(caching_ctl, path, extent_count);
1720 else
1721 return load_free_space_extents(caching_ctl, path, extent_count);
1722 }
1723
delete_orphan_free_space_entries(struct btrfs_root * fst_root,struct btrfs_path * path,u64 first_bg_bytenr)1724 static int delete_orphan_free_space_entries(struct btrfs_root *fst_root,
1725 struct btrfs_path *path,
1726 u64 first_bg_bytenr)
1727 {
1728 struct btrfs_trans_handle *trans;
1729 int ret;
1730
1731 trans = btrfs_start_transaction(fst_root, 1);
1732 if (IS_ERR(trans))
1733 return PTR_ERR(trans);
1734
1735 while (true) {
1736 struct btrfs_key key = { 0 };
1737 int i;
1738
1739 ret = btrfs_search_slot(trans, fst_root, &key, path, -1, 1);
1740 if (ret < 0)
1741 break;
1742 ASSERT(ret > 0);
1743 ret = 0;
1744 for (i = 0; i < btrfs_header_nritems(path->nodes[0]); i++) {
1745 btrfs_item_key_to_cpu(path->nodes[0], &key, i);
1746 if (key.objectid >= first_bg_bytenr) {
1747 /*
1748 * Only break the for() loop and continue to
1749 * delete items.
1750 */
1751 break;
1752 }
1753 }
1754 /* No items to delete, finished. */
1755 if (i == 0)
1756 break;
1757
1758 ret = btrfs_del_items(trans, fst_root, path, 0, i);
1759 if (ret < 0)
1760 break;
1761 btrfs_release_path(path);
1762 }
1763 btrfs_release_path(path);
1764 btrfs_end_transaction(trans);
1765 if (ret == 0)
1766 btrfs_info(fst_root->fs_info, "deleted orphan free space tree entries");
1767 return ret;
1768 }
1769
1770 /* Remove any free space entry before the first block group. */
btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info * fs_info)1771 int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info)
1772 {
1773 BTRFS_PATH_AUTO_RELEASE(path);
1774 struct btrfs_key key = {
1775 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1776 .type = BTRFS_ROOT_ITEM_KEY,
1777 .offset = 0,
1778 };
1779 struct btrfs_root *root;
1780 struct btrfs_block_group *bg;
1781 u64 first_bg_bytenr;
1782 int ret;
1783
1784 /*
1785 * Extent tree v2 has multiple global roots based on the block group.
1786 * This means we cannot easily grab the global free space tree and locate
1787 * orphan items. Furthermore this is still experimental, all users
1788 * should use the latest btrfs-progs anyway.
1789 */
1790 if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
1791 return 0;
1792 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1793 return 0;
1794 root = btrfs_global_root(fs_info, &key);
1795 if (!root)
1796 return 0;
1797
1798 key.objectid = 0;
1799 key.type = 0;
1800 key.offset = 0;
1801
1802 bg = btrfs_lookup_first_block_group(fs_info, 0);
1803 if (unlikely(!bg)) {
1804 btrfs_err(fs_info, "no block group found");
1805 return -EUCLEAN;
1806 }
1807 first_bg_bytenr = bg->start;
1808 btrfs_put_block_group(bg);
1809
1810 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1811 if (ret < 0)
1812 return ret;
1813 /* There should not be an all-zero key in fst. */
1814 ASSERT(ret > 0);
1815
1816 /* Empty free space tree. */
1817 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
1818 return 0;
1819
1820 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
1821 if (key.objectid >= first_bg_bytenr)
1822 return 0;
1823 btrfs_release_path(&path);
1824 return delete_orphan_free_space_entries(root, &path, first_bg_bytenr);
1825 }
1826