1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2023 Western Digital Corporation or its affiliates.
4 */
5
6 #include <linux/btrfs_tree.h>
7 #include "ctree.h"
8 #include "fs.h"
9 #include "accessors.h"
10 #include "transaction.h"
11 #include "disk-io.h"
12 #include "raid-stripe-tree.h"
13 #include "volumes.h"
14 #include "print-tree.h"
15
btrfs_partially_delete_raid_extent(struct btrfs_trans_handle * trans,struct btrfs_path * path,const struct btrfs_key * oldkey,u64 newlen,u64 frontpad)16 static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
17 struct btrfs_path *path,
18 const struct btrfs_key *oldkey,
19 u64 newlen, u64 frontpad)
20 {
21 struct btrfs_root *stripe_root = trans->fs_info->stripe_root;
22 struct btrfs_stripe_extent *extent, AUTO_KFREE(newitem);
23 struct extent_buffer *leaf;
24 int slot;
25 size_t item_size;
26 struct btrfs_key newkey = {
27 .objectid = oldkey->objectid + frontpad,
28 .type = BTRFS_RAID_STRIPE_KEY,
29 .offset = newlen,
30 };
31 int ret;
32
33 ASSERT(newlen > 0);
34 ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);
35
36 leaf = path->nodes[0];
37 slot = path->slots[0];
38 item_size = btrfs_item_size(leaf, slot);
39
40 newitem = kzalloc(item_size, GFP_NOFS);
41 if (!newitem)
42 return -ENOMEM;
43
44 extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
45
46 for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
47 struct btrfs_raid_stride *stride = &extent->strides[i];
48 u64 devid;
49 u64 phys;
50
51 devid = btrfs_raid_stride_devid(leaf, stride);
52 btrfs_set_stack_raid_stride_devid(&newitem->strides[i], devid);
53 phys = btrfs_raid_stride_physical(leaf, stride) + frontpad;
54 btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys);
55 }
56
57 ret = btrfs_del_item(trans, stripe_root, path);
58 if (ret)
59 return ret;
60
61 btrfs_release_path(path);
62 return btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size);
63 }
64
btrfs_delete_raid_extent(struct btrfs_trans_handle * trans,u64 start,u64 length)65 int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
66 {
67 struct btrfs_fs_info *fs_info = trans->fs_info;
68 struct btrfs_root *stripe_root = fs_info->stripe_root;
69 BTRFS_PATH_AUTO_FREE(path);
70 struct btrfs_key key;
71 struct extent_buffer *leaf;
72 u64 found_start;
73 u64 found_end;
74 u64 end = start + length;
75 int slot;
76 int ret;
77
78 if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root)
79 return 0;
80
81 if (!btrfs_is_testing(fs_info)) {
82 struct btrfs_chunk_map *map;
83 bool use_rst;
84
85 map = btrfs_find_chunk_map(fs_info, start, length);
86 if (!map)
87 return -EINVAL;
88 use_rst = btrfs_need_stripe_tree_update(fs_info, map->type);
89 btrfs_free_chunk_map(map);
90 if (!use_rst)
91 return 0;
92 }
93
94 path = btrfs_alloc_path();
95 if (!path)
96 return -ENOMEM;
97
98 while (1) {
99 key.objectid = start;
100 key.type = BTRFS_RAID_STRIPE_KEY;
101 key.offset = (u64)-1;
102
103 ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
104 if (ret < 0)
105 break;
106
107 /*
108 * Search with offset=(u64)-1 ensures we land on the correct
109 * leaf even when the target entry is the first item on a leaf.
110 * Since no real entry has offset=(u64)-1, ret is always 1 and
111 * slot points past the last entry with objectid==start (or
112 * past the end of the leaf if that entry is the last item).
113 * Back up one slot to find the actual entry.
114 */
115 if (path->slots[0] == 0) {
116 /* No entry with objectid <= start exists. */
117 ret = 0;
118 break;
119 }
120 path->slots[0]--;
121
122 leaf = path->nodes[0];
123 slot = path->slots[0];
124 btrfs_item_key_to_cpu(leaf, &key, slot);
125 found_start = key.objectid;
126 found_end = found_start + key.offset;
127 ret = 0;
128
129 /*
130 * The stripe extent starts before the range we want to delete,
131 * but the range spans more than one stripe extent:
132 *
133 * |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---|
134 * |--- keep ---|--- drop ---|
135 *
136 * This means we have to get the previous item, truncate its
137 * length and then restart the search.
138 */
139 if (found_start > start) {
140 if (slot == 0) {
141 ret = btrfs_previous_item(stripe_root, path, 0,
142 BTRFS_RAID_STRIPE_KEY);
143 if (ret) {
144 if (ret > 0)
145 ret = -ENOENT;
146 break;
147 }
148 } else {
149 path->slots[0]--;
150 }
151
152 leaf = path->nodes[0];
153 slot = path->slots[0];
154 btrfs_item_key_to_cpu(leaf, &key, slot);
155 found_start = key.objectid;
156 found_end = found_start + key.offset;
157 if (found_start > start || found_end <= start) {
158 ret = -ENOENT;
159 break;
160 }
161 }
162
163 if (key.type != BTRFS_RAID_STRIPE_KEY)
164 break;
165
166 /* That stripe ends before we start, we're done. */
167 if (found_end <= start)
168 break;
169
170 trace_btrfs_raid_extent_delete(fs_info, start, end,
171 found_start, found_end);
172
173 /*
174 * The stripe extent starts before the range we want to delete
175 * and ends after the range we want to delete, i.e. we're
176 * punching a hole in the stripe extent:
177 *
178 * |--- RAID Stripe Extent ---|
179 * | keep |--- drop ---| keep |
180 *
181 * This means we need to a) truncate the existing item and b)
182 * create a second item for the remaining range.
183 */
184 if (found_start < start && found_end > end) {
185 size_t item_size;
186 u64 diff_start = start - found_start;
187 u64 diff_end = found_end - end;
188 struct btrfs_stripe_extent *extent;
189 struct btrfs_key newkey = {
190 .objectid = end,
191 .type = BTRFS_RAID_STRIPE_KEY,
192 .offset = diff_end,
193 };
194
195 /* The "right" item. */
196 ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey);
197 if (ret == -EAGAIN) {
198 btrfs_release_path(path);
199 continue;
200 }
201 if (ret)
202 break;
203
204 /*
205 * btrfs_duplicate_item() may have triggered a leaf
206 * split via setup_leaf_for_split(), so we must refresh
207 * our leaf pointer from the path.
208 */
209 leaf = path->nodes[0];
210 item_size = btrfs_item_size(leaf, path->slots[0]);
211 extent = btrfs_item_ptr(leaf, path->slots[0],
212 struct btrfs_stripe_extent);
213
214 for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
215 struct btrfs_raid_stride *stride = &extent->strides[i];
216 u64 phys;
217
218 phys = btrfs_raid_stride_physical(leaf, stride);
219 phys += diff_start + length;
220 btrfs_set_raid_stride_physical(leaf, stride, phys);
221 }
222
223 /* The "left" item. */
224 path->slots[0]--;
225 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
226 ret = btrfs_partially_delete_raid_extent(trans, path,
227 &key,
228 diff_start, 0);
229 break;
230 }
231
232 /*
233 * The stripe extent starts before the range we want to delete:
234 *
235 * |--- RAID Stripe Extent ---|
236 * |--- keep ---|--- drop ---|
237 *
238 * This means we have to duplicate the tree item, truncate the
239 * length to the new size and then re-insert the item.
240 */
241 if (found_start < start) {
242 u64 diff_start = start - found_start;
243
244 ret = btrfs_partially_delete_raid_extent(trans, path,
245 &key,
246 diff_start, 0);
247 if (ret)
248 break;
249
250 start += (key.offset - diff_start);
251 length -= (key.offset - diff_start);
252 if (length == 0)
253 break;
254
255 btrfs_release_path(path);
256 continue;
257 }
258
259 /*
260 * The stripe extent ends after the range we want to delete:
261 *
262 * |--- RAID Stripe Extent ---|
263 * |--- drop ---|--- keep ---|
264 *
265 * This means we have to duplicate the tree item, truncate the
266 * length to the new size and then re-insert the item.
267 */
268 if (found_end > end) {
269 u64 diff_end = found_end - end;
270
271 ret = btrfs_partially_delete_raid_extent(trans, path,
272 &key,
273 key.offset - length,
274 length);
275 ASSERT(key.offset - diff_end == length);
276 break;
277 }
278
279 /* Finally we can delete the whole item, no more special cases. */
280 ret = btrfs_del_item(trans, stripe_root, path);
281 if (ret)
282 break;
283
284 start += key.offset;
285 length -= key.offset;
286 if (length == 0)
287 break;
288
289 btrfs_release_path(path);
290 }
291
292 return ret;
293 }
294
update_raid_extent_item(struct btrfs_trans_handle * trans,struct btrfs_key * key,struct btrfs_stripe_extent * stripe_extent,const size_t item_size)295 static int update_raid_extent_item(struct btrfs_trans_handle *trans,
296 struct btrfs_key *key,
297 struct btrfs_stripe_extent *stripe_extent,
298 const size_t item_size)
299 {
300 BTRFS_PATH_AUTO_FREE(path);
301 struct extent_buffer *leaf;
302 int ret;
303 int slot;
304
305 path = btrfs_alloc_path();
306 if (!path)
307 return -ENOMEM;
308
309 ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path,
310 0, 1);
311 if (ret)
312 return (ret == 1 ? ret : -EINVAL);
313
314 leaf = path->nodes[0];
315 slot = path->slots[0];
316
317 write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
318 item_size);
319
320 return ret;
321 }
322
323 EXPORT_FOR_TESTS
btrfs_insert_one_raid_extent(struct btrfs_trans_handle * trans,struct btrfs_io_context * bioc)324 int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
325 struct btrfs_io_context *bioc)
326 {
327 struct btrfs_fs_info *fs_info = trans->fs_info;
328 struct btrfs_key stripe_key;
329 struct btrfs_root *stripe_root = fs_info->stripe_root;
330 const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
331 struct btrfs_stripe_extent AUTO_KFREE(stripe_extent);
332 const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
333 int ret;
334
335 stripe_extent = kzalloc(item_size, GFP_NOFS);
336 if (unlikely(!stripe_extent)) {
337 btrfs_abort_transaction(trans, -ENOMEM);
338 btrfs_end_transaction(trans);
339 return -ENOMEM;
340 }
341
342 trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size,
343 num_stripes);
344 for (int i = 0; i < num_stripes; i++) {
345 u64 devid = bioc->stripes[i].dev->devid;
346 u64 physical = bioc->stripes[i].physical;
347 struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i];
348
349 btrfs_set_stack_raid_stride_devid(raid_stride, devid);
350 btrfs_set_stack_raid_stride_physical(raid_stride, physical);
351 }
352
353 stripe_key.objectid = bioc->logical;
354 stripe_key.type = BTRFS_RAID_STRIPE_KEY;
355 stripe_key.offset = bioc->size;
356
357 ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
358 item_size);
359 if (ret == -EEXIST) {
360 ret = update_raid_extent_item(trans, &stripe_key, stripe_extent,
361 item_size);
362 if (ret)
363 btrfs_abort_transaction(trans, ret);
364 } else if (ret) {
365 btrfs_abort_transaction(trans, ret);
366 }
367
368 return ret;
369 }
370
btrfs_insert_raid_extent(struct btrfs_trans_handle * trans,struct btrfs_ordered_extent * ordered_extent)371 int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
372 struct btrfs_ordered_extent *ordered_extent)
373 {
374 struct btrfs_io_context *bioc;
375 int ret;
376
377 if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE))
378 return 0;
379
380 list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) {
381 ret = btrfs_insert_one_raid_extent(trans, bioc);
382 if (ret)
383 return ret;
384 }
385
386 while (!list_empty(&ordered_extent->bioc_list)) {
387 bioc = list_first_entry(&ordered_extent->bioc_list,
388 typeof(*bioc), rst_ordered_entry);
389 list_del(&bioc->rst_ordered_entry);
390 btrfs_put_bioc(bioc);
391 }
392
393 return 0;
394 }
395
btrfs_get_raid_extent_offset(struct btrfs_fs_info * fs_info,u64 logical,u64 * length,u64 map_type,u32 stripe_index,struct btrfs_io_stripe * stripe)396 int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
397 u64 logical, u64 *length, u64 map_type,
398 u32 stripe_index, struct btrfs_io_stripe *stripe)
399 {
400 struct btrfs_root *stripe_root = fs_info->stripe_root;
401 struct btrfs_stripe_extent *stripe_extent;
402 struct btrfs_key stripe_key;
403 struct btrfs_key found_key;
404 BTRFS_PATH_AUTO_FREE(path);
405 struct extent_buffer *leaf;
406 const u64 end = logical + *length;
407 int num_stripes;
408 u64 offset;
409 u64 found_logical;
410 u64 found_length;
411 u64 found_end;
412 int slot;
413 int ret;
414
415 stripe_key.objectid = logical;
416 stripe_key.type = BTRFS_RAID_STRIPE_KEY;
417 stripe_key.offset = 0;
418
419 path = btrfs_alloc_path();
420 if (!path)
421 return -ENOMEM;
422
423 if (stripe->rst_search_commit_root) {
424 path->skip_locking = true;
425 path->search_commit_root = true;
426 }
427
428 ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
429 if (ret < 0)
430 return ret;
431 if (ret) {
432 if (path->slots[0] != 0)
433 path->slots[0]--;
434 }
435
436 while (1) {
437 leaf = path->nodes[0];
438 slot = path->slots[0];
439
440 btrfs_item_key_to_cpu(leaf, &found_key, slot);
441 found_logical = found_key.objectid;
442 found_length = found_key.offset;
443 found_end = found_logical + found_length;
444
445 if (found_logical > end) {
446 ret = -ENODATA;
447 goto out;
448 }
449
450 if (in_range(logical, found_logical, found_length))
451 break;
452
453 ret = btrfs_next_item(stripe_root, path);
454 if (ret)
455 goto out;
456 }
457
458 offset = logical - found_logical;
459
460 /*
461 * If we have a logically contiguous, but physically non-continuous
462 * range, we need to split the bio. Record the length after which we
463 * must split the bio.
464 */
465 if (end > found_end)
466 *length -= end - found_end;
467
468 num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
469 stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
470
471 for (int i = 0; i < num_stripes; i++) {
472 struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
473 u64 devid = btrfs_raid_stride_devid(leaf, stride);
474 u64 physical = btrfs_raid_stride_physical(leaf, stride);
475
476 if (devid != stripe->dev->devid)
477 continue;
478
479 if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
480 continue;
481
482 stripe->physical = physical + offset;
483
484 trace_btrfs_get_raid_extent_offset(fs_info, logical, *length,
485 stripe->physical, devid);
486
487 return 0;
488 }
489
490 /* If we're here, we haven't found the requested devid in the stripe. */
491 ret = -ENODATA;
492 out:
493 if (ret > 0)
494 ret = -ENODATA;
495 if (ret && ret != -EIO && !stripe->rst_search_commit_root) {
496 btrfs_debug(fs_info,
497 "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
498 logical, logical + *length, stripe->dev->devid,
499 btrfs_bg_type_to_raid_name(map_type));
500 }
501
502 return ret;
503 }
504