// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023 Western Digital Corporation or its affiliates.
 */
5
6 #include <linux/btrfs_tree.h>
7 #include "ctree.h"
8 #include "fs.h"
9 #include "accessors.h"
10 #include "transaction.h"
11 #include "disk-io.h"
12 #include "raid-stripe-tree.h"
13 #include "volumes.h"
14 #include "print-tree.h"
15
/*
 * Truncate a RAID stripe extent item in place.
 *
 * @trans:    transaction handle
 * @path:     path pointing at the item to shrink
 * @oldkey:   key of the existing item
 * @newlen:   new length of the stripe extent (key offset of the new item)
 * @frontpad: number of bytes chopped off the front; added to the objectid
 *            and to each stride's physical address
 *
 * Deletes the item at @path and re-inserts it with the adjusted key and
 * per-stride physical addresses.  Returns 0 on success or a negative errno.
 */
static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
					      struct btrfs_path *path,
					      const struct btrfs_key *oldkey,
					      u64 newlen, u64 frontpad)
{
	struct btrfs_root *stripe_root = trans->fs_info->stripe_root;
	struct btrfs_stripe_extent *extent, *newitem;
	struct extent_buffer *leaf;
	int slot;
	size_t item_size;
	struct btrfs_key newkey = {
		.objectid = oldkey->objectid + frontpad,
		.type = BTRFS_RAID_STRIPE_KEY,
		.offset = newlen,
	};
	int ret;

	ASSERT(newlen > 0);
	ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);

	leaf = path->nodes[0];
	slot = path->slots[0];
	item_size = btrfs_item_size(leaf, slot);

	newitem = kzalloc(item_size, GFP_NOFS);
	if (!newitem)
		return -ENOMEM;

	extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);

	for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
		struct btrfs_raid_stride *stride = &extent->strides[i];
		u64 devid;
		u64 phys;

		/*
		 * Copy the devid as well.  newitem comes from kzalloc(), so
		 * without this the re-inserted strides would all carry devid
		 * 0 and the mapping to their devices would be lost.
		 */
		devid = btrfs_raid_stride_devid(leaf, stride);
		btrfs_set_stack_raid_stride_devid(&newitem->strides[i], devid);

		phys = btrfs_raid_stride_physical(leaf, stride) + frontpad;
		btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys);
	}

	ret = btrfs_del_item(trans, stripe_root, path);
	if (ret)
		goto out;

	btrfs_release_path(path);
	ret = btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size);

out:
	kfree(newitem);
	return ret;
}
65
/*
 * Delete the RAID stripe extents covering [@start, @start + @length) from
 * the raid stripe tree, truncating or splitting any extents that only
 * partially overlap the range.
 *
 * Returns 0 on success (including when the filesystem doesn't use a stripe
 * tree for this range) or a negative errno on failure.
 */
int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *stripe_root = fs_info->stripe_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u64 found_start;
	u64 found_end;
	u64 end = start + length;
	int slot;
	int ret;

	if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root)
		return 0;

	/* Skip chunks that don't use the stripe tree (self-tests have no chunk map). */
	if (!btrfs_is_testing(fs_info)) {
		struct btrfs_chunk_map *map;
		bool use_rst;

		map = btrfs_find_chunk_map(fs_info, start, length);
		if (!map)
			return -EINVAL;
		use_rst = btrfs_need_stripe_tree_update(fs_info, map->type);
		btrfs_free_chunk_map(map);
		if (!use_rst)
			return 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		key.objectid = start;
		key.type = BTRFS_RAID_STRIPE_KEY;
		key.offset = 0;

		ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
		if (ret < 0)
			break;

		if (path->slots[0] == btrfs_header_nritems(path->nodes[0]))
			path->slots[0]--;

		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);
		found_start = key.objectid;
		found_end = found_start + key.offset;
		ret = 0;

		/*
		 * The stripe extent starts before the range we want to delete,
		 * but the range spans more than one stripe extent:
		 *
		 * |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---|
		 *        |--- keep ---|--- drop ---|
		 *
		 * This means we have to get the previous item, truncate its
		 * length and then restart the search.
		 */
		if (found_start > start) {
			if (slot == 0) {
				ret = btrfs_previous_item(stripe_root, path, start,
							  BTRFS_RAID_STRIPE_KEY);
				if (ret) {
					if (ret > 0)
						ret = -ENOENT;
					break;
				}
			} else {
				path->slots[0]--;
			}

			leaf = path->nodes[0];
			slot = path->slots[0];
			btrfs_item_key_to_cpu(leaf, &key, slot);
			found_start = key.objectid;
			found_end = found_start + key.offset;
			ASSERT(found_start <= start);
		}

		if (key.type != BTRFS_RAID_STRIPE_KEY)
			break;

		/* That stripe ends before we start, we're done. */
		if (found_end <= start)
			break;

		trace_btrfs_raid_extent_delete(fs_info, start, end,
					       found_start, found_end);

		/*
		 * The stripe extent starts before the range we want to delete
		 * and ends after the range we want to delete, i.e. we're
		 * punching a hole in the stripe extent:
		 *
		 * |--- RAID Stripe Extent ---|
		 * | keep |--- drop ---| keep |
		 *
		 * This means we need to a) truncate the existing item and b)
		 * create a second item for the remaining range.
		 */
		if (found_start < start && found_end > end) {
			size_t item_size;
			u64 diff_start = start - found_start;
			u64 diff_end = found_end - end;
			struct btrfs_stripe_extent *extent;
			struct btrfs_key newkey = {
				.objectid = end,
				.type = BTRFS_RAID_STRIPE_KEY,
				.offset = diff_end,
			};

			/* The "right" item. */
			ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey);
			if (ret)
				break;

			item_size = btrfs_item_size(leaf, path->slots[0]);
			extent = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_stripe_extent);

			for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
				struct btrfs_raid_stride *stride = &extent->strides[i];
				u64 phys;

				phys = btrfs_raid_stride_physical(leaf, stride);
				phys += diff_start + length;
				btrfs_set_raid_stride_physical(leaf, stride, phys);
			}

			/* The "left" item. */
			path->slots[0]--;
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
			/* Propagate a failed truncation instead of dropping it. */
			ret = btrfs_partially_delete_raid_extent(trans, path, &key,
								 diff_start, 0);
			break;
		}

		/*
		 * The stripe extent starts before the range we want to delete:
		 *
		 * |--- RAID Stripe Extent ---|
		 * |--- keep ---|--- drop ---|
		 *
		 * This means we have to duplicate the tree item, truncate the
		 * length to the new size and then re-insert the item.
		 */
		if (found_start < start) {
			u64 diff_start = start - found_start;

			/* Propagate a failed truncation instead of dropping it. */
			ret = btrfs_partially_delete_raid_extent(trans, path, &key,
								 diff_start, 0);
			if (ret)
				break;

			start += (key.offset - diff_start);
			length -= (key.offset - diff_start);
			if (length == 0)
				break;

			btrfs_release_path(path);
			continue;
		}

		/*
		 * The stripe extent ends after the range we want to delete:
		 *
		 * |--- RAID Stripe Extent ---|
		 * |--- drop ---|--- keep ---|
		 *
		 * This means we have to duplicate the tree item, truncate the
		 * length to the new size and then re-insert the item.
		 */
		if (found_end > end) {
			u64 diff_end = found_end - end;

			ret = btrfs_partially_delete_raid_extent(trans, path, &key,
								 key.offset - length,
								 length);
			ASSERT(key.offset - diff_end == length);
			break;
		}

		/* Finally we can delete the whole item, no more special cases. */
		ret = btrfs_del_item(trans, stripe_root, path);
		if (ret)
			break;

		start += key.offset;
		length -= key.offset;
		if (length == 0)
			break;

		btrfs_release_path(path);
	}

	btrfs_free_path(path);
	return ret;
}
266
update_raid_extent_item(struct btrfs_trans_handle * trans,struct btrfs_key * key,struct btrfs_stripe_extent * stripe_extent,const size_t item_size)267 static int update_raid_extent_item(struct btrfs_trans_handle *trans,
268 struct btrfs_key *key,
269 struct btrfs_stripe_extent *stripe_extent,
270 const size_t item_size)
271 {
272 struct btrfs_path *path;
273 struct extent_buffer *leaf;
274 int ret;
275 int slot;
276
277 path = btrfs_alloc_path();
278 if (!path)
279 return -ENOMEM;
280
281 ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path,
282 0, 1);
283 if (ret)
284 return (ret == 1 ? ret : -EINVAL);
285
286 leaf = path->nodes[0];
287 slot = path->slots[0];
288
289 write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
290 item_size);
291 btrfs_free_path(path);
292
293 return ret;
294 }
295
296 EXPORT_FOR_TESTS
btrfs_insert_one_raid_extent(struct btrfs_trans_handle * trans,struct btrfs_io_context * bioc)297 int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
298 struct btrfs_io_context *bioc)
299 {
300 struct btrfs_fs_info *fs_info = trans->fs_info;
301 struct btrfs_key stripe_key;
302 struct btrfs_root *stripe_root = fs_info->stripe_root;
303 const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
304 struct btrfs_stripe_extent *stripe_extent;
305 const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
306 int ret;
307
308 stripe_extent = kzalloc(item_size, GFP_NOFS);
309 if (!stripe_extent) {
310 btrfs_abort_transaction(trans, -ENOMEM);
311 btrfs_end_transaction(trans);
312 return -ENOMEM;
313 }
314
315 trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size,
316 num_stripes);
317 for (int i = 0; i < num_stripes; i++) {
318 u64 devid = bioc->stripes[i].dev->devid;
319 u64 physical = bioc->stripes[i].physical;
320 struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i];
321
322 btrfs_set_stack_raid_stride_devid(raid_stride, devid);
323 btrfs_set_stack_raid_stride_physical(raid_stride, physical);
324 }
325
326 stripe_key.objectid = bioc->logical;
327 stripe_key.type = BTRFS_RAID_STRIPE_KEY;
328 stripe_key.offset = bioc->size;
329
330 ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
331 item_size);
332 if (ret == -EEXIST) {
333 ret = update_raid_extent_item(trans, &stripe_key, stripe_extent,
334 item_size);
335 if (ret)
336 btrfs_abort_transaction(trans, ret);
337 } else if (ret) {
338 btrfs_abort_transaction(trans, ret);
339 }
340
341 kfree(stripe_extent);
342
343 return ret;
344 }
345
/*
 * Insert a RAID stripe extent item for every io_context attached to
 * @ordered_extent, then unlink and release each io_context.
 *
 * The list is only drained after all insertions succeeded; on the first
 * failure the remaining entries stay on the list so the caller's error path
 * can still release them.  Returns 0 on success or the first insertion
 * error.
 */
int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_ordered_extent *ordered_extent)
{
	struct btrfs_io_context *bioc;
	int ret;

	/* Nothing to record without the raid-stripe-tree feature. */
	if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE))
		return 0;

	/* First pass: insert one stripe extent item per io_context. */
	list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) {
		ret = btrfs_insert_one_raid_extent(trans, bioc);
		if (ret)
			return ret;
	}

	/* Second pass: unlink and drop the reference on every io_context. */
	while (!list_empty(&ordered_extent->bioc_list)) {
		bioc = list_first_entry(&ordered_extent->bioc_list,
					typeof(*bioc), rst_ordered_entry);
		list_del(&bioc->rst_ordered_entry);
		btrfs_put_bioc(bioc);
	}

	return 0;
}
370
/*
 * Translate a logical address into a physical address on @stripe's device
 * using the raid stripe tree.
 *
 * @fs_info:      the filesystem
 * @logical:      logical address to look up
 * @length:       in/out - length of the range; trimmed when the found stripe
 *                extent ends before @logical + @length, so the caller knows
 *                where to split the bio
 * @map_type:     block group type of the chunk containing @logical
 * @stripe_index: stripe number within the chunk, used to disambiguate DUP
 *                profiles where both strides share a devid
 * @stripe:       in/out - supplies the target device and search flags,
 *                receives the resolved physical address
 *
 * Returns 0 on success, -ENODATA if no matching stripe extent or devid was
 * found, or another negative errno on lookup failure.
 */
int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
				 u64 logical, u64 *length, u64 map_type,
				 u32 stripe_index, struct btrfs_io_stripe *stripe)
{
	struct btrfs_root *stripe_root = fs_info->stripe_root;
	struct btrfs_stripe_extent *stripe_extent;
	struct btrfs_key stripe_key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	const u64 end = logical + *length;
	int num_stripes;
	u64 offset;
	u64 found_logical;
	u64 found_length;
	u64 found_end;
	int slot;
	int ret;

	stripe_key.objectid = logical;
	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
	stripe_key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Scrub/replace may want a lockless lookup against the commit root. */
	if (stripe->rst_search_commit_root) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

	ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
	if (ret < 0)
		goto free_path;
	if (ret) {
		/*
		 * No exact match (the search key's offset is 0); step back to
		 * the previous item, which may still cover @logical.
		 */
		if (path->slots[0] != 0)
			path->slots[0]--;
	}

	/* Walk forward until an item covering @logical is found. */
	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		found_logical = found_key.objectid;
		found_length = found_key.offset;
		found_end = found_logical + found_length;

		/* Walked past the requested range - nothing covers it. */
		if (found_logical > end) {
			ret = -ENODATA;
			goto out;
		}

		if (in_range(logical, found_logical, found_length))
			break;

		ret = btrfs_next_item(stripe_root, path);
		if (ret)
			goto out;
	}

	offset = logical - found_logical;

	/*
	 * If we have a logically contiguous, but physically non-continuous
	 * range, we need to split the bio. Record the length after which we
	 * must split the bio.
	 */
	if (end > found_end)
		*length -= end - found_end;

	num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
	stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);

	/* Find the stride living on the device the caller asked about. */
	for (int i = 0; i < num_stripes; i++) {
		struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
		u64 devid = btrfs_raid_stride_devid(leaf, stride);
		u64 physical = btrfs_raid_stride_physical(leaf, stride);

		if (devid != stripe->dev->devid)
			continue;

		/* DUP has both strides on one device; match by index too. */
		if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
			continue;

		stripe->physical = physical + offset;

		trace_btrfs_get_raid_extent_offset(fs_info, logical, *length,
						   stripe->physical, devid);

		ret = 0;
		goto free_path;
	}

	/* If we're here, we haven't found the requested devid in the stripe. */
	ret = -ENODATA;
out:
	/* Normalize btrfs_next_item()'s "no more items" (ret > 0) to -ENODATA. */
	if (ret > 0)
		ret = -ENODATA;
	if (ret && ret != -EIO && !stripe->rst_search_commit_root) {
		btrfs_debug(fs_info,
		"cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
			    logical, logical + *length, stripe->dev->devid,
			    btrfs_bg_type_to_raid_name(map_type));
	}
free_path:
	btrfs_free_path(path);

	return ret;
}
482