xref: /linux/fs/btrfs/raid-stripe-tree.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2023 Western Digital Corporation or its affiliates.
4  */
5 
6 #include <linux/btrfs_tree.h>
7 #include "ctree.h"
8 #include "fs.h"
9 #include "accessors.h"
10 #include "transaction.h"
11 #include "disk-io.h"
12 #include "raid-stripe-tree.h"
13 #include "volumes.h"
14 #include "print-tree.h"
15 
16 static void btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
17 					       struct btrfs_path *path,
18 					       const struct btrfs_key *oldkey,
19 					       u64 newlen, u64 frontpad)
20 {
21 	struct btrfs_stripe_extent *extent;
22 	struct extent_buffer *leaf;
23 	int slot;
24 	size_t item_size;
25 	struct btrfs_key newkey = {
26 		.objectid = oldkey->objectid + frontpad,
27 		.type = BTRFS_RAID_STRIPE_KEY,
28 		.offset = newlen,
29 	};
30 
31 	ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);
32 
33 	leaf = path->nodes[0];
34 	slot = path->slots[0];
35 	item_size = btrfs_item_size(leaf, slot);
36 	extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
37 
38 	for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
39 		struct btrfs_raid_stride *stride = &extent->strides[i];
40 		u64 phys;
41 
42 		phys = btrfs_raid_stride_physical(leaf, stride);
43 		btrfs_set_raid_stride_physical(leaf, stride, phys + frontpad);
44 	}
45 
46 	btrfs_set_item_key_safe(trans, path, &newkey);
47 }
48 
49 int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
50 {
51 	struct btrfs_fs_info *fs_info = trans->fs_info;
52 	struct btrfs_root *stripe_root = fs_info->stripe_root;
53 	struct btrfs_path *path;
54 	struct btrfs_key key;
55 	struct extent_buffer *leaf;
56 	u64 found_start;
57 	u64 found_end;
58 	u64 end = start + length;
59 	int slot;
60 	int ret;
61 
62 	if (!stripe_root)
63 		return 0;
64 
65 	path = btrfs_alloc_path();
66 	if (!path)
67 		return -ENOMEM;
68 
69 	while (1) {
70 		key.objectid = start;
71 		key.type = BTRFS_RAID_STRIPE_KEY;
72 		key.offset = 0;
73 
74 		ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
75 		if (ret < 0)
76 			break;
77 
78 		if (path->slots[0] == btrfs_header_nritems(path->nodes[0]))
79 			path->slots[0]--;
80 
81 		leaf = path->nodes[0];
82 		slot = path->slots[0];
83 		btrfs_item_key_to_cpu(leaf, &key, slot);
84 		found_start = key.objectid;
85 		found_end = found_start + key.offset;
86 		ret = 0;
87 
88 		if (key.type != BTRFS_RAID_STRIPE_KEY)
89 			break;
90 
91 		/* That stripe ends before we start, we're done. */
92 		if (found_end <= start)
93 			break;
94 
95 		trace_btrfs_raid_extent_delete(fs_info, start, end,
96 					       found_start, found_end);
97 
98 		/*
99 		 * The stripe extent starts before the range we want to delete:
100 		 *
101 		 * |--- RAID Stripe Extent ---|
102 		 * |--- keep  ---|--- drop ---|
103 		 *
104 		 * This means we have to duplicate the tree item, truncate the
105 		 * length to the new size and then re-insert the item.
106 		 */
107 		if (found_start < start) {
108 			u64 diff = start - found_start;
109 
110 			btrfs_partially_delete_raid_extent(trans, path, &key,
111 							   diff, 0);
112 			break;
113 		}
114 
115 		/*
116 		 * The stripe extent ends after the range we want to delete:
117 		 *
118 		 * |--- RAID Stripe Extent ---|
119 		 * |--- drop  ---|--- keep ---|
120 		 *
121 		 * This means we have to duplicate the tree item, truncate the
122 		 * length to the new size and then re-insert the item.
123 		 */
124 		if (found_end > end) {
125 			u64 diff = found_end - end;
126 
127 			btrfs_partially_delete_raid_extent(trans, path, &key,
128 							   diff, diff);
129 			break;
130 		}
131 
132 		ret = btrfs_del_item(trans, stripe_root, path);
133 		if (ret)
134 			break;
135 
136 		start += key.offset;
137 		length -= key.offset;
138 		if (length == 0)
139 			break;
140 
141 		btrfs_release_path(path);
142 	}
143 
144 	btrfs_free_path(path);
145 	return ret;
146 }
147 
148 static int update_raid_extent_item(struct btrfs_trans_handle *trans,
149 				   struct btrfs_key *key,
150 				   struct btrfs_stripe_extent *stripe_extent,
151 				   const size_t item_size)
152 {
153 	struct btrfs_path *path;
154 	struct extent_buffer *leaf;
155 	int ret;
156 	int slot;
157 
158 	path = btrfs_alloc_path();
159 	if (!path)
160 		return -ENOMEM;
161 
162 	ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path,
163 				0, 1);
164 	if (ret)
165 		return (ret == 1 ? ret : -EINVAL);
166 
167 	leaf = path->nodes[0];
168 	slot = path->slots[0];
169 
170 	write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
171 			    item_size);
172 	btrfs_mark_buffer_dirty(trans, leaf);
173 	btrfs_free_path(path);
174 
175 	return ret;
176 }
177 
178 EXPORT_FOR_TESTS
179 int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
180 				 struct btrfs_io_context *bioc)
181 {
182 	struct btrfs_fs_info *fs_info = trans->fs_info;
183 	struct btrfs_key stripe_key;
184 	struct btrfs_root *stripe_root = fs_info->stripe_root;
185 	const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
186 	struct btrfs_stripe_extent *stripe_extent;
187 	const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
188 	int ret;
189 
190 	stripe_extent = kzalloc(item_size, GFP_NOFS);
191 	if (!stripe_extent) {
192 		btrfs_abort_transaction(trans, -ENOMEM);
193 		btrfs_end_transaction(trans);
194 		return -ENOMEM;
195 	}
196 
197 	trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size,
198 					   num_stripes);
199 	for (int i = 0; i < num_stripes; i++) {
200 		u64 devid = bioc->stripes[i].dev->devid;
201 		u64 physical = bioc->stripes[i].physical;
202 		u64 length = bioc->stripes[i].length;
203 		struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i];
204 
205 		if (length == 0)
206 			length = bioc->size;
207 
208 		btrfs_set_stack_raid_stride_devid(raid_stride, devid);
209 		btrfs_set_stack_raid_stride_physical(raid_stride, physical);
210 	}
211 
212 	stripe_key.objectid = bioc->logical;
213 	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
214 	stripe_key.offset = bioc->size;
215 
216 	ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
217 				item_size);
218 	if (ret == -EEXIST)
219 		ret = update_raid_extent_item(trans, &stripe_key, stripe_extent,
220 					      item_size);
221 	if (ret)
222 		btrfs_abort_transaction(trans, ret);
223 
224 	kfree(stripe_extent);
225 
226 	return ret;
227 }
228 
229 int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
230 			     struct btrfs_ordered_extent *ordered_extent)
231 {
232 	struct btrfs_io_context *bioc;
233 	int ret;
234 
235 	if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE))
236 		return 0;
237 
238 	list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) {
239 		ret = btrfs_insert_one_raid_extent(trans, bioc);
240 		if (ret)
241 			return ret;
242 	}
243 
244 	while (!list_empty(&ordered_extent->bioc_list)) {
245 		bioc = list_first_entry(&ordered_extent->bioc_list,
246 					typeof(*bioc), rst_ordered_entry);
247 		list_del(&bioc->rst_ordered_entry);
248 		btrfs_put_bioc(bioc);
249 	}
250 
251 	return 0;
252 }
253 
254 int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
255 				 u64 logical, u64 *length, u64 map_type,
256 				 u32 stripe_index, struct btrfs_io_stripe *stripe)
257 {
258 	struct btrfs_root *stripe_root = fs_info->stripe_root;
259 	struct btrfs_stripe_extent *stripe_extent;
260 	struct btrfs_key stripe_key;
261 	struct btrfs_key found_key;
262 	struct btrfs_path *path;
263 	struct extent_buffer *leaf;
264 	const u64 end = logical + *length;
265 	int num_stripes;
266 	u64 offset;
267 	u64 found_logical;
268 	u64 found_length;
269 	u64 found_end;
270 	int slot;
271 	int ret;
272 
273 	stripe_key.objectid = logical;
274 	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
275 	stripe_key.offset = 0;
276 
277 	path = btrfs_alloc_path();
278 	if (!path)
279 		return -ENOMEM;
280 
281 	if (stripe->rst_search_commit_root) {
282 		path->skip_locking = 1;
283 		path->search_commit_root = 1;
284 	}
285 
286 	ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
287 	if (ret < 0)
288 		goto free_path;
289 	if (ret) {
290 		if (path->slots[0] != 0)
291 			path->slots[0]--;
292 	}
293 
294 	while (1) {
295 		leaf = path->nodes[0];
296 		slot = path->slots[0];
297 
298 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
299 		found_logical = found_key.objectid;
300 		found_length = found_key.offset;
301 		found_end = found_logical + found_length;
302 
303 		if (found_logical > end) {
304 			ret = -ENODATA;
305 			goto out;
306 		}
307 
308 		if (in_range(logical, found_logical, found_length))
309 			break;
310 
311 		ret = btrfs_next_item(stripe_root, path);
312 		if (ret)
313 			goto out;
314 	}
315 
316 	offset = logical - found_logical;
317 
318 	/*
319 	 * If we have a logically contiguous, but physically non-continuous
320 	 * range, we need to split the bio. Record the length after which we
321 	 * must split the bio.
322 	 */
323 	if (end > found_end)
324 		*length -= end - found_end;
325 
326 	num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
327 	stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
328 
329 	for (int i = 0; i < num_stripes; i++) {
330 		struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
331 		u64 devid = btrfs_raid_stride_devid(leaf, stride);
332 		u64 physical = btrfs_raid_stride_physical(leaf, stride);
333 
334 		if (devid != stripe->dev->devid)
335 			continue;
336 
337 		if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
338 			continue;
339 
340 		stripe->physical = physical + offset;
341 
342 		trace_btrfs_get_raid_extent_offset(fs_info, logical, *length,
343 						   stripe->physical, devid);
344 
345 		ret = 0;
346 		goto free_path;
347 	}
348 
349 	/* If we're here, we haven't found the requested devid in the stripe. */
350 	ret = -ENODATA;
351 out:
352 	if (ret > 0)
353 		ret = -ENODATA;
354 	if (ret && ret != -EIO && !stripe->rst_search_commit_root) {
355 		btrfs_debug(fs_info,
356 		"cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
357 			  logical, logical + *length, stripe->dev->devid,
358 			  btrfs_bg_type_to_raid_name(map_type));
359 	}
360 free_path:
361 	btrfs_free_path(path);
362 
363 	return ret;
364 }
365