xref: /linux/fs/btrfs/raid-stripe-tree.c (revision 0eb4aaa230d725fa9b1cd758c0f17abca5597af6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2023 Western Digital Corporation or its affiliates.
4  */
5 
6 #include <linux/btrfs_tree.h>
7 #include "ctree.h"
8 #include "fs.h"
9 #include "accessors.h"
10 #include "transaction.h"
11 #include "disk-io.h"
12 #include "raid-stripe-tree.h"
13 #include "volumes.h"
14 #include "print-tree.h"
15 
btrfs_partially_delete_raid_extent(struct btrfs_trans_handle * trans,struct btrfs_path * path,const struct btrfs_key * oldkey,u64 newlen,u64 frontpad)16 static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
17 					       struct btrfs_path *path,
18 					       const struct btrfs_key *oldkey,
19 					       u64 newlen, u64 frontpad)
20 {
21 	struct btrfs_root *stripe_root = trans->fs_info->stripe_root;
22 	struct btrfs_stripe_extent *extent, *newitem;
23 	struct extent_buffer *leaf;
24 	int slot;
25 	size_t item_size;
26 	struct btrfs_key newkey = {
27 		.objectid = oldkey->objectid + frontpad,
28 		.type = BTRFS_RAID_STRIPE_KEY,
29 		.offset = newlen,
30 	};
31 	int ret;
32 
33 	ASSERT(newlen > 0);
34 	ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);
35 
36 	leaf = path->nodes[0];
37 	slot = path->slots[0];
38 	item_size = btrfs_item_size(leaf, slot);
39 
40 	newitem = kzalloc(item_size, GFP_NOFS);
41 	if (!newitem)
42 		return -ENOMEM;
43 
44 	extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
45 
46 	for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
47 		struct btrfs_raid_stride *stride = &extent->strides[i];
48 		u64 phys;
49 
50 		phys = btrfs_raid_stride_physical(leaf, stride) + frontpad;
51 		btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys);
52 	}
53 
54 	ret = btrfs_del_item(trans, stripe_root, path);
55 	if (ret)
56 		goto out;
57 
58 	btrfs_release_path(path);
59 	ret = btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size);
60 
61 out:
62 	kfree(newitem);
63 	return ret;
64 }
65 
btrfs_delete_raid_extent(struct btrfs_trans_handle * trans,u64 start,u64 length)66 int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
67 {
68 	struct btrfs_fs_info *fs_info = trans->fs_info;
69 	struct btrfs_root *stripe_root = fs_info->stripe_root;
70 	struct btrfs_path *path;
71 	struct btrfs_key key;
72 	struct extent_buffer *leaf;
73 	u64 found_start;
74 	u64 found_end;
75 	u64 end = start + length;
76 	int slot;
77 	int ret;
78 
79 	if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root)
80 		return 0;
81 
82 	if (!btrfs_is_testing(fs_info)) {
83 		struct btrfs_chunk_map *map;
84 		bool use_rst;
85 
86 		map = btrfs_find_chunk_map(fs_info, start, length);
87 		if (!map)
88 			return -EINVAL;
89 		use_rst = btrfs_need_stripe_tree_update(fs_info, map->type);
90 		btrfs_free_chunk_map(map);
91 		if (!use_rst)
92 			return 0;
93 	}
94 
95 	path = btrfs_alloc_path();
96 	if (!path)
97 		return -ENOMEM;
98 
99 	while (1) {
100 		key.objectid = start;
101 		key.type = BTRFS_RAID_STRIPE_KEY;
102 		key.offset = 0;
103 
104 		ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
105 		if (ret < 0)
106 			break;
107 
108 		if (path->slots[0] == btrfs_header_nritems(path->nodes[0]))
109 			path->slots[0]--;
110 
111 		leaf = path->nodes[0];
112 		slot = path->slots[0];
113 		btrfs_item_key_to_cpu(leaf, &key, slot);
114 		found_start = key.objectid;
115 		found_end = found_start + key.offset;
116 		ret = 0;
117 
118 		/*
119 		 * The stripe extent starts before the range we want to delete,
120 		 * but the range spans more than one stripe extent:
121 		 *
122 		 * |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---|
123 		 *        |--- keep  ---|--- drop ---|
124 		 *
125 		 * This means we have to get the previous item, truncate its
126 		 * length and then restart the search.
127 		 */
128 		if (found_start > start) {
129 			if (slot == 0) {
130 				ret = btrfs_previous_item(stripe_root, path, start,
131 							  BTRFS_RAID_STRIPE_KEY);
132 				if (ret) {
133 					if (ret > 0)
134 						ret = -ENOENT;
135 					break;
136 				}
137 			} else {
138 				path->slots[0]--;
139 			}
140 
141 			leaf = path->nodes[0];
142 			slot = path->slots[0];
143 			btrfs_item_key_to_cpu(leaf, &key, slot);
144 			found_start = key.objectid;
145 			found_end = found_start + key.offset;
146 			ASSERT(found_start <= start);
147 		}
148 
149 		if (key.type != BTRFS_RAID_STRIPE_KEY)
150 			break;
151 
152 		/* That stripe ends before we start, we're done. */
153 		if (found_end <= start)
154 			break;
155 
156 		trace_btrfs_raid_extent_delete(fs_info, start, end,
157 					       found_start, found_end);
158 
159 		/*
160 		 * The stripe extent starts before the range we want to delete
161 		 * and ends after the range we want to delete, i.e. we're
162 		 * punching a hole in the stripe extent:
163 		 *
164 		 *  |--- RAID Stripe Extent ---|
165 		 *  | keep |--- drop ---| keep |
166 		 *
167 		 * This means we need to a) truncate the existing item and b)
168 		 * create a second item for the remaining range.
169 		 */
170 		if (found_start < start && found_end > end) {
171 			size_t item_size;
172 			u64 diff_start = start - found_start;
173 			u64 diff_end = found_end - end;
174 			struct btrfs_stripe_extent *extent;
175 			struct btrfs_key newkey = {
176 				.objectid = end,
177 				.type = BTRFS_RAID_STRIPE_KEY,
178 				.offset = diff_end,
179 			};
180 
181 			/* The "right" item. */
182 			ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey);
183 			if (ret)
184 				break;
185 
186 			item_size = btrfs_item_size(leaf, path->slots[0]);
187 			extent = btrfs_item_ptr(leaf, path->slots[0],
188 						struct btrfs_stripe_extent);
189 
190 			for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
191 				struct btrfs_raid_stride *stride = &extent->strides[i];
192 				u64 phys;
193 
194 				phys = btrfs_raid_stride_physical(leaf, stride);
195 				phys += diff_start + length;
196 				btrfs_set_raid_stride_physical(leaf, stride, phys);
197 			}
198 
199 			/* The "left" item. */
200 			path->slots[0]--;
201 			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
202 			btrfs_partially_delete_raid_extent(trans, path, &key,
203 							   diff_start, 0);
204 			break;
205 		}
206 
207 		/*
208 		 * The stripe extent starts before the range we want to delete:
209 		 *
210 		 * |--- RAID Stripe Extent ---|
211 		 * |--- keep  ---|--- drop ---|
212 		 *
213 		 * This means we have to duplicate the tree item, truncate the
214 		 * length to the new size and then re-insert the item.
215 		 */
216 		if (found_start < start) {
217 			u64 diff_start = start - found_start;
218 
219 			btrfs_partially_delete_raid_extent(trans, path, &key,
220 							   diff_start, 0);
221 
222 			start += (key.offset - diff_start);
223 			length -= (key.offset - diff_start);
224 			if (length == 0)
225 				break;
226 
227 			btrfs_release_path(path);
228 			continue;
229 		}
230 
231 		/*
232 		 * The stripe extent ends after the range we want to delete:
233 		 *
234 		 * |--- RAID Stripe Extent ---|
235 		 * |--- drop  ---|--- keep ---|
236 		 *
237 		 * This means we have to duplicate the tree item, truncate the
238 		 * length to the new size and then re-insert the item.
239 		 */
240 		if (found_end > end) {
241 			u64 diff_end = found_end - end;
242 
243 			btrfs_partially_delete_raid_extent(trans, path, &key,
244 							   key.offset - length,
245 							   length);
246 			ASSERT(key.offset - diff_end == length);
247 			break;
248 		}
249 
250 		/* Finally we can delete the whole item, no more special cases. */
251 		ret = btrfs_del_item(trans, stripe_root, path);
252 		if (ret)
253 			break;
254 
255 		start += key.offset;
256 		length -= key.offset;
257 		if (length == 0)
258 			break;
259 
260 		btrfs_release_path(path);
261 	}
262 
263 	btrfs_free_path(path);
264 	return ret;
265 }
266 
update_raid_extent_item(struct btrfs_trans_handle * trans,struct btrfs_key * key,struct btrfs_stripe_extent * stripe_extent,const size_t item_size)267 static int update_raid_extent_item(struct btrfs_trans_handle *trans,
268 				   struct btrfs_key *key,
269 				   struct btrfs_stripe_extent *stripe_extent,
270 				   const size_t item_size)
271 {
272 	struct btrfs_path *path;
273 	struct extent_buffer *leaf;
274 	int ret;
275 	int slot;
276 
277 	path = btrfs_alloc_path();
278 	if (!path)
279 		return -ENOMEM;
280 
281 	ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path,
282 				0, 1);
283 	if (ret)
284 		return (ret == 1 ? ret : -EINVAL);
285 
286 	leaf = path->nodes[0];
287 	slot = path->slots[0];
288 
289 	write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
290 			    item_size);
291 	btrfs_free_path(path);
292 
293 	return ret;
294 }
295 
296 EXPORT_FOR_TESTS
btrfs_insert_one_raid_extent(struct btrfs_trans_handle * trans,struct btrfs_io_context * bioc)297 int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
298 				 struct btrfs_io_context *bioc)
299 {
300 	struct btrfs_fs_info *fs_info = trans->fs_info;
301 	struct btrfs_key stripe_key;
302 	struct btrfs_root *stripe_root = fs_info->stripe_root;
303 	const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
304 	struct btrfs_stripe_extent *stripe_extent;
305 	const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
306 	int ret;
307 
308 	stripe_extent = kzalloc(item_size, GFP_NOFS);
309 	if (!stripe_extent) {
310 		btrfs_abort_transaction(trans, -ENOMEM);
311 		btrfs_end_transaction(trans);
312 		return -ENOMEM;
313 	}
314 
315 	trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size,
316 					   num_stripes);
317 	for (int i = 0; i < num_stripes; i++) {
318 		u64 devid = bioc->stripes[i].dev->devid;
319 		u64 physical = bioc->stripes[i].physical;
320 		struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i];
321 
322 		btrfs_set_stack_raid_stride_devid(raid_stride, devid);
323 		btrfs_set_stack_raid_stride_physical(raid_stride, physical);
324 	}
325 
326 	stripe_key.objectid = bioc->logical;
327 	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
328 	stripe_key.offset = bioc->size;
329 
330 	ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
331 				item_size);
332 	if (ret == -EEXIST)
333 		ret = update_raid_extent_item(trans, &stripe_key, stripe_extent,
334 					      item_size);
335 	if (ret)
336 		btrfs_abort_transaction(trans, ret);
337 
338 	kfree(stripe_extent);
339 
340 	return ret;
341 }
342 
btrfs_insert_raid_extent(struct btrfs_trans_handle * trans,struct btrfs_ordered_extent * ordered_extent)343 int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
344 			     struct btrfs_ordered_extent *ordered_extent)
345 {
346 	struct btrfs_io_context *bioc;
347 	int ret;
348 
349 	if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE))
350 		return 0;
351 
352 	list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) {
353 		ret = btrfs_insert_one_raid_extent(trans, bioc);
354 		if (ret)
355 			return ret;
356 	}
357 
358 	while (!list_empty(&ordered_extent->bioc_list)) {
359 		bioc = list_first_entry(&ordered_extent->bioc_list,
360 					typeof(*bioc), rst_ordered_entry);
361 		list_del(&bioc->rst_ordered_entry);
362 		btrfs_put_bioc(bioc);
363 	}
364 
365 	return 0;
366 }
367 
btrfs_get_raid_extent_offset(struct btrfs_fs_info * fs_info,u64 logical,u64 * length,u64 map_type,u32 stripe_index,struct btrfs_io_stripe * stripe)368 int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
369 				 u64 logical, u64 *length, u64 map_type,
370 				 u32 stripe_index, struct btrfs_io_stripe *stripe)
371 {
372 	struct btrfs_root *stripe_root = fs_info->stripe_root;
373 	struct btrfs_stripe_extent *stripe_extent;
374 	struct btrfs_key stripe_key;
375 	struct btrfs_key found_key;
376 	struct btrfs_path *path;
377 	struct extent_buffer *leaf;
378 	const u64 end = logical + *length;
379 	int num_stripes;
380 	u64 offset;
381 	u64 found_logical;
382 	u64 found_length;
383 	u64 found_end;
384 	int slot;
385 	int ret;
386 
387 	stripe_key.objectid = logical;
388 	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
389 	stripe_key.offset = 0;
390 
391 	path = btrfs_alloc_path();
392 	if (!path)
393 		return -ENOMEM;
394 
395 	if (stripe->rst_search_commit_root) {
396 		path->skip_locking = 1;
397 		path->search_commit_root = 1;
398 	}
399 
400 	ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
401 	if (ret < 0)
402 		goto free_path;
403 	if (ret) {
404 		if (path->slots[0] != 0)
405 			path->slots[0]--;
406 	}
407 
408 	while (1) {
409 		leaf = path->nodes[0];
410 		slot = path->slots[0];
411 
412 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
413 		found_logical = found_key.objectid;
414 		found_length = found_key.offset;
415 		found_end = found_logical + found_length;
416 
417 		if (found_logical > end) {
418 			ret = -ENODATA;
419 			goto out;
420 		}
421 
422 		if (in_range(logical, found_logical, found_length))
423 			break;
424 
425 		ret = btrfs_next_item(stripe_root, path);
426 		if (ret)
427 			goto out;
428 	}
429 
430 	offset = logical - found_logical;
431 
432 	/*
433 	 * If we have a logically contiguous, but physically non-continuous
434 	 * range, we need to split the bio. Record the length after which we
435 	 * must split the bio.
436 	 */
437 	if (end > found_end)
438 		*length -= end - found_end;
439 
440 	num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
441 	stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
442 
443 	for (int i = 0; i < num_stripes; i++) {
444 		struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
445 		u64 devid = btrfs_raid_stride_devid(leaf, stride);
446 		u64 physical = btrfs_raid_stride_physical(leaf, stride);
447 
448 		if (devid != stripe->dev->devid)
449 			continue;
450 
451 		if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
452 			continue;
453 
454 		stripe->physical = physical + offset;
455 
456 		trace_btrfs_get_raid_extent_offset(fs_info, logical, *length,
457 						   stripe->physical, devid);
458 
459 		ret = 0;
460 		goto free_path;
461 	}
462 
463 	/* If we're here, we haven't found the requested devid in the stripe. */
464 	ret = -ENODATA;
465 out:
466 	if (ret > 0)
467 		ret = -ENODATA;
468 	if (ret && ret != -EIO && !stripe->rst_search_commit_root) {
469 		btrfs_debug(fs_info,
470 		"cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
471 			  logical, logical + *length, stripe->dev->devid,
472 			  btrfs_bg_type_to_raid_name(map_type));
473 	}
474 free_path:
475 	btrfs_free_path(path);
476 
477 	return ret;
478 }
479