xref: /linux/fs/btrfs/raid-stripe-tree.c (revision 7696286034ac72cf9b46499be1715ac62fd302c3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2023 Western Digital Corporation or its affiliates.
4  */
5 
6 #include <linux/btrfs_tree.h>
7 #include "ctree.h"
8 #include "fs.h"
9 #include "accessors.h"
10 #include "transaction.h"
11 #include "disk-io.h"
12 #include "raid-stripe-tree.h"
13 #include "volumes.h"
14 #include "print-tree.h"
15 
16 static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
17 					       struct btrfs_path *path,
18 					       const struct btrfs_key *oldkey,
19 					       u64 newlen, u64 frontpad)
20 {
21 	struct btrfs_root *stripe_root = trans->fs_info->stripe_root;
22 	struct btrfs_stripe_extent *extent, AUTO_KFREE(newitem);
23 	struct extent_buffer *leaf;
24 	int slot;
25 	size_t item_size;
26 	struct btrfs_key newkey = {
27 		.objectid = oldkey->objectid + frontpad,
28 		.type = BTRFS_RAID_STRIPE_KEY,
29 		.offset = newlen,
30 	};
31 	int ret;
32 
33 	ASSERT(newlen > 0);
34 	ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);
35 
36 	leaf = path->nodes[0];
37 	slot = path->slots[0];
38 	item_size = btrfs_item_size(leaf, slot);
39 
40 	newitem = kzalloc(item_size, GFP_NOFS);
41 	if (!newitem)
42 		return -ENOMEM;
43 
44 	extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
45 
46 	for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
47 		struct btrfs_raid_stride *stride = &extent->strides[i];
48 		u64 phys;
49 
50 		phys = btrfs_raid_stride_physical(leaf, stride) + frontpad;
51 		btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys);
52 	}
53 
54 	ret = btrfs_del_item(trans, stripe_root, path);
55 	if (ret)
56 		return ret;
57 
58 	btrfs_release_path(path);
59 	return btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size);
60 }
61 
62 int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
63 {
64 	struct btrfs_fs_info *fs_info = trans->fs_info;
65 	struct btrfs_root *stripe_root = fs_info->stripe_root;
66 	BTRFS_PATH_AUTO_FREE(path);
67 	struct btrfs_key key;
68 	struct extent_buffer *leaf;
69 	u64 found_start;
70 	u64 found_end;
71 	u64 end = start + length;
72 	int slot;
73 	int ret;
74 
75 	if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root)
76 		return 0;
77 
78 	if (!btrfs_is_testing(fs_info)) {
79 		struct btrfs_chunk_map *map;
80 		bool use_rst;
81 
82 		map = btrfs_find_chunk_map(fs_info, start, length);
83 		if (!map)
84 			return -EINVAL;
85 		use_rst = btrfs_need_stripe_tree_update(fs_info, map->type);
86 		btrfs_free_chunk_map(map);
87 		if (!use_rst)
88 			return 0;
89 	}
90 
91 	path = btrfs_alloc_path();
92 	if (!path)
93 		return -ENOMEM;
94 
95 	while (1) {
96 		key.objectid = start;
97 		key.type = BTRFS_RAID_STRIPE_KEY;
98 		key.offset = 0;
99 
100 		ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
101 		if (ret < 0)
102 			break;
103 
104 		if (path->slots[0] == btrfs_header_nritems(path->nodes[0]))
105 			path->slots[0]--;
106 
107 		leaf = path->nodes[0];
108 		slot = path->slots[0];
109 		btrfs_item_key_to_cpu(leaf, &key, slot);
110 		found_start = key.objectid;
111 		found_end = found_start + key.offset;
112 		ret = 0;
113 
114 		/*
115 		 * The stripe extent starts before the range we want to delete,
116 		 * but the range spans more than one stripe extent:
117 		 *
118 		 * |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---|
119 		 *        |--- keep  ---|--- drop ---|
120 		 *
121 		 * This means we have to get the previous item, truncate its
122 		 * length and then restart the search.
123 		 */
124 		if (found_start > start) {
125 			if (slot == 0) {
126 				ret = btrfs_previous_item(stripe_root, path, start,
127 							  BTRFS_RAID_STRIPE_KEY);
128 				if (ret) {
129 					if (ret > 0)
130 						ret = -ENOENT;
131 					break;
132 				}
133 			} else {
134 				path->slots[0]--;
135 			}
136 
137 			leaf = path->nodes[0];
138 			slot = path->slots[0];
139 			btrfs_item_key_to_cpu(leaf, &key, slot);
140 			found_start = key.objectid;
141 			found_end = found_start + key.offset;
142 			ASSERT(found_start <= start);
143 		}
144 
145 		if (key.type != BTRFS_RAID_STRIPE_KEY)
146 			break;
147 
148 		/* That stripe ends before we start, we're done. */
149 		if (found_end <= start)
150 			break;
151 
152 		trace_btrfs_raid_extent_delete(fs_info, start, end,
153 					       found_start, found_end);
154 
155 		/*
156 		 * The stripe extent starts before the range we want to delete
157 		 * and ends after the range we want to delete, i.e. we're
158 		 * punching a hole in the stripe extent:
159 		 *
160 		 *  |--- RAID Stripe Extent ---|
161 		 *  | keep |--- drop ---| keep |
162 		 *
163 		 * This means we need to a) truncate the existing item and b)
164 		 * create a second item for the remaining range.
165 		 */
166 		if (found_start < start && found_end > end) {
167 			size_t item_size;
168 			u64 diff_start = start - found_start;
169 			u64 diff_end = found_end - end;
170 			struct btrfs_stripe_extent *extent;
171 			struct btrfs_key newkey = {
172 				.objectid = end,
173 				.type = BTRFS_RAID_STRIPE_KEY,
174 				.offset = diff_end,
175 			};
176 
177 			/* The "right" item. */
178 			ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey);
179 			if (ret)
180 				break;
181 
182 			item_size = btrfs_item_size(leaf, path->slots[0]);
183 			extent = btrfs_item_ptr(leaf, path->slots[0],
184 						struct btrfs_stripe_extent);
185 
186 			for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
187 				struct btrfs_raid_stride *stride = &extent->strides[i];
188 				u64 phys;
189 
190 				phys = btrfs_raid_stride_physical(leaf, stride);
191 				phys += diff_start + length;
192 				btrfs_set_raid_stride_physical(leaf, stride, phys);
193 			}
194 
195 			/* The "left" item. */
196 			path->slots[0]--;
197 			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
198 			btrfs_partially_delete_raid_extent(trans, path, &key,
199 							   diff_start, 0);
200 			break;
201 		}
202 
203 		/*
204 		 * The stripe extent starts before the range we want to delete:
205 		 *
206 		 * |--- RAID Stripe Extent ---|
207 		 * |--- keep  ---|--- drop ---|
208 		 *
209 		 * This means we have to duplicate the tree item, truncate the
210 		 * length to the new size and then re-insert the item.
211 		 */
212 		if (found_start < start) {
213 			u64 diff_start = start - found_start;
214 
215 			btrfs_partially_delete_raid_extent(trans, path, &key,
216 							   diff_start, 0);
217 
218 			start += (key.offset - diff_start);
219 			length -= (key.offset - diff_start);
220 			if (length == 0)
221 				break;
222 
223 			btrfs_release_path(path);
224 			continue;
225 		}
226 
227 		/*
228 		 * The stripe extent ends after the range we want to delete:
229 		 *
230 		 * |--- RAID Stripe Extent ---|
231 		 * |--- drop  ---|--- keep ---|
232 		 *
233 		 * This means we have to duplicate the tree item, truncate the
234 		 * length to the new size and then re-insert the item.
235 		 */
236 		if (found_end > end) {
237 			u64 diff_end = found_end - end;
238 
239 			btrfs_partially_delete_raid_extent(trans, path, &key,
240 							   key.offset - length,
241 							   length);
242 			ASSERT(key.offset - diff_end == length);
243 			break;
244 		}
245 
246 		/* Finally we can delete the whole item, no more special cases. */
247 		ret = btrfs_del_item(trans, stripe_root, path);
248 		if (ret)
249 			break;
250 
251 		start += key.offset;
252 		length -= key.offset;
253 		if (length == 0)
254 			break;
255 
256 		btrfs_release_path(path);
257 	}
258 
259 	return ret;
260 }
261 
262 static int update_raid_extent_item(struct btrfs_trans_handle *trans,
263 				   struct btrfs_key *key,
264 				   struct btrfs_stripe_extent *stripe_extent,
265 				   const size_t item_size)
266 {
267 	BTRFS_PATH_AUTO_FREE(path);
268 	struct extent_buffer *leaf;
269 	int ret;
270 	int slot;
271 
272 	path = btrfs_alloc_path();
273 	if (!path)
274 		return -ENOMEM;
275 
276 	ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path,
277 				0, 1);
278 	if (ret)
279 		return (ret == 1 ? ret : -EINVAL);
280 
281 	leaf = path->nodes[0];
282 	slot = path->slots[0];
283 
284 	write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
285 			    item_size);
286 
287 	return ret;
288 }
289 
290 EXPORT_FOR_TESTS
291 int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
292 				 struct btrfs_io_context *bioc)
293 {
294 	struct btrfs_fs_info *fs_info = trans->fs_info;
295 	struct btrfs_key stripe_key;
296 	struct btrfs_root *stripe_root = fs_info->stripe_root;
297 	const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
298 	struct btrfs_stripe_extent AUTO_KFREE(stripe_extent);
299 	const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
300 	int ret;
301 
302 	stripe_extent = kzalloc(item_size, GFP_NOFS);
303 	if (!unlikely(stripe_extent)) {
304 		btrfs_abort_transaction(trans, -ENOMEM);
305 		btrfs_end_transaction(trans);
306 		return -ENOMEM;
307 	}
308 
309 	trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size,
310 					   num_stripes);
311 	for (int i = 0; i < num_stripes; i++) {
312 		u64 devid = bioc->stripes[i].dev->devid;
313 		u64 physical = bioc->stripes[i].physical;
314 		struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i];
315 
316 		btrfs_set_stack_raid_stride_devid(raid_stride, devid);
317 		btrfs_set_stack_raid_stride_physical(raid_stride, physical);
318 	}
319 
320 	stripe_key.objectid = bioc->logical;
321 	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
322 	stripe_key.offset = bioc->size;
323 
324 	ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
325 				item_size);
326 	if (ret == -EEXIST) {
327 		ret = update_raid_extent_item(trans, &stripe_key, stripe_extent,
328 					      item_size);
329 		if (ret)
330 			btrfs_abort_transaction(trans, ret);
331 	} else if (ret) {
332 		btrfs_abort_transaction(trans, ret);
333 	}
334 
335 	return ret;
336 }
337 
338 int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
339 			     struct btrfs_ordered_extent *ordered_extent)
340 {
341 	struct btrfs_io_context *bioc;
342 	int ret;
343 
344 	if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE))
345 		return 0;
346 
347 	list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) {
348 		ret = btrfs_insert_one_raid_extent(trans, bioc);
349 		if (ret)
350 			return ret;
351 	}
352 
353 	while (!list_empty(&ordered_extent->bioc_list)) {
354 		bioc = list_first_entry(&ordered_extent->bioc_list,
355 					typeof(*bioc), rst_ordered_entry);
356 		list_del(&bioc->rst_ordered_entry);
357 		btrfs_put_bioc(bioc);
358 	}
359 
360 	return 0;
361 }
362 
363 int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
364 				 u64 logical, u64 *length, u64 map_type,
365 				 u32 stripe_index, struct btrfs_io_stripe *stripe)
366 {
367 	struct btrfs_root *stripe_root = fs_info->stripe_root;
368 	struct btrfs_stripe_extent *stripe_extent;
369 	struct btrfs_key stripe_key;
370 	struct btrfs_key found_key;
371 	BTRFS_PATH_AUTO_FREE(path);
372 	struct extent_buffer *leaf;
373 	const u64 end = logical + *length;
374 	int num_stripes;
375 	u64 offset;
376 	u64 found_logical;
377 	u64 found_length;
378 	u64 found_end;
379 	int slot;
380 	int ret;
381 
382 	stripe_key.objectid = logical;
383 	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
384 	stripe_key.offset = 0;
385 
386 	path = btrfs_alloc_path();
387 	if (!path)
388 		return -ENOMEM;
389 
390 	if (stripe->rst_search_commit_root) {
391 		path->skip_locking = true;
392 		path->search_commit_root = true;
393 	}
394 
395 	ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
396 	if (ret < 0)
397 		return ret;
398 	if (ret) {
399 		if (path->slots[0] != 0)
400 			path->slots[0]--;
401 	}
402 
403 	while (1) {
404 		leaf = path->nodes[0];
405 		slot = path->slots[0];
406 
407 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
408 		found_logical = found_key.objectid;
409 		found_length = found_key.offset;
410 		found_end = found_logical + found_length;
411 
412 		if (found_logical > end) {
413 			ret = -ENODATA;
414 			goto out;
415 		}
416 
417 		if (in_range(logical, found_logical, found_length))
418 			break;
419 
420 		ret = btrfs_next_item(stripe_root, path);
421 		if (ret)
422 			goto out;
423 	}
424 
425 	offset = logical - found_logical;
426 
427 	/*
428 	 * If we have a logically contiguous, but physically non-continuous
429 	 * range, we need to split the bio. Record the length after which we
430 	 * must split the bio.
431 	 */
432 	if (end > found_end)
433 		*length -= end - found_end;
434 
435 	num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
436 	stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
437 
438 	for (int i = 0; i < num_stripes; i++) {
439 		struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
440 		u64 devid = btrfs_raid_stride_devid(leaf, stride);
441 		u64 physical = btrfs_raid_stride_physical(leaf, stride);
442 
443 		if (devid != stripe->dev->devid)
444 			continue;
445 
446 		if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i)
447 			continue;
448 
449 		stripe->physical = physical + offset;
450 
451 		trace_btrfs_get_raid_extent_offset(fs_info, logical, *length,
452 						   stripe->physical, devid);
453 
454 		return 0;
455 	}
456 
457 	/* If we're here, we haven't found the requested devid in the stripe. */
458 	ret = -ENODATA;
459 out:
460 	if (ret > 0)
461 		ret = -ENODATA;
462 	if (ret && ret != -EIO && !stripe->rst_search_commit_root) {
463 		btrfs_debug(fs_info,
464 		"cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
465 			  logical, logical + *length, stripe->dev->devid,
466 			  btrfs_bg_type_to_raid_name(map_type));
467 	}
468 
469 	return ret;
470 }
471