1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2023 Western Digital Corporation or its affiliates. 4 */ 5 6 #include <linux/btrfs_tree.h> 7 #include "ctree.h" 8 #include "fs.h" 9 #include "accessors.h" 10 #include "transaction.h" 11 #include "disk-io.h" 12 #include "raid-stripe-tree.h" 13 #include "volumes.h" 14 #include "print-tree.h" 15 16 static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans, 17 struct btrfs_path *path, 18 const struct btrfs_key *oldkey, 19 u64 newlen, u64 frontpad) 20 { 21 struct btrfs_root *stripe_root = trans->fs_info->stripe_root; 22 struct btrfs_stripe_extent *extent, AUTO_KFREE(newitem); 23 struct extent_buffer *leaf; 24 int slot; 25 size_t item_size; 26 struct btrfs_key newkey = { 27 .objectid = oldkey->objectid + frontpad, 28 .type = BTRFS_RAID_STRIPE_KEY, 29 .offset = newlen, 30 }; 31 int ret; 32 33 ASSERT(newlen > 0); 34 ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY); 35 36 leaf = path->nodes[0]; 37 slot = path->slots[0]; 38 item_size = btrfs_item_size(leaf, slot); 39 40 newitem = kzalloc(item_size, GFP_NOFS); 41 if (!newitem) 42 return -ENOMEM; 43 44 extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent); 45 46 for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) { 47 struct btrfs_raid_stride *stride = &extent->strides[i]; 48 u64 devid; 49 u64 phys; 50 51 devid = btrfs_raid_stride_devid(leaf, stride); 52 btrfs_set_stack_raid_stride_devid(&newitem->strides[i], devid); 53 phys = btrfs_raid_stride_physical(leaf, stride) + frontpad; 54 btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys); 55 } 56 57 ret = btrfs_del_item(trans, stripe_root, path); 58 if (ret) 59 return ret; 60 61 btrfs_release_path(path); 62 return btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size); 63 } 64 65 int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length) 66 { 67 struct btrfs_fs_info *fs_info = trans->fs_info; 68 struct btrfs_root *stripe_root = fs_info->stripe_root; 69 BTRFS_PATH_AUTO_FREE(path); 70 struct btrfs_key key; 71 struct extent_buffer *leaf; 72 u64 found_start; 73 u64 found_end; 74 u64 end = start + length; 75 int slot; 76 int ret; 77 78 if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root) 79 return 0; 80 81 if (!btrfs_is_testing(fs_info)) { 82 struct btrfs_chunk_map *map; 83 bool use_rst; 84 85 map = btrfs_find_chunk_map(fs_info, start, length); 86 if (!map) 87 return -EINVAL; 88 use_rst = btrfs_need_stripe_tree_update(fs_info, map->type); 89 btrfs_free_chunk_map(map); 90 if (!use_rst) 91 return 0; 92 } 93 94 path = btrfs_alloc_path(); 95 if (!path) 96 return -ENOMEM; 97 98 while (1) { 99 key.objectid = start; 100 key.type = BTRFS_RAID_STRIPE_KEY; 101 key.offset = (u64)-1; 102 103 ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1); 104 if (ret < 0) 105 break; 106 107 /* 108 * Search with offset=(u64)-1 ensures we land on the correct 109 * leaf even when the target entry is the first item on a leaf. 110 * Since no real entry has offset=(u64)-1, ret is always 1 and 111 * slot points past the last entry with objectid==start (or 112 * past the end of the leaf if that entry is the last item). 113 * Back up one slot to find the actual entry. 114 */ 115 if (path->slots[0] == 0) { 116 /* No entry with objectid <= start exists. */ 117 ret = 0; 118 break; 119 } 120 path->slots[0]--; 121 122 leaf = path->nodes[0]; 123 slot = path->slots[0]; 124 btrfs_item_key_to_cpu(leaf, &key, slot); 125 found_start = key.objectid; 126 found_end = found_start + key.offset; 127 ret = 0; 128 129 /* 130 * The stripe extent starts before the range we want to delete, 131 * but the range spans more than one stripe extent: 132 * 133 * |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---| 134 * |--- keep ---|--- drop ---| 135 * 136 * This means we have to get the previous item, truncate its 137 * length and then restart the search. 138 */ 139 if (found_start > start) { 140 if (slot == 0) { 141 ret = btrfs_previous_item(stripe_root, path, 0, 142 BTRFS_RAID_STRIPE_KEY); 143 if (ret) { 144 if (ret > 0) 145 ret = -ENOENT; 146 break; 147 } 148 } else { 149 path->slots[0]--; 150 } 151 152 leaf = path->nodes[0]; 153 slot = path->slots[0]; 154 btrfs_item_key_to_cpu(leaf, &key, slot); 155 found_start = key.objectid; 156 found_end = found_start + key.offset; 157 if (found_start > start || found_end <= start) { 158 ret = -ENOENT; 159 break; 160 } 161 } 162 163 if (key.type != BTRFS_RAID_STRIPE_KEY) 164 break; 165 166 /* That stripe ends before we start, we're done. */ 167 if (found_end <= start) 168 break; 169 170 trace_btrfs_raid_extent_delete(fs_info, start, end, 171 found_start, found_end); 172 173 /* 174 * The stripe extent starts before the range we want to delete 175 * and ends after the range we want to delete, i.e. we're 176 * punching a hole in the stripe extent: 177 * 178 * |--- RAID Stripe Extent ---| 179 * | keep |--- drop ---| keep | 180 * 181 * This means we need to a) truncate the existing item and b) 182 * create a second item for the remaining range. 183 */ 184 if (found_start < start && found_end > end) { 185 size_t item_size; 186 u64 diff_start = start - found_start; 187 u64 diff_end = found_end - end; 188 struct btrfs_stripe_extent *extent; 189 struct btrfs_key newkey = { 190 .objectid = end, 191 .type = BTRFS_RAID_STRIPE_KEY, 192 .offset = diff_end, 193 }; 194 195 /* The "right" item. */ 196 ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey); 197 if (ret == -EAGAIN) { 198 btrfs_release_path(path); 199 continue; 200 } 201 if (ret) 202 break; 203 204 /* 205 * btrfs_duplicate_item() may have triggered a leaf 206 * split via setup_leaf_for_split(), so we must refresh 207 * our leaf pointer from the path. 208 */ 209 leaf = path->nodes[0]; 210 item_size = btrfs_item_size(leaf, path->slots[0]); 211 extent = btrfs_item_ptr(leaf, path->slots[0], 212 struct btrfs_stripe_extent); 213 214 for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) { 215 struct btrfs_raid_stride *stride = &extent->strides[i]; 216 u64 phys; 217 218 phys = btrfs_raid_stride_physical(leaf, stride); 219 phys += diff_start + length; 220 btrfs_set_raid_stride_physical(leaf, stride, phys); 221 } 222 223 /* The "left" item. */ 224 path->slots[0]--; 225 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 226 ret = btrfs_partially_delete_raid_extent(trans, path, 227 &key, 228 diff_start, 0); 229 break; 230 } 231 232 /* 233 * The stripe extent starts before the range we want to delete: 234 * 235 * |--- RAID Stripe Extent ---| 236 * |--- keep ---|--- drop ---| 237 * 238 * This means we have to duplicate the tree item, truncate the 239 * length to the new size and then re-insert the item. 240 */ 241 if (found_start < start) { 242 u64 diff_start = start - found_start; 243 244 ret = btrfs_partially_delete_raid_extent(trans, path, 245 &key, 246 diff_start, 0); 247 if (ret) 248 break; 249 250 start += (key.offset - diff_start); 251 length -= (key.offset - diff_start); 252 if (length == 0) 253 break; 254 255 btrfs_release_path(path); 256 continue; 257 } 258 259 /* 260 * The stripe extent ends after the range we want to delete: 261 * 262 * |--- RAID Stripe Extent ---| 263 * |--- drop ---|--- keep ---| 264 * 265 * This means we have to duplicate the tree item, truncate the 266 * length to the new size and then re-insert the item. 267 */ 268 if (found_end > end) { 269 u64 diff_end = found_end - end; 270 271 ret = btrfs_partially_delete_raid_extent(trans, path, 272 &key, 273 key.offset - length, 274 length); 275 ASSERT(key.offset - diff_end == length); 276 break; 277 } 278 279 /* Finally we can delete the whole item, no more special cases. */ 280 ret = btrfs_del_item(trans, stripe_root, path); 281 if (ret) 282 break; 283 284 start += key.offset; 285 length -= key.offset; 286 if (length == 0) 287 break; 288 289 btrfs_release_path(path); 290 } 291 292 return ret; 293 } 294 295 static int update_raid_extent_item(struct btrfs_trans_handle *trans, 296 struct btrfs_key *key, 297 struct btrfs_stripe_extent *stripe_extent, 298 const size_t item_size) 299 { 300 BTRFS_PATH_AUTO_FREE(path); 301 struct extent_buffer *leaf; 302 int ret; 303 int slot; 304 305 path = btrfs_alloc_path(); 306 if (!path) 307 return -ENOMEM; 308 309 ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path, 310 0, 1); 311 if (ret) 312 return (ret == 1 ? ret : -EINVAL); 313 314 leaf = path->nodes[0]; 315 slot = path->slots[0]; 316 317 write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot), 318 item_size); 319 320 return ret; 321 } 322 323 EXPORT_FOR_TESTS 324 int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans, 325 struct btrfs_io_context *bioc) 326 { 327 struct btrfs_fs_info *fs_info = trans->fs_info; 328 struct btrfs_key stripe_key; 329 struct btrfs_root *stripe_root = fs_info->stripe_root; 330 const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type); 331 struct btrfs_stripe_extent AUTO_KFREE(stripe_extent); 332 const size_t item_size = struct_size(stripe_extent, strides, num_stripes); 333 int ret; 334 335 stripe_extent = kzalloc(item_size, GFP_NOFS); 336 if (unlikely(!stripe_extent)) { 337 btrfs_abort_transaction(trans, -ENOMEM); 338 btrfs_end_transaction(trans); 339 return -ENOMEM; 340 } 341 342 trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size, 343 num_stripes); 344 for (int i = 0; i < num_stripes; i++) { 345 u64 devid = bioc->stripes[i].dev->devid; 346 u64 physical = bioc->stripes[i].physical; 347 struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i]; 348 349 btrfs_set_stack_raid_stride_devid(raid_stride, devid); 350 btrfs_set_stack_raid_stride_physical(raid_stride, physical); 351 } 352 353 stripe_key.objectid = bioc->logical; 354 stripe_key.type = BTRFS_RAID_STRIPE_KEY; 355 stripe_key.offset = bioc->size; 356 357 ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent, 358 item_size); 359 if (ret == -EEXIST) { 360 ret = update_raid_extent_item(trans, &stripe_key, stripe_extent, 361 item_size); 362 if (ret) 363 btrfs_abort_transaction(trans, ret); 364 } else if (ret) { 365 btrfs_abort_transaction(trans, ret); 366 } 367 368 return ret; 369 } 370 371 int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans, 372 struct btrfs_ordered_extent *ordered_extent) 373 { 374 struct btrfs_io_context *bioc; 375 int ret; 376 377 if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE)) 378 return 0; 379 380 list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) { 381 ret = btrfs_insert_one_raid_extent(trans, bioc); 382 if (ret) 383 return ret; 384 } 385 386 while (!list_empty(&ordered_extent->bioc_list)) { 387 bioc = list_first_entry(&ordered_extent->bioc_list, 388 typeof(*bioc), rst_ordered_entry); 389 list_del(&bioc->rst_ordered_entry); 390 btrfs_put_bioc(bioc); 391 } 392 393 return 0; 394 } 395 396 int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, 397 u64 logical, u64 *length, u64 map_type, 398 u32 stripe_index, struct btrfs_io_stripe *stripe) 399 { 400 struct btrfs_root *stripe_root = fs_info->stripe_root; 401 struct btrfs_stripe_extent *stripe_extent; 402 struct btrfs_key stripe_key; 403 struct btrfs_key found_key; 404 BTRFS_PATH_AUTO_FREE(path); 405 struct extent_buffer *leaf; 406 const u64 end = logical + *length; 407 int num_stripes; 408 u64 offset; 409 u64 found_logical; 410 u64 found_length; 411 u64 found_end; 412 int slot; 413 int ret; 414 415 stripe_key.objectid = logical; 416 stripe_key.type = BTRFS_RAID_STRIPE_KEY; 417 stripe_key.offset = 0; 418 419 path = btrfs_alloc_path(); 420 if (!path) 421 return -ENOMEM; 422 423 if (stripe->rst_search_commit_root) { 424 path->skip_locking = true; 425 path->search_commit_root = true; 426 } 427 428 ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0); 429 if (ret < 0) 430 return ret; 431 if (ret) { 432 if (path->slots[0] != 0) 433 path->slots[0]--; 434 } 435 436 while (1) { 437 leaf = path->nodes[0]; 438 slot = path->slots[0]; 439 440 btrfs_item_key_to_cpu(leaf, &found_key, slot); 441 found_logical = found_key.objectid; 442 found_length = found_key.offset; 443 found_end = found_logical + found_length; 444 445 if (found_logical > end) { 446 ret = -ENODATA; 447 goto out; 448 } 449 450 if (in_range(logical, found_logical, found_length)) 451 break; 452 453 ret = btrfs_next_item(stripe_root, path); 454 if (ret) 455 goto out; 456 } 457 458 offset = logical - found_logical; 459 460 /* 461 * If we have a logically contiguous, but physically non-continuous 462 * range, we need to split the bio. Record the length after which we 463 * must split the bio. 464 */ 465 if (end > found_end) 466 *length -= end - found_end; 467 468 num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot)); 469 stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent); 470 471 for (int i = 0; i < num_stripes; i++) { 472 struct btrfs_raid_stride *stride = &stripe_extent->strides[i]; 473 u64 devid = btrfs_raid_stride_devid(leaf, stride); 474 u64 physical = btrfs_raid_stride_physical(leaf, stride); 475 476 if (devid != stripe->dev->devid) 477 continue; 478 479 if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i) 480 continue; 481 482 stripe->physical = physical + offset; 483 484 trace_btrfs_get_raid_extent_offset(fs_info, logical, *length, 485 stripe->physical, devid); 486 487 return 0; 488 } 489 490 /* If we're here, we haven't found the requested devid in the stripe. */ 491 ret = -ENODATA; 492 out: 493 if (ret > 0) 494 ret = -ENODATA; 495 if (ret && ret != -EIO && !stripe->rst_search_commit_root) { 496 btrfs_debug(fs_info, 497 "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s", 498 logical, logical + *length, stripe->dev->devid, 499 btrfs_bg_type_to_raid_name(map_type)); 500 } 501 502 return ret; 503 } 504