1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2023 Western Digital Corporation or its affiliates. 4 */ 5 6 #include <linux/btrfs_tree.h> 7 #include "ctree.h" 8 #include "fs.h" 9 #include "accessors.h" 10 #include "transaction.h" 11 #include "disk-io.h" 12 #include "raid-stripe-tree.h" 13 #include "volumes.h" 14 #include "print-tree.h" 15 16 static int btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans, 17 struct btrfs_path *path, 18 const struct btrfs_key *oldkey, 19 u64 newlen, u64 frontpad) 20 { 21 struct btrfs_root *stripe_root = trans->fs_info->stripe_root; 22 struct btrfs_stripe_extent *extent, AUTO_KFREE(newitem); 23 struct extent_buffer *leaf; 24 int slot; 25 size_t item_size; 26 struct btrfs_key newkey = { 27 .objectid = oldkey->objectid + frontpad, 28 .type = BTRFS_RAID_STRIPE_KEY, 29 .offset = newlen, 30 }; 31 int ret; 32 33 ASSERT(newlen > 0); 34 ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY); 35 36 leaf = path->nodes[0]; 37 slot = path->slots[0]; 38 item_size = btrfs_item_size(leaf, slot); 39 40 newitem = kzalloc(item_size, GFP_NOFS); 41 if (!newitem) 42 return -ENOMEM; 43 44 extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent); 45 46 for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) { 47 struct btrfs_raid_stride *stride = &extent->strides[i]; 48 u64 phys; 49 50 phys = btrfs_raid_stride_physical(leaf, stride) + frontpad; 51 btrfs_set_stack_raid_stride_physical(&newitem->strides[i], phys); 52 } 53 54 ret = btrfs_del_item(trans, stripe_root, path); 55 if (ret) 56 return ret; 57 58 btrfs_release_path(path); 59 return btrfs_insert_item(trans, stripe_root, &newkey, newitem, item_size); 60 } 61 62 int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length) 63 { 64 struct btrfs_fs_info *fs_info = trans->fs_info; 65 struct btrfs_root *stripe_root = fs_info->stripe_root; 66 BTRFS_PATH_AUTO_FREE(path); 67 struct btrfs_key key; 68 struct extent_buffer *leaf; 69 u64 found_start; 70 u64 found_end; 71 u64 end = start + length; 72 int slot; 73 int ret; 74 75 if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE) || !stripe_root) 76 return 0; 77 78 if (!btrfs_is_testing(fs_info)) { 79 struct btrfs_chunk_map *map; 80 bool use_rst; 81 82 map = btrfs_find_chunk_map(fs_info, start, length); 83 if (!map) 84 return -EINVAL; 85 use_rst = btrfs_need_stripe_tree_update(fs_info, map->type); 86 btrfs_free_chunk_map(map); 87 if (!use_rst) 88 return 0; 89 } 90 91 path = btrfs_alloc_path(); 92 if (!path) 93 return -ENOMEM; 94 95 while (1) { 96 key.objectid = start; 97 key.type = BTRFS_RAID_STRIPE_KEY; 98 key.offset = 0; 99 100 ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1); 101 if (ret < 0) 102 break; 103 104 if (path->slots[0] == btrfs_header_nritems(path->nodes[0])) 105 path->slots[0]--; 106 107 leaf = path->nodes[0]; 108 slot = path->slots[0]; 109 btrfs_item_key_to_cpu(leaf, &key, slot); 110 found_start = key.objectid; 111 found_end = found_start + key.offset; 112 ret = 0; 113 114 /* 115 * The stripe extent starts before the range we want to delete, 116 * but the range spans more than one stripe extent: 117 * 118 * |--- RAID Stripe Extent ---||--- RAID Stripe Extent ---| 119 * |--- keep ---|--- drop ---| 120 * 121 * This means we have to get the previous item, truncate its 122 * length and then restart the search. 123 */ 124 if (found_start > start) { 125 if (slot == 0) { 126 ret = btrfs_previous_item(stripe_root, path, start, 127 BTRFS_RAID_STRIPE_KEY); 128 if (ret) { 129 if (ret > 0) 130 ret = -ENOENT; 131 break; 132 } 133 } else { 134 path->slots[0]--; 135 } 136 137 leaf = path->nodes[0]; 138 slot = path->slots[0]; 139 btrfs_item_key_to_cpu(leaf, &key, slot); 140 found_start = key.objectid; 141 found_end = found_start + key.offset; 142 ASSERT(found_start <= start); 143 } 144 145 if (key.type != BTRFS_RAID_STRIPE_KEY) 146 break; 147 148 /* That stripe ends before we start, we're done. */ 149 if (found_end <= start) 150 break; 151 152 trace_btrfs_raid_extent_delete(fs_info, start, end, 153 found_start, found_end); 154 155 /* 156 * The stripe extent starts before the range we want to delete 157 * and ends after the range we want to delete, i.e. we're 158 * punching a hole in the stripe extent: 159 * 160 * |--- RAID Stripe Extent ---| 161 * | keep |--- drop ---| keep | 162 * 163 * This means we need to a) truncate the existing item and b) 164 * create a second item for the remaining range. 165 */ 166 if (found_start < start && found_end > end) { 167 size_t item_size; 168 u64 diff_start = start - found_start; 169 u64 diff_end = found_end - end; 170 struct btrfs_stripe_extent *extent; 171 struct btrfs_key newkey = { 172 .objectid = end, 173 .type = BTRFS_RAID_STRIPE_KEY, 174 .offset = diff_end, 175 }; 176 177 /* The "right" item. */ 178 ret = btrfs_duplicate_item(trans, stripe_root, path, &newkey); 179 if (ret) 180 break; 181 182 item_size = btrfs_item_size(leaf, path->slots[0]); 183 extent = btrfs_item_ptr(leaf, path->slots[0], 184 struct btrfs_stripe_extent); 185 186 for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) { 187 struct btrfs_raid_stride *stride = &extent->strides[i]; 188 u64 phys; 189 190 phys = btrfs_raid_stride_physical(leaf, stride); 191 phys += diff_start + length; 192 btrfs_set_raid_stride_physical(leaf, stride, phys); 193 } 194 195 /* The "left" item. */ 196 path->slots[0]--; 197 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 198 btrfs_partially_delete_raid_extent(trans, path, &key, 199 diff_start, 0); 200 break; 201 } 202 203 /* 204 * The stripe extent starts before the range we want to delete: 205 * 206 * |--- RAID Stripe Extent ---| 207 * |--- keep ---|--- drop ---| 208 * 209 * This means we have to duplicate the tree item, truncate the 210 * length to the new size and then re-insert the item. 211 */ 212 if (found_start < start) { 213 u64 diff_start = start - found_start; 214 215 btrfs_partially_delete_raid_extent(trans, path, &key, 216 diff_start, 0); 217 218 start += (key.offset - diff_start); 219 length -= (key.offset - diff_start); 220 if (length == 0) 221 break; 222 223 btrfs_release_path(path); 224 continue; 225 } 226 227 /* 228 * The stripe extent ends after the range we want to delete: 229 * 230 * |--- RAID Stripe Extent ---| 231 * |--- drop ---|--- keep ---| 232 * 233 * This means we have to duplicate the tree item, truncate the 234 * length to the new size and then re-insert the item. 235 */ 236 if (found_end > end) { 237 u64 diff_end = found_end - end; 238 239 btrfs_partially_delete_raid_extent(trans, path, &key, 240 key.offset - length, 241 length); 242 ASSERT(key.offset - diff_end == length); 243 break; 244 } 245 246 /* Finally we can delete the whole item, no more special cases. */ 247 ret = btrfs_del_item(trans, stripe_root, path); 248 if (ret) 249 break; 250 251 start += key.offset; 252 length -= key.offset; 253 if (length == 0) 254 break; 255 256 btrfs_release_path(path); 257 } 258 259 return ret; 260 } 261 262 static int update_raid_extent_item(struct btrfs_trans_handle *trans, 263 struct btrfs_key *key, 264 struct btrfs_stripe_extent *stripe_extent, 265 const size_t item_size) 266 { 267 BTRFS_PATH_AUTO_FREE(path); 268 struct extent_buffer *leaf; 269 int ret; 270 int slot; 271 272 path = btrfs_alloc_path(); 273 if (!path) 274 return -ENOMEM; 275 276 ret = btrfs_search_slot(trans, trans->fs_info->stripe_root, key, path, 277 0, 1); 278 if (ret) 279 return (ret == 1 ? ret : -EINVAL); 280 281 leaf = path->nodes[0]; 282 slot = path->slots[0]; 283 284 write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot), 285 item_size); 286 287 return ret; 288 } 289 290 EXPORT_FOR_TESTS 291 int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans, 292 struct btrfs_io_context *bioc) 293 { 294 struct btrfs_fs_info *fs_info = trans->fs_info; 295 struct btrfs_key stripe_key; 296 struct btrfs_root *stripe_root = fs_info->stripe_root; 297 const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type); 298 struct btrfs_stripe_extent AUTO_KFREE(stripe_extent); 299 const size_t item_size = struct_size(stripe_extent, strides, num_stripes); 300 int ret; 301 302 stripe_extent = kzalloc(item_size, GFP_NOFS); 303 if (!unlikely(stripe_extent)) { 304 btrfs_abort_transaction(trans, -ENOMEM); 305 btrfs_end_transaction(trans); 306 return -ENOMEM; 307 } 308 309 trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size, 310 num_stripes); 311 for (int i = 0; i < num_stripes; i++) { 312 u64 devid = bioc->stripes[i].dev->devid; 313 u64 physical = bioc->stripes[i].physical; 314 struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i]; 315 316 btrfs_set_stack_raid_stride_devid(raid_stride, devid); 317 btrfs_set_stack_raid_stride_physical(raid_stride, physical); 318 } 319 320 stripe_key.objectid = bioc->logical; 321 stripe_key.type = BTRFS_RAID_STRIPE_KEY; 322 stripe_key.offset = bioc->size; 323 324 ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent, 325 item_size); 326 if (ret == -EEXIST) { 327 ret = update_raid_extent_item(trans, &stripe_key, stripe_extent, 328 item_size); 329 if (ret) 330 btrfs_abort_transaction(trans, ret); 331 } else if (ret) { 332 btrfs_abort_transaction(trans, ret); 333 } 334 335 return ret; 336 } 337 338 int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans, 339 struct btrfs_ordered_extent *ordered_extent) 340 { 341 struct btrfs_io_context *bioc; 342 int ret; 343 344 if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE)) 345 return 0; 346 347 list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) { 348 ret = btrfs_insert_one_raid_extent(trans, bioc); 349 if (ret) 350 return ret; 351 } 352 353 while (!list_empty(&ordered_extent->bioc_list)) { 354 bioc = list_first_entry(&ordered_extent->bioc_list, 355 typeof(*bioc), rst_ordered_entry); 356 list_del(&bioc->rst_ordered_entry); 357 btrfs_put_bioc(bioc); 358 } 359 360 return 0; 361 } 362 363 int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, 364 u64 logical, u64 *length, u64 map_type, 365 u32 stripe_index, struct btrfs_io_stripe *stripe) 366 { 367 struct btrfs_root *stripe_root = fs_info->stripe_root; 368 struct btrfs_stripe_extent *stripe_extent; 369 struct btrfs_key stripe_key; 370 struct btrfs_key found_key; 371 BTRFS_PATH_AUTO_FREE(path); 372 struct extent_buffer *leaf; 373 const u64 end = logical + *length; 374 int num_stripes; 375 u64 offset; 376 u64 found_logical; 377 u64 found_length; 378 u64 found_end; 379 int slot; 380 int ret; 381 382 stripe_key.objectid = logical; 383 stripe_key.type = BTRFS_RAID_STRIPE_KEY; 384 stripe_key.offset = 0; 385 386 path = btrfs_alloc_path(); 387 if (!path) 388 return -ENOMEM; 389 390 if (stripe->rst_search_commit_root) { 391 path->skip_locking = true; 392 path->search_commit_root = true; 393 } 394 395 ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0); 396 if (ret < 0) 397 return ret; 398 if (ret) { 399 if (path->slots[0] != 0) 400 path->slots[0]--; 401 } 402 403 while (1) { 404 leaf = path->nodes[0]; 405 slot = path->slots[0]; 406 407 btrfs_item_key_to_cpu(leaf, &found_key, slot); 408 found_logical = found_key.objectid; 409 found_length = found_key.offset; 410 found_end = found_logical + found_length; 411 412 if (found_logical > end) { 413 ret = -ENODATA; 414 goto out; 415 } 416 417 if (in_range(logical, found_logical, found_length)) 418 break; 419 420 ret = btrfs_next_item(stripe_root, path); 421 if (ret) 422 goto out; 423 } 424 425 offset = logical - found_logical; 426 427 /* 428 * If we have a logically contiguous, but physically non-continuous 429 * range, we need to split the bio. Record the length after which we 430 * must split the bio. 431 */ 432 if (end > found_end) 433 *length -= end - found_end; 434 435 num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot)); 436 stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent); 437 438 for (int i = 0; i < num_stripes; i++) { 439 struct btrfs_raid_stride *stride = &stripe_extent->strides[i]; 440 u64 devid = btrfs_raid_stride_devid(leaf, stride); 441 u64 physical = btrfs_raid_stride_physical(leaf, stride); 442 443 if (devid != stripe->dev->devid) 444 continue; 445 446 if ((map_type & BTRFS_BLOCK_GROUP_DUP) && stripe_index != i) 447 continue; 448 449 stripe->physical = physical + offset; 450 451 trace_btrfs_get_raid_extent_offset(fs_info, logical, *length, 452 stripe->physical, devid); 453 454 return 0; 455 } 456 457 /* If we're here, we haven't found the requested devid in the stripe. */ 458 ret = -ENODATA; 459 out: 460 if (ret > 0) 461 ret = -ENODATA; 462 if (ret && ret != -EIO && !stripe->rst_search_commit_root) { 463 btrfs_debug(fs_info, 464 "cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s", 465 logical, logical + *length, stripe->dev->devid, 466 btrfs_bg_type_to_raid_name(map_type)); 467 } 468 469 return ret; 470 } 471