1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* 27 * Copyright (c) 2013, 2019 by Delphix. All rights reserved. 28 * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. 29 */ 30 31 #include <sys/zfs_context.h> 32 #include <sys/spa.h> 33 #include <sys/dmu.h> 34 #include <sys/dnode.h> 35 #include <sys/zio.h> 36 #include <sys/range_tree.h> 37 #include <sys/sysmacros.h> 38 39 #ifndef _KERNEL 40 /* 41 * Need the extra 'abort()' here since is possible for PANIC() to return, and 42 * our panic() usage in this file assumes it's NORETURN. 43 */ 44 #define panic(...) do {PANIC(__VA_ARGS__); abort(); } while (0); 45 #define zfs_panic_recover(...) panic(__VA_ARGS__) 46 #endif 47 48 /* 49 * Range trees are tree-based data structures that can be used to 50 * track free space or generally any space allocation information. 51 * A range tree keeps track of individual segments and automatically 52 * provides facilities such as adjacent extent merging and extent 53 * splitting in response to range add/remove requests. 54 * 55 * A range tree starts out completely empty, with no segments in it. 56 * Adding an allocation via zfs_range_tree_add to the range tree can either: 57 * 1) create a new extent 58 * 2) extend an adjacent extent 59 * 3) merge two adjacent extents 60 * Conversely, removing an allocation via zfs_range_tree_remove can: 61 * 1) completely remove an extent 62 * 2) shorten an extent (if the allocation was near one of its ends) 63 * 3) split an extent into two extents, in effect punching a hole 64 * 65 * A range tree is also capable of 'bridging' gaps when adding 66 * allocations. This is useful for cases when close proximity of 67 * allocations is an important detail that needs to be represented 68 * in the range tree. See zfs_range_tree_set_gap(). The default behavior 69 * is not to bridge gaps (i.e. the maximum allowed gap size is 0). 70 * 71 * In order to traverse a range tree, use either the zfs_range_tree_walk() 72 * or zfs_range_tree_vacate() functions. 73 * 74 * To obtain more accurate information on individual segment 75 * operations that the range tree performs "under the hood", you can 76 * specify a set of callbacks by passing a zfs_range_tree_ops_t structure 77 * to the zfs_range_tree_create function. Any callbacks that are non-NULL 78 * are then called at the appropriate times. 79 * 80 * The range tree code also supports a special variant of range trees 81 * that can bridge small gaps between segments. This kind of tree is used 82 * by the dsl scanning code to group I/Os into mostly sequential chunks to 83 * optimize disk performance. The code here attempts to do this with as 84 * little memory and computational overhead as possible. One limitation of 85 * this implementation is that segments of range trees with gaps can only 86 * support removing complete segments. 87 */ 88 89 static inline void 90 zfs_rs_copy(zfs_range_seg_t *src, zfs_range_seg_t *dest, zfs_range_tree_t *rt) 91 { 92 ASSERT3U(rt->rt_type, <, ZFS_RANGE_SEG_NUM_TYPES); 93 size_t size = 0; 94 switch (rt->rt_type) { 95 case ZFS_RANGE_SEG32: 96 size = sizeof (zfs_range_seg32_t); 97 break; 98 case ZFS_RANGE_SEG64: 99 size = sizeof (zfs_range_seg64_t); 100 break; 101 case ZFS_RANGE_SEG_GAP: 102 size = sizeof (zfs_range_seg_gap_t); 103 break; 104 default: 105 __builtin_unreachable(); 106 } 107 memcpy(dest, src, size); 108 } 109 110 void 111 zfs_range_tree_stat_verify(zfs_range_tree_t *rt) 112 { 113 zfs_range_seg_t *rs; 114 zfs_btree_index_t where; 115 uint64_t hist[ZFS_RANGE_TREE_HISTOGRAM_SIZE] = { 0 }; 116 int i; 117 118 for (rs = zfs_btree_first(&rt->rt_root, &where); rs != NULL; 119 rs = zfs_btree_next(&rt->rt_root, &where, &where)) { 120 uint64_t size = zfs_rs_get_end(rs, rt) - 121 zfs_rs_get_start(rs, rt); 122 int idx = highbit64(size) - 1; 123 124 hist[idx]++; 125 ASSERT3U(hist[idx], !=, 0); 126 } 127 128 for (i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++) { 129 VERIFY3UF(hist[i], ==, rt->rt_histogram[i], 130 "i=%d, hist=%px, hist=%llu, rt_hist=%llu", 131 i, hist, (u_longlong_t)hist[i], 132 (u_longlong_t)rt->rt_histogram[i]); 133 } 134 } 135 136 static void 137 zfs_range_tree_stat_incr(zfs_range_tree_t *rt, zfs_range_seg_t *rs) 138 { 139 uint64_t size = zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt); 140 int idx = highbit64(size) - 1; 141 142 ASSERT(size != 0); 143 ASSERT3U(idx, <, 144 sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram)); 145 146 rt->rt_histogram[idx]++; 147 ASSERT3U(rt->rt_histogram[idx], !=, 0); 148 } 149 150 static void 151 zfs_range_tree_stat_decr(zfs_range_tree_t *rt, zfs_range_seg_t *rs) 152 { 153 uint64_t size = zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt); 154 int idx = highbit64(size) - 1; 155 156 ASSERT(size != 0); 157 ASSERT3U(idx, <, 158 sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram)); 159 160 ASSERT3U(rt->rt_histogram[idx], !=, 0); 161 rt->rt_histogram[idx]--; 162 } 163 164 __attribute__((always_inline)) inline 165 static int 166 zfs_range_tree_seg32_compare(const void *x1, const void *x2) 167 { 168 const zfs_range_seg32_t *r1 = x1; 169 const zfs_range_seg32_t *r2 = x2; 170 171 ASSERT3U(r1->rs_start, <=, r1->rs_end); 172 ASSERT3U(r2->rs_start, <=, r2->rs_end); 173 174 return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start)); 175 } 176 177 __attribute__((always_inline)) inline 178 static int 179 zfs_range_tree_seg64_compare(const void *x1, const void *x2) 180 { 181 const zfs_range_seg64_t *r1 = x1; 182 const zfs_range_seg64_t *r2 = x2; 183 184 ASSERT3U(r1->rs_start, <=, r1->rs_end); 185 ASSERT3U(r2->rs_start, <=, r2->rs_end); 186 187 return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start)); 188 } 189 190 __attribute__((always_inline)) inline 191 static int 192 zfs_range_tree_seg_gap_compare(const void *x1, const void *x2) 193 { 194 const zfs_range_seg_gap_t *r1 = x1; 195 const zfs_range_seg_gap_t *r2 = x2; 196 197 ASSERT3U(r1->rs_start, <=, r1->rs_end); 198 ASSERT3U(r2->rs_start, <=, r2->rs_end); 199 200 return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start)); 201 } 202 203 ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg32_find_in_buf, zfs_range_seg32_t, 204 zfs_range_tree_seg32_compare) 205 206 ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg64_find_in_buf, zfs_range_seg64_t, 207 zfs_range_tree_seg64_compare) 208 209 ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg_gap_find_in_buf, 210 zfs_range_seg_gap_t, zfs_range_tree_seg_gap_compare) 211 212 static zfs_range_tree_t * 213 zfs_range_tree_create_impl(const zfs_range_tree_ops_t *ops, 214 zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift, 215 uint64_t gap, uint64_t flags, const char *name) 216 { 217 zfs_range_tree_t *rt = kmem_zalloc(sizeof (zfs_range_tree_t), KM_SLEEP); 218 219 ASSERT3U(shift, <, 64); 220 ASSERT3U(type, <=, ZFS_RANGE_SEG_NUM_TYPES); 221 size_t size; 222 int (*compare) (const void *, const void *); 223 bt_find_in_buf_f bt_find; 224 switch (type) { 225 case ZFS_RANGE_SEG32: 226 size = sizeof (zfs_range_seg32_t); 227 compare = zfs_range_tree_seg32_compare; 228 bt_find = zfs_range_tree_seg32_find_in_buf; 229 break; 230 case ZFS_RANGE_SEG64: 231 size = sizeof (zfs_range_seg64_t); 232 compare = zfs_range_tree_seg64_compare; 233 bt_find = zfs_range_tree_seg64_find_in_buf; 234 break; 235 case ZFS_RANGE_SEG_GAP: 236 size = sizeof (zfs_range_seg_gap_t); 237 compare = zfs_range_tree_seg_gap_compare; 238 bt_find = zfs_range_tree_seg_gap_find_in_buf; 239 break; 240 default: 241 panic("Invalid range seg type %d", type); 242 } 243 zfs_btree_create(&rt->rt_root, compare, bt_find, size); 244 245 rt->rt_ops = ops; 246 rt->rt_gap = gap; 247 rt->rt_flags = flags; 248 rt->rt_name = name; 249 rt->rt_arg = arg; 250 rt->rt_type = type; 251 rt->rt_start = start; 252 rt->rt_shift = shift; 253 254 if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL) 255 rt->rt_ops->rtop_create(rt, rt->rt_arg); 256 257 return (rt); 258 } 259 260 zfs_range_tree_t * 261 zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops, 262 zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift, 263 uint64_t gap) 264 { 265 return (zfs_range_tree_create_impl(ops, type, arg, start, shift, gap, 266 0, NULL)); 267 } 268 269 zfs_range_tree_t * 270 zfs_range_tree_create(const zfs_range_tree_ops_t *ops, 271 zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift) 272 { 273 return (zfs_range_tree_create_impl(ops, type, arg, start, shift, 0, 274 0, NULL)); 275 } 276 277 zfs_range_tree_t * 278 zfs_range_tree_create_flags(const zfs_range_tree_ops_t *ops, 279 zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift, 280 uint64_t flags, const char *name) 281 { 282 return (zfs_range_tree_create_impl(ops, type, arg, start, shift, 0, 283 flags, name)); 284 } 285 286 void 287 zfs_range_tree_destroy(zfs_range_tree_t *rt) 288 { 289 VERIFY0(rt->rt_space); 290 291 if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL) 292 rt->rt_ops->rtop_destroy(rt, rt->rt_arg); 293 294 if (rt->rt_name != NULL && (rt->rt_flags & ZFS_RT_F_DYN_NAME)) 295 kmem_strfree((char *)(uintptr_t)rt->rt_name); 296 297 zfs_btree_destroy(&rt->rt_root); 298 kmem_free(rt, sizeof (*rt)); 299 } 300 301 void 302 zfs_range_tree_adjust_fill(zfs_range_tree_t *rt, zfs_range_seg_t *rs, 303 int64_t delta) 304 { 305 if (delta < 0 && delta * -1 >= zfs_rs_get_fill(rs, rt)) { 306 zfs_panic_recover("zfs: rt=%s: attempting to decrease fill to " 307 "or below 0; probable double remove in segment [%llx:%llx]", 308 ZFS_RT_NAME(rt), 309 (longlong_t)zfs_rs_get_start(rs, rt), 310 (longlong_t)zfs_rs_get_end(rs, rt)); 311 } 312 if (zfs_rs_get_fill(rs, rt) + delta > zfs_rs_get_end(rs, rt) - 313 zfs_rs_get_start(rs, rt)) { 314 zfs_panic_recover("zfs: rt=%s: attempting to increase fill " 315 "beyond max; probable double add in segment [%llx:%llx]", 316 ZFS_RT_NAME(rt), 317 (longlong_t)zfs_rs_get_start(rs, rt), 318 (longlong_t)zfs_rs_get_end(rs, rt)); 319 } 320 321 if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) 322 rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg); 323 zfs_rs_set_fill(rs, rt, zfs_rs_get_fill(rs, rt) + delta); 324 if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL) 325 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg); 326 } 327 328 static void 329 zfs_range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill) 330 { 331 zfs_range_tree_t *rt = arg; 332 zfs_btree_index_t where; 333 zfs_range_seg_t *rs_before, *rs_after, *rs; 334 zfs_range_seg_max_t tmp, rsearch; 335 uint64_t end = start + size, gap = rt->rt_gap; 336 uint64_t bridge_size = 0; 337 boolean_t merge_before, merge_after; 338 339 ASSERT3U(size, !=, 0); 340 ASSERT3U(fill, <=, size); 341 ASSERT3U(start + size, >, start); 342 343 zfs_rs_set_start(&rsearch, rt, start); 344 zfs_rs_set_end(&rsearch, rt, end); 345 rs = zfs_btree_find(&rt->rt_root, &rsearch, &where); 346 347 /* 348 * If this is a gap-supporting range tree, it is possible that we 349 * are inserting into an existing segment. In this case simply 350 * bump the fill count and call the remove / add callbacks. If the 351 * new range will extend an existing segment, we remove the 352 * existing one, apply the new extent to it and re-insert it using 353 * the normal code paths. 354 */ 355 if (rs != NULL) { 356 uint64_t rstart = zfs_rs_get_start(rs, rt); 357 uint64_t rend = zfs_rs_get_end(rs, rt); 358 if (gap == 0) { 359 zfs_panic_recover("zfs: rt=%s: adding segment " 360 "(offset=%llx size=%llx) overlapping with existing " 361 "one (offset=%llx size=%llx)", 362 ZFS_RT_NAME(rt), 363 (longlong_t)start, (longlong_t)size, 364 (longlong_t)rstart, (longlong_t)(rend - rstart)); 365 return; 366 } 367 if (rstart <= start && rend >= end) { 368 zfs_range_tree_adjust_fill(rt, rs, fill); 369 return; 370 } 371 372 if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) 373 rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg); 374 375 zfs_range_tree_stat_decr(rt, rs); 376 rt->rt_space -= rend - rstart; 377 378 fill += zfs_rs_get_fill(rs, rt); 379 start = MIN(start, rstart); 380 end = MAX(end, rend); 381 size = end - start; 382 383 zfs_btree_remove(&rt->rt_root, rs); 384 zfs_range_tree_add_impl(rt, start, size, fill); 385 return; 386 } 387 388 ASSERT0P(rs); 389 390 /* 391 * Determine whether or not we will have to merge with our neighbors. 392 * If gap != 0, we might need to merge with our neighbors even if we 393 * aren't directly touching. 394 */ 395 zfs_btree_index_t where_before, where_after; 396 rs_before = zfs_btree_prev(&rt->rt_root, &where, &where_before); 397 rs_after = zfs_btree_next(&rt->rt_root, &where, &where_after); 398 399 merge_before = (rs_before != NULL && zfs_rs_get_end(rs_before, rt) >= 400 start - gap); 401 merge_after = (rs_after != NULL && zfs_rs_get_start(rs_after, rt) <= 402 end + gap); 403 404 if (merge_before && gap != 0) 405 bridge_size += start - zfs_rs_get_end(rs_before, rt); 406 if (merge_after && gap != 0) 407 bridge_size += zfs_rs_get_start(rs_after, rt) - end; 408 409 if (merge_before && merge_after) { 410 if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) { 411 rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg); 412 rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg); 413 } 414 415 zfs_range_tree_stat_decr(rt, rs_before); 416 zfs_range_tree_stat_decr(rt, rs_after); 417 418 zfs_rs_copy(rs_after, &tmp, rt); 419 uint64_t before_start = zfs_rs_get_start_raw(rs_before, rt); 420 uint64_t before_fill = zfs_rs_get_fill(rs_before, rt); 421 uint64_t after_fill = zfs_rs_get_fill(rs_after, rt); 422 zfs_btree_remove_idx(&rt->rt_root, &where_before); 423 424 /* 425 * We have to re-find the node because our old reference is 426 * invalid as soon as we do any mutating btree operations. 427 */ 428 rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after); 429 ASSERT3P(rs_after, !=, NULL); 430 zfs_rs_set_start_raw(rs_after, rt, before_start); 431 zfs_rs_set_fill(rs_after, rt, after_fill + before_fill + fill); 432 rs = rs_after; 433 } else if (merge_before) { 434 if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) 435 rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg); 436 437 zfs_range_tree_stat_decr(rt, rs_before); 438 439 uint64_t before_fill = zfs_rs_get_fill(rs_before, rt); 440 zfs_rs_set_end(rs_before, rt, end); 441 zfs_rs_set_fill(rs_before, rt, before_fill + fill); 442 rs = rs_before; 443 } else if (merge_after) { 444 if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) 445 rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg); 446 447 zfs_range_tree_stat_decr(rt, rs_after); 448 449 uint64_t after_fill = zfs_rs_get_fill(rs_after, rt); 450 zfs_rs_set_start(rs_after, rt, start); 451 zfs_rs_set_fill(rs_after, rt, after_fill + fill); 452 rs = rs_after; 453 } else { 454 rs = &tmp; 455 456 zfs_rs_set_start(rs, rt, start); 457 zfs_rs_set_end(rs, rt, end); 458 zfs_rs_set_fill(rs, rt, fill); 459 zfs_btree_add_idx(&rt->rt_root, rs, &where); 460 } 461 462 if (gap != 0) { 463 ASSERT3U(zfs_rs_get_fill(rs, rt), <=, zfs_rs_get_end(rs, rt) - 464 zfs_rs_get_start(rs, rt)); 465 } else { 466 ASSERT3U(zfs_rs_get_fill(rs, rt), ==, zfs_rs_get_end(rs, rt) - 467 zfs_rs_get_start(rs, rt)); 468 } 469 470 if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL) 471 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg); 472 473 zfs_range_tree_stat_incr(rt, rs); 474 rt->rt_space += size + bridge_size; 475 } 476 477 void 478 zfs_range_tree_add(void *arg, uint64_t start, uint64_t size) 479 { 480 zfs_range_tree_add_impl(arg, start, size, size); 481 } 482 483 static void 484 zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size, 485 boolean_t do_fill) 486 { 487 zfs_btree_index_t where; 488 zfs_range_seg_t *rs; 489 zfs_range_seg_max_t rsearch, rs_tmp; 490 uint64_t end = start + size; 491 uint64_t rstart, rend; 492 boolean_t left_over, right_over; 493 494 VERIFY3U(size, !=, 0); 495 VERIFY3U(size, <=, rt->rt_space); 496 if (rt->rt_type == ZFS_RANGE_SEG64) 497 ASSERT3U(start + size, >, start); 498 499 zfs_rs_set_start(&rsearch, rt, start); 500 zfs_rs_set_end(&rsearch, rt, end); 501 rs = zfs_btree_find(&rt->rt_root, &rsearch, &where); 502 503 /* Make sure we completely overlap with someone */ 504 if (rs == NULL) { 505 zfs_panic_recover("zfs: rt=%s: removing nonexistent segment " 506 "from range tree (offset=%llx size=%llx)", 507 ZFS_RT_NAME(rt), (longlong_t)start, (longlong_t)size); 508 return; 509 } 510 511 rstart = zfs_rs_get_start(rs, rt); 512 rend = zfs_rs_get_end(rs, rt); 513 514 /* 515 * Range trees with gap support must only remove complete segments 516 * from the tree. This allows us to maintain accurate fill accounting 517 * and to ensure that bridged sections are not leaked. If we need to 518 * remove less than the full segment, we can only adjust the fill count. 519 */ 520 if (rt->rt_gap != 0) { 521 if (do_fill) { 522 if (zfs_rs_get_fill(rs, rt) == size) { 523 start = rstart; 524 end = rend; 525 size = end - start; 526 } else { 527 zfs_range_tree_adjust_fill(rt, rs, -size); 528 return; 529 } 530 } else if (rstart != start || rend != end) { 531 zfs_panic_recover("zfs: rt=%s: freeing partial segment " 532 "of gap tree (offset=%llx size=%llx) of " 533 "(offset=%llx size=%llx)", 534 ZFS_RT_NAME(rt), 535 (longlong_t)start, (longlong_t)size, 536 (longlong_t)rstart, (longlong_t)(rend - rstart)); 537 return; 538 } 539 } 540 541 if (!(rstart <= start && rend >= end)) { 542 zfs_panic_recover("zfs: rt=%s: removing segment " 543 "(offset=%llx size=%llx) not completely overlapped by " 544 "existing one (offset=%llx size=%llx)", 545 ZFS_RT_NAME(rt), 546 (longlong_t)start, (longlong_t)size, 547 (longlong_t)rstart, (longlong_t)(rend - rstart)); 548 return; 549 } 550 551 left_over = (rstart != start); 552 right_over = (rend != end); 553 554 zfs_range_tree_stat_decr(rt, rs); 555 556 if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) 557 rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg); 558 559 if (left_over && right_over) { 560 zfs_range_seg_max_t newseg; 561 zfs_rs_set_start(&newseg, rt, end); 562 zfs_rs_set_end_raw(&newseg, rt, zfs_rs_get_end_raw(rs, rt)); 563 zfs_rs_set_fill(&newseg, rt, zfs_rs_get_end(rs, rt) - end); 564 zfs_range_tree_stat_incr(rt, &newseg); 565 566 // This modifies the buffer already inside the range tree 567 zfs_rs_set_end(rs, rt, start); 568 569 zfs_rs_copy(rs, &rs_tmp, rt); 570 if (zfs_btree_next(&rt->rt_root, &where, &where) != NULL) 571 zfs_btree_add_idx(&rt->rt_root, &newseg, &where); 572 else 573 zfs_btree_add(&rt->rt_root, &newseg); 574 575 if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL) 576 rt->rt_ops->rtop_add(rt, &newseg, rt->rt_arg); 577 } else if (left_over) { 578 // This modifies the buffer already inside the range tree 579 zfs_rs_set_end(rs, rt, start); 580 zfs_rs_copy(rs, &rs_tmp, rt); 581 } else if (right_over) { 582 // This modifies the buffer already inside the range tree 583 zfs_rs_set_start(rs, rt, end); 584 zfs_rs_copy(rs, &rs_tmp, rt); 585 } else { 586 zfs_btree_remove_idx(&rt->rt_root, &where); 587 rs = NULL; 588 } 589 590 if (rs != NULL) { 591 /* 592 * The fill of the leftover segment will always be equal to 593 * the size, since we do not support removing partial segments 594 * of range trees with gaps. 595 */ 596 zfs_rs_set_fill_raw(rs, rt, zfs_rs_get_end_raw(rs, rt) - 597 zfs_rs_get_start_raw(rs, rt)); 598 zfs_range_tree_stat_incr(rt, &rs_tmp); 599 600 if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL) 601 rt->rt_ops->rtop_add(rt, &rs_tmp, rt->rt_arg); 602 } 603 604 rt->rt_space -= size; 605 } 606 607 void 608 zfs_range_tree_remove(void *arg, uint64_t start, uint64_t size) 609 { 610 zfs_range_tree_remove_impl(arg, start, size, B_FALSE); 611 } 612 613 void 614 zfs_range_tree_remove_fill(zfs_range_tree_t *rt, uint64_t start, uint64_t size) 615 { 616 zfs_range_tree_remove_impl(rt, start, size, B_TRUE); 617 } 618 619 void 620 zfs_range_tree_resize_segment(zfs_range_tree_t *rt, zfs_range_seg_t *rs, 621 uint64_t newstart, uint64_t newsize) 622 { 623 int64_t delta = newsize - (zfs_rs_get_end(rs, rt) - 624 zfs_rs_get_start(rs, rt)); 625 626 zfs_range_tree_stat_decr(rt, rs); 627 if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) 628 rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg); 629 630 zfs_rs_set_start(rs, rt, newstart); 631 zfs_rs_set_end(rs, rt, newstart + newsize); 632 633 zfs_range_tree_stat_incr(rt, rs); 634 if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL) 635 rt->rt_ops->rtop_add(rt, rs, rt->rt_arg); 636 637 rt->rt_space += delta; 638 } 639 640 static zfs_range_seg_t * 641 zfs_range_tree_find_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size) 642 { 643 zfs_range_seg_max_t rsearch; 644 uint64_t end = start + size; 645 646 VERIFY(size != 0); 647 648 zfs_rs_set_start(&rsearch, rt, start); 649 zfs_rs_set_end(&rsearch, rt, end); 650 return (zfs_btree_find(&rt->rt_root, &rsearch, NULL)); 651 } 652 653 zfs_range_seg_t * 654 zfs_range_tree_find(zfs_range_tree_t *rt, uint64_t start, uint64_t size) 655 { 656 if (rt->rt_type == ZFS_RANGE_SEG64) 657 ASSERT3U(start + size, >, start); 658 659 zfs_range_seg_t *rs = zfs_range_tree_find_impl(rt, start, size); 660 if (rs != NULL && zfs_rs_get_start(rs, rt) <= start && 661 zfs_rs_get_end(rs, rt) >= start + size) { 662 return (rs); 663 } 664 return (NULL); 665 } 666 667 void 668 zfs_range_tree_verify_not_present(zfs_range_tree_t *rt, uint64_t off, 669 uint64_t size) 670 { 671 zfs_range_seg_t *rs = zfs_range_tree_find(rt, off, size); 672 if (rs != NULL) 673 panic("segment already in tree; rs=%p", (void *)rs); 674 } 675 676 boolean_t 677 zfs_range_tree_contains(zfs_range_tree_t *rt, uint64_t start, uint64_t size) 678 { 679 return (zfs_range_tree_find(rt, start, size) != NULL); 680 } 681 682 /* 683 * Returns the first subset of the given range which overlaps with the range 684 * tree. Returns true if there is a segment in the range, and false if there 685 * isn't. 686 */ 687 boolean_t 688 zfs_range_tree_find_in(zfs_range_tree_t *rt, uint64_t start, uint64_t size, 689 uint64_t *ostart, uint64_t *osize) 690 { 691 if (rt->rt_type == ZFS_RANGE_SEG64) 692 ASSERT3U(start + size, >, start); 693 694 zfs_range_seg_max_t rsearch; 695 zfs_rs_set_start(&rsearch, rt, start); 696 zfs_rs_set_end_raw(&rsearch, rt, zfs_rs_get_start_raw(&rsearch, rt) + 697 1); 698 699 zfs_btree_index_t where; 700 zfs_range_seg_t *rs = zfs_btree_find(&rt->rt_root, &rsearch, &where); 701 if (rs != NULL) { 702 *ostart = start; 703 *osize = MIN(size, zfs_rs_get_end(rs, rt) - start); 704 return (B_TRUE); 705 } 706 707 rs = zfs_btree_next(&rt->rt_root, &where, &where); 708 if (rs == NULL || zfs_rs_get_start(rs, rt) >= start + size) 709 return (B_FALSE); 710 711 *ostart = zfs_rs_get_start(rs, rt); 712 *osize = MIN(start + size, zfs_rs_get_end(rs, rt)) - 713 zfs_rs_get_start(rs, rt); 714 return (B_TRUE); 715 } 716 717 /* 718 * Ensure that this range is not in the tree, regardless of whether 719 * it is currently in the tree. 720 */ 721 void 722 zfs_range_tree_clear(zfs_range_tree_t *rt, uint64_t start, uint64_t size) 723 { 724 zfs_range_seg_t *rs; 725 726 if (size == 0) 727 return; 728 729 if (rt->rt_type == ZFS_RANGE_SEG64) 730 ASSERT3U(start + size, >, start); 731 732 while ((rs = zfs_range_tree_find_impl(rt, start, size)) != NULL) { 733 uint64_t free_start = MAX(zfs_rs_get_start(rs, rt), start); 734 uint64_t free_end = MIN(zfs_rs_get_end(rs, rt), start + size); 735 zfs_range_tree_remove(rt, free_start, free_end - free_start); 736 } 737 } 738 739 void 740 zfs_range_tree_swap(zfs_range_tree_t **rtsrc, zfs_range_tree_t **rtdst) 741 { 742 zfs_range_tree_t *rt; 743 744 ASSERT0(zfs_range_tree_space(*rtdst)); 745 ASSERT0(zfs_btree_numnodes(&(*rtdst)->rt_root)); 746 747 rt = *rtsrc; 748 *rtsrc = *rtdst; 749 *rtdst = rt; 750 } 751 752 void 753 zfs_range_tree_vacate(zfs_range_tree_t *rt, zfs_range_tree_func_t *func, 754 void *arg) 755 { 756 if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL) 757 rt->rt_ops->rtop_vacate(rt, rt->rt_arg); 758 759 if (func != NULL) { 760 zfs_range_seg_t *rs; 761 zfs_btree_index_t *cookie = NULL; 762 763 while ((rs = zfs_btree_destroy_nodes(&rt->rt_root, &cookie)) != 764 NULL) { 765 func(arg, zfs_rs_get_start(rs, rt), 766 zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt)); 767 } 768 } else { 769 zfs_btree_clear(&rt->rt_root); 770 } 771 772 memset(rt->rt_histogram, 0, sizeof (rt->rt_histogram)); 773 rt->rt_space = 0; 774 } 775 776 void 777 zfs_range_tree_walk(zfs_range_tree_t *rt, zfs_range_tree_func_t *func, 778 void *arg) 779 { 780 zfs_btree_index_t where; 781 for (zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); 782 rs != NULL; rs = zfs_btree_next(&rt->rt_root, &where, &where)) { 783 func(arg, zfs_rs_get_start(rs, rt), zfs_rs_get_end(rs, rt) - 784 zfs_rs_get_start(rs, rt)); 785 } 786 } 787 788 zfs_range_seg_t * 789 zfs_range_tree_first(zfs_range_tree_t *rt) 790 { 791 return (zfs_btree_first(&rt->rt_root, NULL)); 792 } 793 794 uint64_t 795 zfs_range_tree_space(zfs_range_tree_t *rt) 796 { 797 return (rt->rt_space); 798 } 799 800 uint64_t 801 zfs_range_tree_numsegs(zfs_range_tree_t *rt) 802 { 803 return ((rt == NULL) ? 0 : zfs_btree_numnodes(&rt->rt_root)); 804 } 805 806 boolean_t 807 zfs_range_tree_is_empty(zfs_range_tree_t *rt) 808 { 809 ASSERT(rt != NULL); 810 return (zfs_range_tree_space(rt) == 0); 811 } 812 813 /* 814 * Remove any overlapping ranges between the given segment [start, end) 815 * from removefrom. Add non-overlapping leftovers to addto. 816 */ 817 void 818 zfs_range_tree_remove_xor_add_segment(uint64_t start, uint64_t end, 819 zfs_range_tree_t *removefrom, zfs_range_tree_t *addto) 820 { 821 zfs_btree_index_t where; 822 zfs_range_seg_max_t starting_rs; 823 zfs_rs_set_start(&starting_rs, removefrom, start); 824 zfs_rs_set_end_raw(&starting_rs, removefrom, 825 zfs_rs_get_start_raw(&starting_rs, removefrom) + 1); 826 827 zfs_range_seg_t *curr = zfs_btree_find(&removefrom->rt_root, 828 &starting_rs, &where); 829 830 if (curr == NULL) 831 curr = zfs_btree_next(&removefrom->rt_root, &where, &where); 832 833 zfs_range_seg_t *next; 834 for (; curr != NULL; curr = next) { 835 if (start == end) 836 return; 837 VERIFY3U(start, <, end); 838 839 /* there is no overlap */ 840 if (end <= zfs_rs_get_start(curr, removefrom)) { 841 zfs_range_tree_add(addto, start, end - start); 842 return; 843 } 844 845 uint64_t overlap_start = MAX(zfs_rs_get_start(curr, removefrom), 846 start); 847 uint64_t overlap_end = MIN(zfs_rs_get_end(curr, removefrom), 848 end); 849 uint64_t overlap_size = overlap_end - overlap_start; 850 ASSERT3S(overlap_size, >, 0); 851 zfs_range_seg_max_t rs; 852 zfs_rs_copy(curr, &rs, removefrom); 853 854 zfs_range_tree_remove(removefrom, overlap_start, overlap_size); 855 856 if (start < overlap_start) 857 zfs_range_tree_add(addto, start, overlap_start - start); 858 859 start = overlap_end; 860 next = zfs_btree_find(&removefrom->rt_root, &rs, &where); 861 /* 862 * If we find something here, we only removed part of the 863 * curr segment. Either there's some left at the end 864 * because we've reached the end of the range we're removing, 865 * or there's some left at the start because we started 866 * partway through the range. Either way, we continue with 867 * the loop. If it's the former, we'll return at the start of 868 * the loop, and if it's the latter we'll see if there is more 869 * area to process. 870 */ 871 if (next != NULL) { 872 ASSERT(start == end || start == zfs_rs_get_end(&rs, 873 removefrom)); 874 } 875 876 next = zfs_btree_next(&removefrom->rt_root, &where, &where); 877 } 878 VERIFY0P(curr); 879 880 if (start != end) { 881 VERIFY3U(start, <, end); 882 zfs_range_tree_add(addto, start, end - start); 883 } else { 884 VERIFY3U(start, ==, end); 885 } 886 } 887 888 /* 889 * For each entry in rt, if it exists in removefrom, remove it 890 * from removefrom. Otherwise, add it to addto. 891 */ 892 void 893 zfs_range_tree_remove_xor_add(zfs_range_tree_t *rt, 894 zfs_range_tree_t *removefrom, zfs_range_tree_t *addto) 895 { 896 zfs_btree_index_t where; 897 for (zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs; 898 rs = zfs_btree_next(&rt->rt_root, &where, &where)) { 899 zfs_range_tree_remove_xor_add_segment(zfs_rs_get_start(rs, rt), 900 zfs_rs_get_end(rs, rt), removefrom, addto); 901 } 902 } 903 904 uint64_t 905 zfs_range_tree_min(zfs_range_tree_t *rt) 906 { 907 zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, NULL); 908 return (rs != NULL ? zfs_rs_get_start(rs, rt) : 0); 909 } 910 911 uint64_t 912 zfs_range_tree_max(zfs_range_tree_t *rt) 913 { 914 zfs_range_seg_t *rs = zfs_btree_last(&rt->rt_root, NULL); 915 return (rs != NULL ? zfs_rs_get_end(rs, rt) : 0); 916 } 917 918 uint64_t 919 zfs_range_tree_span(zfs_range_tree_t *rt) 920 { 921 return (zfs_range_tree_max(rt) - zfs_range_tree_min(rt)); 922 } 923