// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
 * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

/*
 * Range trees are tree-based data structures that can be used to
 * track free space or generally any space allocation information.
 * A range tree keeps track of individual segments and automatically
 * provides facilities such as adjacent extent merging and extent
 * splitting in response to range add/remove requests.
 *
 * A range tree starts out completely empty, with no segments in it.
 * Adding an allocation via zfs_range_tree_add to the range tree can either:
 * 1) create a new extent
 * 2) extend an adjacent extent
 * 3) merge two adjacent extents
 * Conversely, removing an allocation via zfs_range_tree_remove can:
 * 1) completely remove an extent
 * 2) shorten an extent (if the allocation was near one of its ends)
 * 3) split an extent into two extents, in effect punching a hole
 *
 * A range tree is also capable of 'bridging' gaps when adding
 * allocations. This is useful for cases when close proximity of
 * allocations is an important detail that needs to be represented
 * in the range tree. See zfs_range_tree_set_gap(). The default behavior
 * is not to bridge gaps (i.e. the maximum allowed gap size is 0).
 *
 * In order to traverse a range tree, use either the zfs_range_tree_walk()
 * or zfs_range_tree_vacate() functions.
 *
 * To obtain more accurate information on individual segment
 * operations that the range tree performs "under the hood", you can
 * specify a set of callbacks by passing a zfs_range_tree_ops_t structure
 * to the zfs_range_tree_create function. Any callbacks that are non-NULL
 * are then called at the appropriate times.
 *
 * The range tree code also supports a special variant of range trees
 * that can bridge small gaps between segments. This kind of tree is used
 * by the dsl scanning code to group I/Os into mostly sequential chunks to
 * optimize disk performance. The code here attempts to do this with as
 * little memory and computational overhead as possible. One limitation of
 * this implementation is that range trees with gaps only support removing
 * complete segments.
 */
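
/*
 * Minimal usage sketch (illustrative only; the offsets, sizes and the NULL
 * ops/arg below are arbitrary example values, not taken from any caller):
 *
 *	zfs_range_tree_t *rt = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
 *	    NULL, 0, 0);
 *	zfs_range_tree_add(rt, 0x1000, 0x2000);	// segment [0x1000, 0x3000)
 *	zfs_range_tree_add(rt, 0x3000, 0x1000);	// merges to [0x1000, 0x4000)
 *	zfs_range_tree_remove(rt, 0x2000, 0x800);	// punches a hole
 *	zfs_range_tree_vacate(rt, NULL, NULL);	// empty it before destroying
 *	zfs_range_tree_destroy(rt);
 */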

static inline void
zfs_rs_copy(zfs_range_seg_t *src, zfs_range_seg_t *dest, zfs_range_tree_t *rt)
{
	ASSERT3U(rt->rt_type, <, ZFS_RANGE_SEG_NUM_TYPES);
	size_t size = 0;
	switch (rt->rt_type) {
	case ZFS_RANGE_SEG32:
		size = sizeof (zfs_range_seg32_t);
		break;
	case ZFS_RANGE_SEG64:
		size = sizeof (zfs_range_seg64_t);
		break;
	case ZFS_RANGE_SEG_GAP:
		size = sizeof (zfs_range_seg_gap_t);
		break;
	default:
		__builtin_unreachable();
	}
	memcpy(dest, src, size);
}

void
zfs_range_tree_stat_verify(zfs_range_tree_t *rt)
{
	zfs_range_seg_t *rs;
	zfs_btree_index_t where;
	uint64_t hist[ZFS_RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = zfs_btree_first(&rt->rt_root, &where); rs != NULL;
	    rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		uint64_t size = zfs_rs_get_end(rs, rt) -
		    zfs_rs_get_start(rs, rt);
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%px, hist=%llu, rt_hist=%llu",
			    i, hist, (u_longlong_t)hist[i],
			    (u_longlong_t)rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}

static void
zfs_range_tree_stat_incr(zfs_range_tree_t *rt, zfs_range_seg_t *rs)
{
	uint64_t size = zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt);
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
zfs_range_tree_stat_decr(zfs_range_tree_t *rt, zfs_range_seg_t *rs)
{
	uint64_t size = zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt);
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}

__attribute__((always_inline)) inline
static int
zfs_range_tree_seg32_compare(const void *x1, const void *x2)
{
	const zfs_range_seg32_t *r1 = x1;
	const zfs_range_seg32_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

__attribute__((always_inline)) inline
static int
zfs_range_tree_seg64_compare(const void *x1, const void *x2)
{
	const zfs_range_seg64_t *r1 = x1;
	const zfs_range_seg64_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

__attribute__((always_inline)) inline
static int
zfs_range_tree_seg_gap_compare(const void *x1, const void *x2)
{
	const zfs_range_seg_gap_t *r1 = x1;
	const zfs_range_seg_gap_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
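
/*
 * Note on the comparators above: the expression
 * (r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start) evaluates to
 * 1 when r1 lies entirely at or after r2, -1 when r1 lies entirely at or
 * before r2, and 0 when the two segments overlap. Treating any overlap as
 * "equal" is what lets zfs_btree_find() locate an existing segment given
 * only a range that intersects it.
 */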

ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg32_find_in_buf, zfs_range_seg32_t,
    zfs_range_tree_seg32_compare)

ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg64_find_in_buf, zfs_range_seg64_t,
    zfs_range_tree_seg64_compare)

ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg_gap_find_in_buf,
    zfs_range_seg_gap_t, zfs_range_tree_seg_gap_compare)

zfs_range_tree_t *
zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
    zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
    uint64_t gap)
{
	zfs_range_tree_t *rt = kmem_zalloc(sizeof (zfs_range_tree_t), KM_SLEEP);

	ASSERT3U(shift, <, 64);
	ASSERT3U(type, <=, ZFS_RANGE_SEG_NUM_TYPES);
	size_t size;
	int (*compare) (const void *, const void *);
	bt_find_in_buf_f bt_find;
	switch (type) {
	case ZFS_RANGE_SEG32:
		size = sizeof (zfs_range_seg32_t);
		compare = zfs_range_tree_seg32_compare;
		bt_find = zfs_range_tree_seg32_find_in_buf;
		break;
	case ZFS_RANGE_SEG64:
		size = sizeof (zfs_range_seg64_t);
		compare = zfs_range_tree_seg64_compare;
		bt_find = zfs_range_tree_seg64_find_in_buf;
		break;
	case ZFS_RANGE_SEG_GAP:
		size = sizeof (zfs_range_seg_gap_t);
		compare = zfs_range_tree_seg_gap_compare;
		bt_find = zfs_range_tree_seg_gap_find_in_buf;
		break;
	default:
		panic("Invalid range seg type %d", type);
	}
	zfs_btree_create(&rt->rt_root, compare, bt_find, size);

	rt->rt_ops = ops;
	rt->rt_gap = gap;
	rt->rt_arg = arg;
	rt->rt_type = type;
	rt->rt_start = start;
	rt->rt_shift = shift;

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

zfs_range_tree_t *
zfs_range_tree_create(const zfs_range_tree_ops_t *ops,
    zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift)
{
	return (zfs_range_tree_create_gap(ops, type, arg, start, shift, 0));
}

void
zfs_range_tree_destroy(zfs_range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	zfs_btree_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}

void
zfs_range_tree_adjust_fill(zfs_range_tree_t *rt, zfs_range_seg_t *rs,
    int64_t delta)
{
	if (delta < 0 && delta * -1 >= zfs_rs_get_fill(rs, rt)) {
		zfs_panic_recover("zfs: attempting to decrease fill to or "
		    "below 0; probable double remove in segment [%llx:%llx]",
		    (longlong_t)zfs_rs_get_start(rs, rt),
		    (longlong_t)zfs_rs_get_end(rs, rt));
	}
	if (zfs_rs_get_fill(rs, rt) + delta > zfs_rs_get_end(rs, rt) -
	    zfs_rs_get_start(rs, rt)) {
		zfs_panic_recover("zfs: attempting to increase fill beyond "
		    "max; probable double add in segment [%llx:%llx]",
		    (longlong_t)zfs_rs_get_start(rs, rt),
		    (longlong_t)zfs_rs_get_end(rs, rt));
	}

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
	zfs_rs_set_fill(rs, rt, zfs_rs_get_fill(rs, rt) + delta);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
}
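
/*
 * Illustrative sketch of the gap-bridging behavior implemented by
 * zfs_range_tree_add_impl() below (the offsets and the 0x1000-byte gap are
 * arbitrary example values): with rt_gap != 0, two additions that lie
 * within 'gap' bytes of each other are merged into one segment, and the
 * segment's fill tracks how much of that span is actually populated.
 *
 *	zfs_range_tree_t *gt = zfs_range_tree_create_gap(NULL,
 *	    ZFS_RANGE_SEG_GAP, NULL, 0, 0, 0x1000);
 *	zfs_range_tree_add(gt, 0, 0x4000);
 *	zfs_range_tree_add(gt, 0x5000, 0x3000);
 *	    // the 0x1000-byte hole is bridged: one segment [0, 0x8000)
 *	    // with a fill of 0x7000
 */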

static void
zfs_range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
{
	zfs_range_tree_t *rt = arg;
	zfs_btree_index_t where;
	zfs_range_seg_t *rs_before, *rs_after, *rs;
	zfs_range_seg_max_t tmp, rsearch;
	uint64_t end = start + size, gap = rt->rt_gap;
	uint64_t bridge_size = 0;
	boolean_t merge_before, merge_after;

	ASSERT3U(size, !=, 0);
	ASSERT3U(fill, <=, size);
	ASSERT3U(start + size, >, start);

	zfs_rs_set_start(&rsearch, rt, start);
	zfs_rs_set_end(&rsearch, rt, end);
	rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);

	/*
	 * If this is a gap-supporting range tree, it is possible that we
	 * are inserting into an existing segment. In this case simply
	 * bump the fill count and call the remove / add callbacks. If the
	 * new range will extend an existing segment, we remove the
	 * existing one, apply the new extent to it and re-insert it using
	 * the normal code paths.
	 */
	if (rs != NULL) {
		if (gap == 0) {
			zfs_panic_recover("zfs: adding existent segment to "
			    "range tree (offset=%llx size=%llx)",
			    (longlong_t)start, (longlong_t)size);
			return;
		}
		uint64_t rstart = zfs_rs_get_start(rs, rt);
		uint64_t rend = zfs_rs_get_end(rs, rt);
		if (rstart <= start && rend >= end) {
			zfs_range_tree_adjust_fill(rt, rs, fill);
			return;
		}

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

		zfs_range_tree_stat_decr(rt, rs);
		rt->rt_space -= rend - rstart;

		fill += zfs_rs_get_fill(rs, rt);
		start = MIN(start, rstart);
		end = MAX(end, rend);
		size = end - start;

		zfs_btree_remove(&rt->rt_root, rs);
		zfs_range_tree_add_impl(rt, start, size, fill);
		return;
	}

	ASSERT3P(rs, ==, NULL);

	/*
	 * Determine whether or not we will have to merge with our neighbors.
	 * If gap != 0, we might need to merge with our neighbors even if we
	 * aren't directly touching.
	 */
	zfs_btree_index_t where_before, where_after;
	rs_before = zfs_btree_prev(&rt->rt_root, &where, &where_before);
	rs_after = zfs_btree_next(&rt->rt_root, &where, &where_after);

	merge_before = (rs_before != NULL && zfs_rs_get_end(rs_before, rt) >=
	    start - gap);
	merge_after = (rs_after != NULL && zfs_rs_get_start(rs_after, rt) <=
	    end + gap);

	if (merge_before && gap != 0)
		bridge_size += start - zfs_rs_get_end(rs_before, rt);
	if (merge_after && gap != 0)
		bridge_size += zfs_rs_get_start(rs_after, rt) - end;

	if (merge_before && merge_after) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		zfs_range_tree_stat_decr(rt, rs_before);
		zfs_range_tree_stat_decr(rt, rs_after);

		zfs_rs_copy(rs_after, &tmp, rt);
		uint64_t before_start = zfs_rs_get_start_raw(rs_before, rt);
		uint64_t before_fill = zfs_rs_get_fill(rs_before, rt);
		uint64_t after_fill = zfs_rs_get_fill(rs_after, rt);
		zfs_btree_remove_idx(&rt->rt_root, &where_before);

		/*
		 * We have to re-find the node because our old reference is
		 * invalid as soon as we do any mutating btree operations.
		 */
		rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after);
		ASSERT3P(rs_after, !=, NULL);
		zfs_rs_set_start_raw(rs_after, rt, before_start);
		zfs_rs_set_fill(rs_after, rt, after_fill + before_fill + fill);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		zfs_range_tree_stat_decr(rt, rs_before);

		uint64_t before_fill = zfs_rs_get_fill(rs_before, rt);
		zfs_rs_set_end(rs_before, rt, end);
		zfs_rs_set_fill(rs_before, rt, before_fill + fill);
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		zfs_range_tree_stat_decr(rt, rs_after);

		uint64_t after_fill = zfs_rs_get_fill(rs_after, rt);
		zfs_rs_set_start(rs_after, rt, start);
		zfs_rs_set_fill(rs_after, rt, after_fill + fill);
		rs = rs_after;
	} else {
		rs = &tmp;

		zfs_rs_set_start(rs, rt, start);
		zfs_rs_set_end(rs, rt, end);
		zfs_rs_set_fill(rs, rt, fill);
		zfs_btree_add_idx(&rt->rt_root, rs, &where);
	}

	if (gap != 0) {
		ASSERT3U(zfs_rs_get_fill(rs, rt), <=, zfs_rs_get_end(rs, rt) -
		    zfs_rs_get_start(rs, rt));
	} else {
		ASSERT3U(zfs_rs_get_fill(rs, rt), ==, zfs_rs_get_end(rs, rt) -
		    zfs_rs_get_start(rs, rt));
	}

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	zfs_range_tree_stat_incr(rt, rs);
	rt->rt_space += size + bridge_size;
}

void
zfs_range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	zfs_range_tree_add_impl(arg, start, size, size);
}
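
/*
 * Worked example of the merge cases above (illustrative offsets only),
 * assuming a gap of 0 and existing segments [0x1000, 0x2000) and
 * [0x3000, 0x4000):
 *
 *	zfs_range_tree_add(rt, 0x2000, 0x1000);
 *	    // merge_before and merge_after: both neighbors are coalesced
 *	    // into a single segment [0x1000, 0x4000)
 *	zfs_range_tree_add(rt, 0x4000, 0x800);
 *	    // merge_before only: the segment grows to [0x1000, 0x4800)
 *	zfs_range_tree_add(rt, 0x6000, 0x1000);
 *	    // no merge: a new segment [0x6000, 0x7000) is inserted
 */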

static void
zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
    boolean_t do_fill)
{
	zfs_btree_index_t where;
	zfs_range_seg_t *rs;
	zfs_range_seg_max_t rsearch, rs_tmp;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);
	if (rt->rt_type == ZFS_RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	zfs_rs_set_start(&rsearch, rt, start);
	zfs_rs_set_end(&rsearch, rt, end);
	rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: removing nonexistent segment from "
		    "range tree (offset=%llx size=%llx)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/*
	 * Range trees with gap support must only remove complete segments
	 * from the tree. This allows us to maintain accurate fill accounting
	 * and to ensure that bridged sections are not leaked. If we need to
	 * remove less than the full segment, we can only adjust the fill
	 * count.
	 */
	if (rt->rt_gap != 0) {
		if (do_fill) {
			if (zfs_rs_get_fill(rs, rt) == size) {
				start = zfs_rs_get_start(rs, rt);
				end = zfs_rs_get_end(rs, rt);
				size = end - start;
			} else {
				zfs_range_tree_adjust_fill(rt, rs, -size);
				return;
			}
		} else if (zfs_rs_get_start(rs, rt) != start ||
		    zfs_rs_get_end(rs, rt) != end) {
			zfs_panic_recover("zfs: freeing partial segment of "
			    "gap tree (offset=%llx size=%llx) of "
			    "(offset=%llx size=%llx)",
			    (longlong_t)start, (longlong_t)size,
			    (longlong_t)zfs_rs_get_start(rs, rt),
			    (longlong_t)zfs_rs_get_end(rs, rt) -
			    zfs_rs_get_start(rs, rt));
			return;
		}
	}

	VERIFY3U(zfs_rs_get_start(rs, rt), <=, start);
	VERIFY3U(zfs_rs_get_end(rs, rt), >=, end);

	left_over = (zfs_rs_get_start(rs, rt) != start);
	right_over = (zfs_rs_get_end(rs, rt) != end);

	zfs_range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		zfs_range_seg_max_t newseg;
		zfs_rs_set_start(&newseg, rt, end);
		zfs_rs_set_end_raw(&newseg, rt, zfs_rs_get_end_raw(rs, rt));
		zfs_rs_set_fill(&newseg, rt, zfs_rs_get_end(rs, rt) - end);
		zfs_range_tree_stat_incr(rt, &newseg);

		// This modifies the buffer already inside the range tree
		zfs_rs_set_end(rs, rt, start);

		zfs_rs_copy(rs, &rs_tmp, rt);
		if (zfs_btree_next(&rt->rt_root, &where, &where) != NULL)
			zfs_btree_add_idx(&rt->rt_root, &newseg, &where);
		else
			zfs_btree_add(&rt->rt_root, &newseg);

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
			rt->rt_ops->rtop_add(rt, &newseg, rt->rt_arg);
	} else if (left_over) {
		// This modifies the buffer already inside the range tree
		zfs_rs_set_end(rs, rt, start);
		zfs_rs_copy(rs, &rs_tmp, rt);
	} else if (right_over) {
		// This modifies the buffer already inside the range tree
		zfs_rs_set_start(rs, rt, end);
		zfs_rs_copy(rs, &rs_tmp, rt);
	} else {
		zfs_btree_remove_idx(&rt->rt_root, &where);
		rs = NULL;
	}

	if (rs != NULL) {
		/*
		 * The fill of the leftover segment will always be equal to
		 * the size, since we do not support removing partial segments
		 * of range trees with gaps.
		 */
		zfs_zfs_rs_set_fill_raw(rs, rt, zfs_rs_get_end_raw(rs, rt) -
		    zfs_rs_get_start_raw(rs, rt));
		zfs_range_tree_stat_incr(rt, &rs_tmp);

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
			rt->rt_ops->rtop_add(rt, &rs_tmp, rt->rt_arg);
	}

	rt->rt_space -= size;
}

void
zfs_range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	zfs_range_tree_remove_impl(arg, start, size, B_FALSE);
}

void
zfs_range_tree_remove_fill(zfs_range_tree_t *rt, uint64_t start, uint64_t size)
{
	zfs_range_tree_remove_impl(rt, start, size, B_TRUE);
}
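
/*
 * Illustrative sketch (arbitrary example offsets): removing from the middle
 * of a segment punches a hole, leaving two segments behind.
 *
 *	// rt currently holds the single segment [0x1000, 0x4000)
 *	zfs_range_tree_remove(rt, 0x2000, 0x1000);
 *	    // left_over and right_over are both true, so the tree now
 *	    // holds [0x1000, 0x2000) and [0x3000, 0x4000)
 */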

void
zfs_range_tree_resize_segment(zfs_range_tree_t *rt, zfs_range_seg_t *rs,
    uint64_t newstart, uint64_t newsize)
{
	int64_t delta = newsize - (zfs_rs_get_end(rs, rt) -
	    zfs_rs_get_start(rs, rt));

	zfs_range_tree_stat_decr(rt, rs);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	zfs_rs_set_start(rs, rt, newstart);
	zfs_rs_set_end(rs, rt, newstart + newsize);

	zfs_range_tree_stat_incr(rt, rs);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	rt->rt_space += delta;
}

static zfs_range_seg_t *
zfs_range_tree_find_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size)
{
	zfs_range_seg_max_t rsearch;
	uint64_t end = start + size;

	VERIFY(size != 0);

	zfs_rs_set_start(&rsearch, rt, start);
	zfs_rs_set_end(&rsearch, rt, end);
	return (zfs_btree_find(&rt->rt_root, &rsearch, NULL));
}

zfs_range_seg_t *
zfs_range_tree_find(zfs_range_tree_t *rt, uint64_t start, uint64_t size)
{
	if (rt->rt_type == ZFS_RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	zfs_range_seg_t *rs = zfs_range_tree_find_impl(rt, start, size);
	if (rs != NULL && zfs_rs_get_start(rs, rt) <= start &&
	    zfs_rs_get_end(rs, rt) >= start + size) {
		return (rs);
	}
	return (NULL);
}

void
zfs_range_tree_verify_not_present(zfs_range_tree_t *rt, uint64_t off,
    uint64_t size)
{
	zfs_range_seg_t *rs = zfs_range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("segment already in tree; rs=%p", (void *)rs);
}

boolean_t
zfs_range_tree_contains(zfs_range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (zfs_range_tree_find(rt, start, size) != NULL);
}
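
/*
 * Note: zfs_range_tree_find() (and therefore zfs_range_tree_contains())
 * only returns a segment that covers the queried range in its entirety.
 * For example, with the single segment [0x1000, 0x2000) in the tree,
 * zfs_range_tree_contains(rt, 0x1800, 0x100) is B_TRUE, while
 * zfs_range_tree_contains(rt, 0x1800, 0x1000) is B_FALSE even though the
 * ranges overlap; use zfs_range_tree_find_in() to detect partial overlaps.
 */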

/*
 * Returns the first subset of the given range which overlaps with the range
 * tree. Returns true if there is a segment in the range, and false if there
 * isn't.
 */
boolean_t
zfs_range_tree_find_in(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
    uint64_t *ostart, uint64_t *osize)
{
	if (rt->rt_type == ZFS_RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	zfs_range_seg_max_t rsearch;
	zfs_rs_set_start(&rsearch, rt, start);
	zfs_rs_set_end_raw(&rsearch, rt, zfs_rs_get_start_raw(&rsearch, rt) +
	    1);

	zfs_btree_index_t where;
	zfs_range_seg_t *rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);
	if (rs != NULL) {
		*ostart = start;
		*osize = MIN(size, zfs_rs_get_end(rs, rt) - start);
		return (B_TRUE);
	}

	rs = zfs_btree_next(&rt->rt_root, &where, &where);
	if (rs == NULL || zfs_rs_get_start(rs, rt) > start + size)
		return (B_FALSE);

	*ostart = zfs_rs_get_start(rs, rt);
	*osize = MIN(start + size, zfs_rs_get_end(rs, rt)) -
	    zfs_rs_get_start(rs, rt);
	return (B_TRUE);
}

/*
 * Ensure that this range is not in the tree after the call, regardless of
 * whether any of it is currently in the tree.
 */
void
zfs_range_tree_clear(zfs_range_tree_t *rt, uint64_t start, uint64_t size)
{
	zfs_range_seg_t *rs;

	if (size == 0)
		return;

	if (rt->rt_type == ZFS_RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	while ((rs = zfs_range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(zfs_rs_get_start(rs, rt), start);
		uint64_t free_end = MIN(zfs_rs_get_end(rs, rt), start + size);
		zfs_range_tree_remove(rt, free_start, free_end - free_start);
	}
}

void
zfs_range_tree_swap(zfs_range_tree_t **rtsrc, zfs_range_tree_t **rtdst)
{
	zfs_range_tree_t *rt;

	ASSERT0(zfs_range_tree_space(*rtdst));
	ASSERT0(zfs_btree_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}
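
/*
 * Illustrative sketch (hypothetical caller code, not part of this file):
 * walking every piece of [start, start + size) that overlaps the tree by
 * repeatedly calling zfs_range_tree_find_in() and advancing past each
 * returned subset.
 *
 *	uint64_t ostart, osize;
 *	while (size > 0 &&
 *	    zfs_range_tree_find_in(rt, start, size, &ostart, &osize)) {
 *		// [ostart, ostart + osize) lies in both the query range
 *		// and the tree
 *		size -= (ostart + osize) - start;
 *		start = ostart + osize;
 *	}
 */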

void
zfs_range_tree_vacate(zfs_range_tree_t *rt, zfs_range_tree_func_t *func,
    void *arg)
{
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	if (func != NULL) {
		zfs_range_seg_t *rs;
		zfs_btree_index_t *cookie = NULL;

		while ((rs = zfs_btree_destroy_nodes(&rt->rt_root, &cookie)) !=
		    NULL) {
			func(arg, zfs_rs_get_start(rs, rt),
			    zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt));
		}
	} else {
		zfs_btree_clear(&rt->rt_root);
	}

	memset(rt->rt_histogram, 0, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}

void
zfs_range_tree_walk(zfs_range_tree_t *rt, zfs_range_tree_func_t *func,
    void *arg)
{
	zfs_btree_index_t where;
	for (zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where);
	    rs != NULL; rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		func(arg, zfs_rs_get_start(rs, rt), zfs_rs_get_end(rs, rt) -
		    zfs_rs_get_start(rs, rt));
	}
}

zfs_range_seg_t *
zfs_range_tree_first(zfs_range_tree_t *rt)
{
	return (zfs_btree_first(&rt->rt_root, NULL));
}

uint64_t
zfs_range_tree_space(zfs_range_tree_t *rt)
{
	return (rt->rt_space);
}

uint64_t
zfs_range_tree_numsegs(zfs_range_tree_t *rt)
{
	return ((rt == NULL) ? 0 : zfs_btree_numnodes(&rt->rt_root));
}

boolean_t
zfs_range_tree_is_empty(zfs_range_tree_t *rt)
{
	ASSERT(rt != NULL);
	return (zfs_range_tree_space(rt) == 0);
}
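
/*
 * Illustrative sketch (hypothetical helper, not part of this file): the
 * zfs_range_tree_func_t callback used by zfs_range_tree_walk() and
 * zfs_range_tree_vacate() receives each segment's start and size, e.g. to
 * total up the tracked space:
 *
 *	static void
 *	range_tree_sum_cb(void *arg, uint64_t start, uint64_t size)
 *	{
 *		uint64_t *sum = arg;
 *		*sum += size;
 *	}
 *
 *	uint64_t sum = 0;
 *	zfs_range_tree_walk(rt, range_tree_sum_cb, &sum);
 *	// sum now equals zfs_range_tree_space(rt)
 */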

/*
 * Remove any overlapping ranges between the given segment [start, end)
 * from removefrom. Add non-overlapping leftovers to addto.
 */
void
zfs_range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
    zfs_range_tree_t *removefrom, zfs_range_tree_t *addto)
{
	zfs_btree_index_t where;
	zfs_range_seg_max_t starting_rs;
	zfs_rs_set_start(&starting_rs, removefrom, start);
	zfs_rs_set_end_raw(&starting_rs, removefrom,
	    zfs_rs_get_start_raw(&starting_rs, removefrom) + 1);

	zfs_range_seg_t *curr = zfs_btree_find(&removefrom->rt_root,
	    &starting_rs, &where);

	if (curr == NULL)
		curr = zfs_btree_next(&removefrom->rt_root, &where, &where);

	zfs_range_seg_t *next;
	for (; curr != NULL; curr = next) {
		if (start == end)
			return;
		VERIFY3U(start, <, end);

		/* there is no overlap */
		if (end <= zfs_rs_get_start(curr, removefrom)) {
			zfs_range_tree_add(addto, start, end - start);
			return;
		}

		uint64_t overlap_start = MAX(zfs_rs_get_start(curr, removefrom),
		    start);
		uint64_t overlap_end = MIN(zfs_rs_get_end(curr, removefrom),
		    end);
		uint64_t overlap_size = overlap_end - overlap_start;
		ASSERT3S(overlap_size, >, 0);
		zfs_range_seg_max_t rs;
		zfs_rs_copy(curr, &rs, removefrom);

		zfs_range_tree_remove(removefrom, overlap_start, overlap_size);

		if (start < overlap_start)
			zfs_range_tree_add(addto, start, overlap_start - start);

		start = overlap_end;
		next = zfs_btree_find(&removefrom->rt_root, &rs, &where);
		/*
		 * If we find something here, we only removed part of the
		 * curr segment. Either there's some left at the end
		 * because we've reached the end of the range we're removing,
		 * or there's some left at the start because we started
		 * partway through the range. Either way, we continue with
		 * the loop. If it's the former, we'll return at the start of
		 * the loop, and if it's the latter we'll see if there is more
		 * area to process.
		 */
		if (next != NULL) {
			ASSERT(start == end || start == zfs_rs_get_end(&rs,
			    removefrom));
		}

		next = zfs_btree_next(&removefrom->rt_root, &where, &where);
	}
	VERIFY3P(curr, ==, NULL);

	if (start != end) {
		VERIFY3U(start, <, end);
		zfs_range_tree_add(addto, start, end - start);
	} else {
		VERIFY3U(start, ==, end);
	}
}

/*
 * For each entry in rt, remove any portion that is present in removefrom
 * from removefrom, and add any portion that is not present to addto.
 */
void
zfs_range_tree_remove_xor_add(zfs_range_tree_t *rt,
    zfs_range_tree_t *removefrom, zfs_range_tree_t *addto)
{
	zfs_btree_index_t where;
	for (zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs;
	    rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		zfs_range_tree_remove_xor_add_segment(zfs_rs_get_start(rs, rt),
		    zfs_rs_get_end(rs, rt), removefrom, addto);
	}
}

uint64_t
zfs_range_tree_min(zfs_range_tree_t *rt)
{
	zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, NULL);
	return (rs != NULL ? zfs_rs_get_start(rs, rt) : 0);
}

uint64_t
zfs_range_tree_max(zfs_range_tree_t *rt)
{
	zfs_range_seg_t *rs = zfs_btree_last(&rt->rt_root, NULL);
	return (rs != NULL ? zfs_rs_get_end(rs, rt) : 0);
}

uint64_t
zfs_range_tree_span(zfs_range_tree_t *rt)
{
	return (zfs_range_tree_max(rt) - zfs_range_tree_min(rt));
}
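
/*
 * Worked example for zfs_range_tree_remove_xor_add_segment() above
 * (illustrative offsets only): with removefrom holding [0x1000, 0x2000)
 * and [0x3000, 0x4000), the call
 *
 *	zfs_range_tree_remove_xor_add_segment(0x1800, 0x3800,
 *	    removefrom, addto);
 *
 * removes [0x1800, 0x2000) and [0x3000, 0x3800) from removefrom (leaving
 * [0x1000, 0x1800) and [0x3800, 0x4000)) and adds the uncovered middle
 * piece [0x2000, 0x3000) to addto.
 */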