/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
 * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

/*
 * Range trees are tree-based data structures that can be used to
 * track free space or generally any space allocation information.
 * A range tree keeps track of individual segments and automatically
 * provides facilities such as adjacent extent merging and extent
 * splitting in response to range add/remove requests.
 *
 * A range tree starts out completely empty, with no segments in it.
 * Adding an allocation via range_tree_add to the range tree can either:
 * 1) create a new extent
 * 2) extend an adjacent extent
 * 3) merge two adjacent extents
 * Conversely, removing an allocation via range_tree_remove can:
 * 1) completely remove an extent
 * 2) shorten an extent (if the allocation was near one of its ends)
 * 3) split an extent into two extents, in effect punching a hole
 *
 * A range tree is also capable of 'bridging' gaps when adding
 * allocations. This is useful for cases when close proximity of
 * allocations is an important detail that needs to be represented
 * in the range tree. The gap is specified at creation time, via the
 * 'gap' parameter of range_tree_create_impl(). The default behavior
 * is not to bridge gaps (i.e. the maximum allowed gap size is 0).
 *
 * In order to traverse a range tree, use either the range_tree_walk()
 * or range_tree_vacate() functions.
 *
 * To obtain more accurate information on individual segment
 * operations that the range tree performs "under the hood", you can
 * specify a set of callbacks by passing a range_tree_ops_t structure
 * to the range_tree_create function. Any callbacks that are non-NULL
 * are then called at the appropriate times.
 *
 * The range tree code also supports a special variant of range trees
 * that can bridge small gaps between segments. This kind of tree is used
 * by the dsl scanning code to group I/Os into mostly sequential chunks to
 * optimize disk performance. The code here attempts to do this with as
 * little memory and computational overhead as possible. One limitation of
 * this implementation is that segments of range trees with gaps can only
 * support removing complete segments.
 */

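/*
 * Example usage (illustrative sketch only; print_seg is a hypothetical
 * callback matching range_tree_func_t):
 *
 *	static void
 *	print_seg(void *arg, uint64_t start, uint64_t size)
 *	{
 *		zfs_dbgmsg("segment [%llx, %llx)", (longlong_t)start,
 *		    (longlong_t)(start + size));
 *	}
 *
 *	range_tree_t *rt = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
 *	range_tree_add(rt, 0x1000, 0x1000);	// new segment [0x1000, 0x2000)
 *	range_tree_add(rt, 0x2000, 0x1000);	// merges into [0x1000, 0x3000)
 *	range_tree_remove(rt, 0x1800, 0x800);	// punches a hole: two segments
 *	range_tree_walk(rt, print_seg, NULL);	// visits both segments in order
 *	range_tree_vacate(rt, NULL, NULL);	// empties the tree
 *	range_tree_destroy(rt);			// tree must be empty here
 */
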
static inline void
rs_copy(range_seg_t *src, range_seg_t *dest, range_tree_t *rt)
{
	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
	size_t size = 0;
	switch (rt->rt_type) {
	case RANGE_SEG32:
		size = sizeof (range_seg32_t);
		break;
	case RANGE_SEG64:
		size = sizeof (range_seg64_t);
		break;
	case RANGE_SEG_GAP:
		size = sizeof (range_seg_gap_t);
		break;
	default:
		VERIFY(0);
	}
	bcopy(src, dest, size);
}

void
range_tree_stat_verify(range_tree_t *rt)
{
	range_seg_t *rs;
	zfs_btree_index_t where;
	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = zfs_btree_first(&rt->rt_root, &where); rs != NULL;
	    rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%px, hist=%llu, rt_hist=%llu",
			    i, hist, hist[i], rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}

static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}

static int
range_tree_seg32_compare(const void *x1, const void *x2)
{
	const range_seg32_t *r1 = x1;
	const range_seg32_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

static int
range_tree_seg64_compare(const void *x1, const void *x2)
{
	const range_seg64_t *r1 = x1;
	const range_seg64_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

static int
range_tree_seg_gap_compare(const void *x1, const void *x2)
{
	const range_seg_gap_t *r1 = x1;
	const range_seg_gap_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

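/*
 * Note on the comparators above: two segments compare equal whenever they
 * overlap, since neither of the two disjointness tests fires.  This is what
 * lets zfs_btree_find() locate a segment that overlaps a search range; for
 * example, searching for [15, 25) in a tree holding [10, 20) returns the
 * [10, 20) segment.  Segments that merely touch (one segment's rs_end equals
 * the next segment's rs_start) are still ordered, not equal, because rs_end
 * is exclusive.
 */
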
range_tree_t *
range_tree_create_impl(range_tree_ops_t *ops, range_seg_type_t type, void *arg,
    uint64_t start, uint64_t shift,
    int (*zfs_btree_compare) (const void *, const void *),
    uint64_t gap)
{
	range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

	ASSERT3U(shift, <, 64);
	ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES);
	size_t size;
	int (*compare) (const void *, const void *);
	switch (type) {
	case RANGE_SEG32:
		size = sizeof (range_seg32_t);
		compare = range_tree_seg32_compare;
		break;
	case RANGE_SEG64:
		size = sizeof (range_seg64_t);
		compare = range_tree_seg64_compare;
		break;
	case RANGE_SEG_GAP:
		size = sizeof (range_seg_gap_t);
		compare = range_tree_seg_gap_compare;
		break;
	default:
		panic("Invalid range seg type %d", type);
	}
	zfs_btree_create(&rt->rt_root, compare, size);

	rt->rt_ops = ops;
	rt->rt_gap = gap;
	rt->rt_arg = arg;
	rt->rt_type = type;
	rt->rt_start = start;
	rt->rt_shift = shift;
	rt->rt_btree_compare = zfs_btree_compare;

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

range_tree_t *
range_tree_create(range_tree_ops_t *ops, range_seg_type_t type,
    void *arg, uint64_t start, uint64_t shift)
{
	return (range_tree_create_impl(ops, type, arg, start, shift, NULL, 0));
}

void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	zfs_btree_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}

void
range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta)
{
	if (delta < 0 && delta * -1 >= rs_get_fill(rs, rt)) {
		zfs_panic_recover("zfs: attempting to decrease fill to or "
		    "below 0; probable double remove in segment [%llx:%llx]",
		    (longlong_t)rs_get_start(rs, rt),
		    (longlong_t)rs_get_end(rs, rt));
	}
	if (rs_get_fill(rs, rt) + delta > rs_get_end(rs, rt) -
	    rs_get_start(rs, rt)) {
		zfs_panic_recover("zfs: attempting to increase fill beyond "
		    "max; probable double add in segment [%llx:%llx]",
		    (longlong_t)rs_get_start(rs, rt),
		    (longlong_t)rs_get_end(rs, rt));
	}

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
	rs_set_fill(rs, rt, rs_get_fill(rs, rt) + delta);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
}

static void
range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
{
	range_tree_t *rt = arg;
	zfs_btree_index_t where;
	range_seg_t *rs_before, *rs_after, *rs;
	range_seg_max_t tmp, rsearch;
	uint64_t end = start + size, gap = rt->rt_gap;
	uint64_t bridge_size = 0;
	boolean_t merge_before, merge_after;

	ASSERT3U(size, !=, 0);
	ASSERT3U(fill, <=, size);
	ASSERT3U(start + size, >, start);

	rs_set_start(&rsearch, rt, start);
	rs_set_end(&rsearch, rt, end);
	rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);

	/*
	 * If this is a gap-supporting range tree, it is possible that we
	 * are inserting into an existing segment. In this case simply
	 * bump the fill count and call the remove / add callbacks. If the
	 * new range will extend an existing segment, we remove the
	 * existing one, apply the new extent to it and re-insert it using
	 * the normal code paths.
	 */
	if (rs != NULL) {
		if (gap == 0) {
			zfs_panic_recover("zfs: adding existent segment to "
			    "range tree (offset=%llx size=%llx)",
			    (longlong_t)start, (longlong_t)size);
			return;
		}
		uint64_t rstart = rs_get_start(rs, rt);
		uint64_t rend = rs_get_end(rs, rt);
		if (rstart <= start && rend >= end) {
			range_tree_adjust_fill(rt, rs, fill);
			return;
		}

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

		range_tree_stat_decr(rt, rs);
		rt->rt_space -= rend - rstart;

		fill += rs_get_fill(rs, rt);
		start = MIN(start, rstart);
		end = MAX(end, rend);
		size = end - start;

		zfs_btree_remove(&rt->rt_root, rs);
		range_tree_add_impl(rt, start, size, fill);
		return;
	}

	ASSERT3P(rs, ==, NULL);

	/*
	 * Determine whether or not we will have to merge with our neighbors.
	 * If gap != 0, we might need to merge with our neighbors even if we
	 * aren't directly touching.
	 */
	zfs_btree_index_t where_before, where_after;
	rs_before = zfs_btree_prev(&rt->rt_root, &where, &where_before);
	rs_after = zfs_btree_next(&rt->rt_root, &where, &where_after);

	merge_before = (rs_before != NULL && rs_get_end(rs_before, rt) >=
	    start - gap);
	merge_after = (rs_after != NULL && rs_get_start(rs_after, rt) <= end +
	    gap);

	if (merge_before && gap != 0)
		bridge_size += start - rs_get_end(rs_before, rt);
	if (merge_after && gap != 0)
		bridge_size += rs_get_start(rs_after, rt) - end;

	if (merge_before && merge_after) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_copy(rs_after, &tmp, rt);
		uint64_t before_start = rs_get_start_raw(rs_before, rt);
		uint64_t before_fill = rs_get_fill(rs_before, rt);
		uint64_t after_fill = rs_get_fill(rs_after, rt);
		zfs_btree_remove_idx(&rt->rt_root, &where_before);

		/*
		 * We have to re-find the node because our old reference is
		 * invalid as soon as we do any mutating btree operations.
		 */
		rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after);
		rs_set_start_raw(rs_after, rt, before_start);
		rs_set_fill(rs_after, rt, after_fill + before_fill + fill);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		uint64_t before_fill = rs_get_fill(rs_before, rt);
		rs_set_end(rs_before, rt, end);
		rs_set_fill(rs_before, rt, before_fill + fill);
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		uint64_t after_fill = rs_get_fill(rs_after, rt);
		rs_set_start(rs_after, rt, start);
		rs_set_fill(rs_after, rt, after_fill + fill);
		rs = rs_after;
	} else {
		rs = &tmp;

		rs_set_start(rs, rt, start);
		rs_set_end(rs, rt, end);
		rs_set_fill(rs, rt, fill);
		zfs_btree_add_idx(&rt->rt_root, rs, &where);
	}

	if (gap != 0) {
		ASSERT3U(rs_get_fill(rs, rt), <=, rs_get_end(rs, rt) -
		    rs_get_start(rs, rt));
	} else {
		ASSERT3U(rs_get_fill(rs, rt), ==, rs_get_end(rs, rt) -
		    rs_get_start(rs, rt));
	}

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size + bridge_size;
}

void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_add_impl(arg, start, size, size);
}

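/*
 * Worked example (illustrative): with gap == 0, adding [0, 10) and then
 * [10, 20) leaves a single segment [0, 20) -- the extents are adjacent, so
 * they are merged.  With a tree created with gap == 5, adding [0, 10) and
 * then [13, 20) also yields one segment [0, 20): the 3-unit hole is bridged,
 * and bridge_size is charged to rt_space (so rt_space becomes 20), while the
 * segment's fill only accounts for the 17 units actually added.
 */
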
static void
range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size,
    boolean_t do_fill)
{
	zfs_btree_index_t where;
	range_seg_t *rs;
	range_seg_max_t rsearch, rs_tmp;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);
	if (rt->rt_type == RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	rs_set_start(&rsearch, rt, start);
	rs_set_end(&rsearch, rt, end);
	rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: removing nonexistent segment from "
		    "range tree (offset=%llx size=%llx)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/*
	 * Range trees with gap support must only remove complete segments
	 * from the tree. This allows us to maintain accurate fill accounting
	 * and to ensure that bridged sections are not leaked. If we need to
	 * remove less than the full segment, we can only adjust the fill
	 * count.
	 */
	if (rt->rt_gap != 0) {
		if (do_fill) {
			if (rs_get_fill(rs, rt) == size) {
				start = rs_get_start(rs, rt);
				end = rs_get_end(rs, rt);
				size = end - start;
			} else {
				range_tree_adjust_fill(rt, rs, -size);
				return;
			}
		} else if (rs_get_start(rs, rt) != start ||
		    rs_get_end(rs, rt) != end) {
			zfs_panic_recover("zfs: freeing partial segment of "
			    "gap tree (offset=%llx size=%llx) of "
			    "(offset=%llx size=%llx)",
			    (longlong_t)start, (longlong_t)size,
			    (longlong_t)rs_get_start(rs, rt),
			    (longlong_t)rs_get_end(rs, rt) - rs_get_start(rs,
			    rt));
			return;
		}
	}

	VERIFY3U(rs_get_start(rs, rt), <=, start);
	VERIFY3U(rs_get_end(rs, rt), >=, end);

	left_over = (rs_get_start(rs, rt) != start);
	right_over = (rs_get_end(rs, rt) != end);

	range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		range_seg_max_t newseg;
		rs_set_start(&newseg, rt, end);
		rs_set_end_raw(&newseg, rt, rs_get_end_raw(rs, rt));
		rs_set_fill(&newseg, rt, rs_get_end(rs, rt) - end);
		range_tree_stat_incr(rt, &newseg);

		// This modifies the buffer already inside the range tree
		rs_set_end(rs, rt, start);

		rs_copy(rs, &rs_tmp, rt);
		if (zfs_btree_next(&rt->rt_root, &where, &where) != NULL)
			zfs_btree_add_idx(&rt->rt_root, &newseg, &where);
		else
			zfs_btree_add(&rt->rt_root, &newseg);

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
			rt->rt_ops->rtop_add(rt, &newseg, rt->rt_arg);
	} else if (left_over) {
		// This modifies the buffer already inside the range tree
		rs_set_end(rs, rt, start);
		rs_copy(rs, &rs_tmp, rt);
	} else if (right_over) {
		// This modifies the buffer already inside the range tree
		rs_set_start(rs, rt, end);
		rs_copy(rs, &rs_tmp, rt);
	} else {
		zfs_btree_remove_idx(&rt->rt_root, &where);
		rs = NULL;
	}

	if (rs != NULL) {
		/*
		 * The fill of the leftover segment will always be equal to
		 * the size, since we do not support removing partial segments
		 * of range trees with gaps.
		 */
		rs_set_fill_raw(rs, rt, rs_get_end_raw(rs, rt) -
		    rs_get_start_raw(rs, rt));
		range_tree_stat_incr(rt, &rs_tmp);

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
			rt->rt_ops->rtop_add(rt, &rs_tmp, rt->rt_arg);
	}

	rt->rt_space -= size;
}

void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_remove_impl(arg, start, size, B_FALSE);
}

void
range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_tree_remove_impl(rt, start, size, B_TRUE);
}

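/*
 * Illustrative note: for a gap-supporting tree, range_tree_remove() must be
 * given the exact bounds of an existing segment, otherwise it triggers
 * zfs_panic_recover() above.  range_tree_remove_fill() is the partial-remove
 * variant: it decrements the segment's fill by the requested size, and only
 * removes the segment outright when the requested size equals the segment's
 * current fill.
 */
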
void
range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
    uint64_t newstart, uint64_t newsize)
{
	int64_t delta = newsize - (rs_get_end(rs, rt) - rs_get_start(rs, rt));

	range_tree_stat_decr(rt, rs);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	rs_set_start(rs, rt, newstart);
	rs_set_end(rs, rt, newstart + newsize);

	range_tree_stat_incr(rt, rs);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	rt->rt_space += delta;
}

static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_max_t rsearch;
	uint64_t end = start + size;

	VERIFY(size != 0);

	rs_set_start(&rsearch, rt, start);
	rs_set_end(&rsearch, rt, end);
	return (zfs_btree_find(&rt->rt_root, &rsearch, NULL));
}

range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
	if (rt->rt_type == RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	range_seg_t *rs = range_tree_find_impl(rt, start, size);
	if (rs != NULL && rs_get_start(rs, rt) <= start &&
	    rs_get_end(rs, rt) >= start + size) {
		return (rs);
	}
	return (NULL);
}

void
range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size)
{
	range_seg_t *rs = range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("segment already in tree; rs=%p", (void *)rs);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (range_tree_find(rt, start, size) != NULL);
}

/*
 * Returns the first subset of the given range which overlaps with the range
 * tree. Returns true if there is a segment in the range, and false if there
 * isn't.
 */
boolean_t
range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
    uint64_t *ostart, uint64_t *osize)
{
	if (rt->rt_type == RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	range_seg_max_t rsearch;
	rs_set_start(&rsearch, rt, start);
	rs_set_end_raw(&rsearch, rt, rs_get_start_raw(&rsearch, rt) + 1);

	zfs_btree_index_t where;
	range_seg_t *rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);
	if (rs != NULL) {
		*ostart = start;
		*osize = MIN(size, rs_get_end(rs, rt) - start);
		return (B_TRUE);
	}

	rs = zfs_btree_next(&rt->rt_root, &where, &where);
	if (rs == NULL || rs_get_start(rs, rt) > start + size)
		return (B_FALSE);

	*ostart = rs_get_start(rs, rt);
	*osize = MIN(start + size, rs_get_end(rs, rt)) -
	    rs_get_start(rs, rt);
	return (B_TRUE);
}

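/*
 * Worked example (illustrative): if the tree holds the single segment
 * [100, 200), then range_tree_find_in(rt, 120, 30, &os, &osz) sets
 * os = 120, osz = 30 (the query start already lies inside a segment), while
 * range_tree_find_in(rt, 50, 100, &os, &osz) sets os = 100, osz = 50 (the
 * overlap is clipped to the queried range); both calls return B_TRUE.
 * A query entirely below 100 or at/after 200 returns B_FALSE.
 */
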
/*
 * Ensure that this range is not in the tree, regardless of whether
 * it is currently in the tree.
 */
void
range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs;

	if (size == 0)
		return;

	if (rt->rt_type == RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(rs_get_start(rs, rt), start);
		uint64_t free_end = MIN(rs_get_end(rs, rt), start + size);
		range_tree_remove(rt, free_start, free_end - free_start);
	}
}

void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
	range_tree_t *rt;

	ASSERT0(range_tree_space(*rtdst));
	ASSERT0(zfs_btree_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}

void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	if (func != NULL) {
		range_seg_t *rs;
		zfs_btree_index_t *cookie = NULL;

		while ((rs = zfs_btree_destroy_nodes(&rt->rt_root, &cookie)) !=
		    NULL) {
			func(arg, rs_get_start(rs, rt), rs_get_end(rs, rt) -
			    rs_get_start(rs, rt));
		}
	} else {
		zfs_btree_clear(&rt->rt_root);
	}

	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}

void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	zfs_btree_index_t where;
	for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where);
	    rs != NULL; rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		func(arg, rs_get_start(rs, rt), rs_get_end(rs, rt) -
		    rs_get_start(rs, rt));
	}
}

range_seg_t *
range_tree_first(range_tree_t *rt)
{
	return (zfs_btree_first(&rt->rt_root, NULL));
}

uint64_t
range_tree_space(range_tree_t *rt)
{
	return (rt->rt_space);
}

uint64_t
range_tree_numsegs(range_tree_t *rt)
{
	return ((rt == NULL) ? 0 : zfs_btree_numnodes(&rt->rt_root));
}

boolean_t
range_tree_is_empty(range_tree_t *rt)
{
	ASSERT(rt != NULL);
	return (range_tree_space(rt) == 0);
}

/* ARGSUSED */
void
rt_btree_create(range_tree_t *rt, void *arg)
{
	zfs_btree_t *size_tree = arg;

	size_t size;
	switch (rt->rt_type) {
	case RANGE_SEG32:
		size = sizeof (range_seg32_t);
		break;
	case RANGE_SEG64:
		size = sizeof (range_seg64_t);
		break;
	case RANGE_SEG_GAP:
		size = sizeof (range_seg_gap_t);
		break;
	default:
		panic("Invalid range seg type %d", rt->rt_type);
	}
	zfs_btree_create(size_tree, rt->rt_btree_compare, size);
}

/* ARGSUSED */
void
rt_btree_destroy(range_tree_t *rt, void *arg)
{
	zfs_btree_t *size_tree = arg;
	ASSERT0(zfs_btree_numnodes(size_tree));

	zfs_btree_destroy(size_tree);
}

/* ARGSUSED */
void
rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg)
{
	zfs_btree_t *size_tree = arg;

	zfs_btree_add(size_tree, rs);
}

/* ARGSUSED */
void
rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
{
	zfs_btree_t *size_tree = arg;

	zfs_btree_remove(size_tree, rs);
}

/* ARGSUSED */
void
rt_btree_vacate(range_tree_t *rt, void *arg)
{
	zfs_btree_t *size_tree = arg;
	zfs_btree_clear(size_tree);
	zfs_btree_destroy(size_tree);

	rt_btree_create(rt, arg);
}

range_tree_ops_t rt_btree_ops = {
	.rtop_create = rt_btree_create,
	.rtop_destroy = rt_btree_destroy,
	.rtop_add = rt_btree_add,
	.rtop_remove = rt_btree_remove,
	.rtop_vacate = rt_btree_vacate
};

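/*
 * Illustrative sketch: rt_btree_ops keeps a second btree (passed as the
 * tree's rt_arg) populated with the same segments, ordered by the
 * caller-supplied rt_btree_compare.  A caller wanting a size-ordered view
 * of the segments might do something like the following (seg64_size_compare
 * is a hypothetical comparator, not defined in this file):
 *
 *	static int
 *	seg64_size_compare(const void *x1, const void *x2)
 *	{
 *		const range_seg64_t *r1 = x1;
 *		const range_seg64_t *r2 = x2;
 *		uint64_t s1 = r1->rs_end - r1->rs_start;
 *		uint64_t s2 = r2->rs_end - r2->rs_start;
 *
 *		if (s1 != s2)
 *			return (s1 < s2 ? -1 : 1);
 *		return ((r1->rs_start > r2->rs_start) -
 *		    (r1->rs_start < r2->rs_start));
 *	}
 *
 *	zfs_btree_t size_tree;
 *	range_tree_t *rt = range_tree_create_impl(&rt_btree_ops, RANGE_SEG64,
 *	    &size_tree, 0, 0, seg64_size_compare, 0);
 *
 * The callbacks then keep size_tree in sync as segments are added to and
 * removed from rt.
 */
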
/*
 * Remove from removefrom any ranges that overlap the given segment
 * [start, end). Add the non-overlapping leftovers of [start, end) to addto.
 */
void
range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
    range_tree_t *removefrom, range_tree_t *addto)
{
	zfs_btree_index_t where;
	range_seg_max_t starting_rs;
	rs_set_start(&starting_rs, removefrom, start);
	rs_set_end_raw(&starting_rs, removefrom, rs_get_start_raw(&starting_rs,
	    removefrom) + 1);

	range_seg_t *curr = zfs_btree_find(&removefrom->rt_root,
	    &starting_rs, &where);

	if (curr == NULL)
		curr = zfs_btree_next(&removefrom->rt_root, &where, &where);

	range_seg_t *next;
	for (; curr != NULL; curr = next) {
		if (start == end)
			return;
		VERIFY3U(start, <, end);

		/* there is no overlap */
		if (end <= rs_get_start(curr, removefrom)) {
			range_tree_add(addto, start, end - start);
			return;
		}

		uint64_t overlap_start = MAX(rs_get_start(curr, removefrom),
		    start);
		uint64_t overlap_end = MIN(rs_get_end(curr, removefrom),
		    end);
		uint64_t overlap_size = overlap_end - overlap_start;
		ASSERT3S(overlap_size, >, 0);
		range_seg_max_t rs;
		rs_copy(curr, &rs, removefrom);

		range_tree_remove(removefrom, overlap_start, overlap_size);

		if (start < overlap_start)
			range_tree_add(addto, start, overlap_start - start);

		start = overlap_end;
		next = zfs_btree_find(&removefrom->rt_root, &rs, &where);
		/*
		 * If we find something here, we only removed part of the
		 * curr segment. Either there's some left at the end
		 * because we've reached the end of the range we're removing,
		 * or there's some left at the start because we started
		 * partway through the range. Either way, we continue with
		 * the loop. If it's the former, we'll return at the start of
		 * the loop, and if it's the latter we'll see if there is more
		 * area to process.
		 */
		if (next != NULL) {
			ASSERT(start == end || start == rs_get_end(&rs,
			    removefrom));
		}

		next = zfs_btree_next(&removefrom->rt_root, &where, &where);
	}
	VERIFY3P(curr, ==, NULL);

	if (start != end) {
		VERIFY3U(start, <, end);
		range_tree_add(addto, start, end - start);
	} else {
		VERIFY3U(start, ==, end);
	}
}

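/*
 * Worked example (illustrative): if removefrom holds the single segment
 * [10, 20) and this is called with [5, 25), then the overlapping part
 * [10, 20) is removed from removefrom, and the non-overlapping leftovers
 * [5, 10) and [20, 25) are added to addto.
 */
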
/*
 * For each entry in rt, if it exists in removefrom, remove it
 * from removefrom. Otherwise, add it to addto.
 */
void
range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
    range_tree_t *addto)
{
	zfs_btree_index_t where;
	for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs;
	    rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		range_tree_remove_xor_add_segment(rs_get_start(rs, rt),
		    rs_get_end(rs, rt), removefrom, addto);
	}
}

uint64_t
range_tree_min(range_tree_t *rt)
{
	range_seg_t *rs = zfs_btree_first(&rt->rt_root, NULL);
	return (rs != NULL ? rs_get_start(rs, rt) : 0);
}

uint64_t
range_tree_max(range_tree_t *rt)
{
	range_seg_t *rs = zfs_btree_last(&rt->rt_root, NULL);
	return (rs != NULL ? rs_get_end(rs, rt) : 0);
}

uint64_t
range_tree_span(range_tree_t *rt)
{
	return (range_tree_max(rt) - range_tree_min(rt));
}