/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
 * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

/*
 * Range trees are tree-based data structures that can be used to
 * track free space or generally any space allocation information.
 * A range tree keeps track of individual segments and automatically
 * provides facilities such as adjacent extent merging and extent
 * splitting in response to range add/remove requests.
 *
 * A range tree starts out completely empty, with no segments in it.
 * Adding an allocation to the range tree via zfs_range_tree_add can:
 * 1) create a new extent
 * 2) extend an adjacent extent
 * 3) merge two adjacent extents
 * Conversely, removing an allocation via zfs_range_tree_remove can:
 * 1) completely remove an extent
 * 2) shorten an extent (if the allocation was near one of its ends)
 * 3) split an extent into two extents, in effect punching a hole
 *
 * A range tree is also capable of 'bridging' gaps when adding
 * allocations. This is useful for cases when close proximity of
 * allocations is an important detail that needs to be represented
 * in the range tree. See zfs_range_tree_create_gap(). The default
 * behavior is not to bridge gaps (i.e. the maximum allowed gap size
 * is 0).
 *
 * In order to traverse a range tree, use either the zfs_range_tree_walk()
 * or zfs_range_tree_vacate() functions.
 *
 * To obtain more detailed information on the individual segment
 * operations that the range tree performs "under the hood", you can
 * specify a set of callbacks by passing a zfs_range_tree_ops_t structure
 * to the zfs_range_tree_create function. Any callbacks that are non-NULL
 * are then called at the appropriate times.
 *
 * The range tree code also supports a special variant of range trees
 * that can bridge small gaps between segments. This kind of tree is used
 * by the dsl scanning code to group I/Os into mostly sequential chunks to
 * optimize disk performance. The code here attempts to do this with as
 * little memory and computational overhead as possible. One limitation of
 * this implementation is that range trees with gaps only support removing
 * complete segments.
 */
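/*
 * A minimal sketch of the add/merge/remove behavior described above,
 * using only functions defined later in this file. The function itself
 * is illustrative (not part of the public interface) and the attribute
 * merely keeps it out of unused warnings; values assume a gapless
 * 64-bit tree.
 */
__attribute__((unused)) static void
zfs_range_tree_usage_sketch(void)
{
	/* 64-bit segments, no callbacks, zero start offset and shift. */
	zfs_range_tree_t *rt = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
	    NULL, 0, 0);

	zfs_range_tree_add(rt, 0x1000, 0x1000);	/* new extent */
	zfs_range_tree_add(rt, 0x2000, 0x1000);	/* extends to [0x1000, 0x3000) */
	zfs_range_tree_remove(rt, 0x1800, 0x800); /* punches [0x1800, 0x2000) */
	ASSERT3U(zfs_range_tree_space(rt), ==, 0x1800);

	zfs_range_tree_vacate(rt, NULL, NULL);	/* tree must be empty... */
	zfs_range_tree_destroy(rt);		/* ...before it is destroyed */
}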
static inline void
zfs_rs_copy(zfs_range_seg_t *src, zfs_range_seg_t *dest, zfs_range_tree_t *rt)
{
	ASSERT3U(rt->rt_type, <, ZFS_RANGE_SEG_NUM_TYPES);
	size_t size = 0;
	switch (rt->rt_type) {
	case ZFS_RANGE_SEG32:
		size = sizeof (zfs_range_seg32_t);
		break;
	case ZFS_RANGE_SEG64:
		size = sizeof (zfs_range_seg64_t);
		break;
	case ZFS_RANGE_SEG_GAP:
		size = sizeof (zfs_range_seg_gap_t);
		break;
	default:
		__builtin_unreachable();
	}
	memcpy(dest, src, size);
}

void
zfs_range_tree_stat_verify(zfs_range_tree_t *rt)
{
	zfs_range_seg_t *rs;
	zfs_btree_index_t where;
	uint64_t hist[ZFS_RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = zfs_btree_first(&rt->rt_root, &where); rs != NULL;
	    rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		uint64_t size = zfs_rs_get_end(rs, rt) -
		    zfs_rs_get_start(rs, rt);
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < ZFS_RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%px, hist[i]=%llu, "
			    "rt_hist[i]=%llu",
			    i, hist, (u_longlong_t)hist[i],
			    (u_longlong_t)rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}

static void
zfs_range_tree_stat_incr(zfs_range_tree_t *rt, zfs_range_seg_t *rs)
{
	uint64_t size = zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt);
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
zfs_range_tree_stat_decr(zfs_range_tree_t *rt, zfs_range_seg_t *rs)
{
	uint64_t size = zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt);
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}

__attribute__((always_inline)) inline
static int
zfs_range_tree_seg32_compare(const void *x1, const void *x2)
{
	const zfs_range_seg32_t *r1 = x1;
	const zfs_range_seg32_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

__attribute__((always_inline)) inline
static int
zfs_range_tree_seg64_compare(const void *x1, const void *x2)
{
	const zfs_range_seg64_t *r1 = x1;
	const zfs_range_seg64_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

__attribute__((always_inline)) inline
static int
zfs_range_tree_seg_gap_compare(const void *x1, const void *x2)
{
	const zfs_range_seg_gap_t *r1 = x1;
	const zfs_range_seg_gap_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg32_find_in_buf, zfs_range_seg32_t,
    zfs_range_tree_seg32_compare)

ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg64_find_in_buf, zfs_range_seg64_t,
    zfs_range_tree_seg64_compare)

ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg_gap_find_in_buf,
    zfs_range_seg_gap_t, zfs_range_tree_seg_gap_compare)
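/*
 * Note on the comparators above: the expression
 * (r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start) yields
 * -1, 0, or 1, and any two overlapping segments compare as "equal".
 * A zfs_btree_find() with a [start, end) search key therefore returns
 * a segment overlapping that range, if one exists. Segments that are
 * merely adjacent (r1->rs_end == r2->rs_start) still compare as
 * ordered, which is why adjacency merging is handled explicitly in
 * zfs_range_tree_add_impl() rather than by the comparator.
 */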
zfs_range_tree_t *
zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
    zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
    uint64_t gap)
{
	zfs_range_tree_t *rt = kmem_zalloc(sizeof (zfs_range_tree_t), KM_SLEEP);

	ASSERT3U(shift, <, 64);
	ASSERT3U(type, <=, ZFS_RANGE_SEG_NUM_TYPES);
	size_t size;
	int (*compare) (const void *, const void *);
	bt_find_in_buf_f bt_find;
	switch (type) {
	case ZFS_RANGE_SEG32:
		size = sizeof (zfs_range_seg32_t);
		compare = zfs_range_tree_seg32_compare;
		bt_find = zfs_range_tree_seg32_find_in_buf;
		break;
	case ZFS_RANGE_SEG64:
		size = sizeof (zfs_range_seg64_t);
		compare = zfs_range_tree_seg64_compare;
		bt_find = zfs_range_tree_seg64_find_in_buf;
		break;
	case ZFS_RANGE_SEG_GAP:
		size = sizeof (zfs_range_seg_gap_t);
		compare = zfs_range_tree_seg_gap_compare;
		bt_find = zfs_range_tree_seg_gap_find_in_buf;
		break;
	default:
		panic("Invalid range seg type %d", type);
	}
	zfs_btree_create(&rt->rt_root, compare, bt_find, size);

	rt->rt_ops = ops;
	rt->rt_gap = gap;
	rt->rt_arg = arg;
	rt->rt_type = type;
	rt->rt_start = start;
	rt->rt_shift = shift;

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

zfs_range_tree_t *
zfs_range_tree_create(const zfs_range_tree_ops_t *ops,
    zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift)
{
	return (zfs_range_tree_create_gap(ops, type, arg, start, shift, 0));
}

void
zfs_range_tree_destroy(zfs_range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	zfs_btree_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}
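/*
 * A minimal sketch of creating the gap-bridging variant, as the dsl
 * scan code does; the function name and the 512-byte gap are
 * illustrative. Segments added within rt_gap of each other will be
 * merged, with the bridged bytes counted in rt_space.
 */
__attribute__((unused)) static zfs_range_tree_t *
zfs_range_tree_gap_sketch(void)
{
	/* Bridge gaps of up to 512 bytes between added segments. */
	return (zfs_range_tree_create_gap(NULL, ZFS_RANGE_SEG_GAP, NULL,
	    0, 0, 512));
}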
void
zfs_range_tree_adjust_fill(zfs_range_tree_t *rt, zfs_range_seg_t *rs,
    int64_t delta)
{
	if (delta < 0 && delta * -1 >= zfs_rs_get_fill(rs, rt)) {
		zfs_panic_recover("zfs: attempting to decrease fill to or "
		    "below 0; probable double remove in segment [%llx:%llx]",
		    (longlong_t)zfs_rs_get_start(rs, rt),
		    (longlong_t)zfs_rs_get_end(rs, rt));
	}
	if (zfs_rs_get_fill(rs, rt) + delta > zfs_rs_get_end(rs, rt) -
	    zfs_rs_get_start(rs, rt)) {
		zfs_panic_recover("zfs: attempting to increase fill beyond "
		    "max; probable double add in segment [%llx:%llx]",
		    (longlong_t)zfs_rs_get_start(rs, rt),
		    (longlong_t)zfs_rs_get_end(rs, rt));
	}

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
	zfs_rs_set_fill(rs, rt, zfs_rs_get_fill(rs, rt) + delta);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
}

static void
zfs_range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
{
	zfs_range_tree_t *rt = arg;
	zfs_btree_index_t where;
	zfs_range_seg_t *rs_before, *rs_after, *rs;
	zfs_range_seg_max_t tmp, rsearch;
	uint64_t end = start + size, gap = rt->rt_gap;
	uint64_t bridge_size = 0;
	boolean_t merge_before, merge_after;

	ASSERT3U(size, !=, 0);
	ASSERT3U(fill, <=, size);
	ASSERT3U(start + size, >, start);

	zfs_rs_set_start(&rsearch, rt, start);
	zfs_rs_set_end(&rsearch, rt, end);
	rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);

	/*
	 * If this is a gap-supporting range tree, it is possible that we
	 * are inserting into an existing segment. In this case simply
	 * bump the fill count and call the remove / add callbacks. If the
	 * new range will extend an existing segment, we remove the
	 * existing one, apply the new extent to it and re-insert it using
	 * the normal code paths.
	 */
	if (rs != NULL) {
		if (gap == 0) {
			zfs_panic_recover("zfs: adding existent segment to "
			    "range tree (offset=%llx size=%llx)",
			    (longlong_t)start, (longlong_t)size);
			return;
		}
		uint64_t rstart = zfs_rs_get_start(rs, rt);
		uint64_t rend = zfs_rs_get_end(rs, rt);
		if (rstart <= start && rend >= end) {
			zfs_range_tree_adjust_fill(rt, rs, fill);
			return;
		}

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

		zfs_range_tree_stat_decr(rt, rs);
		rt->rt_space -= rend - rstart;

		fill += zfs_rs_get_fill(rs, rt);
		start = MIN(start, rstart);
		end = MAX(end, rend);
		size = end - start;

		zfs_btree_remove(&rt->rt_root, rs);
		zfs_range_tree_add_impl(rt, start, size, fill);
		return;
	}

	ASSERT3P(rs, ==, NULL);

	/*
	 * Determine whether or not we will have to merge with our neighbors.
	 * If gap != 0, we might need to merge with our neighbors even if we
	 * aren't directly touching.
	 */
	zfs_btree_index_t where_before, where_after;
	rs_before = zfs_btree_prev(&rt->rt_root, &where, &where_before);
	rs_after = zfs_btree_next(&rt->rt_root, &where, &where_after);

	merge_before = (rs_before != NULL && zfs_rs_get_end(rs_before, rt) >=
	    start - gap);
	merge_after = (rs_after != NULL && zfs_rs_get_start(rs_after, rt) <=
	    end + gap);

	if (merge_before && gap != 0)
		bridge_size += start - zfs_rs_get_end(rs_before, rt);
	if (merge_after && gap != 0)
		bridge_size += zfs_rs_get_start(rs_after, rt) - end;

	if (merge_before && merge_after) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		zfs_range_tree_stat_decr(rt, rs_before);
		zfs_range_tree_stat_decr(rt, rs_after);

		zfs_rs_copy(rs_after, &tmp, rt);
		uint64_t before_start = zfs_rs_get_start_raw(rs_before, rt);
		uint64_t before_fill = zfs_rs_get_fill(rs_before, rt);
		uint64_t after_fill = zfs_rs_get_fill(rs_after, rt);
		zfs_btree_remove_idx(&rt->rt_root, &where_before);

		/*
		 * We have to re-find the node because our old reference is
		 * invalid as soon as we do any mutating btree operations.
		 */
		rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after);
		ASSERT3P(rs_after, !=, NULL);
		zfs_rs_set_start_raw(rs_after, rt, before_start);
		zfs_rs_set_fill(rs_after, rt, after_fill + before_fill + fill);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		zfs_range_tree_stat_decr(rt, rs_before);

		uint64_t before_fill = zfs_rs_get_fill(rs_before, rt);
		zfs_rs_set_end(rs_before, rt, end);
		zfs_rs_set_fill(rs_before, rt, before_fill + fill);
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		zfs_range_tree_stat_decr(rt, rs_after);

		uint64_t after_fill = zfs_rs_get_fill(rs_after, rt);
		zfs_rs_set_start(rs_after, rt, start);
		zfs_rs_set_fill(rs_after, rt, after_fill + fill);
		rs = rs_after;
	} else {
		rs = &tmp;

		zfs_rs_set_start(rs, rt, start);
		zfs_rs_set_end(rs, rt, end);
		zfs_rs_set_fill(rs, rt, fill);
		zfs_btree_add_idx(&rt->rt_root, rs, &where);
	}

	if (gap != 0) {
		ASSERT3U(zfs_rs_get_fill(rs, rt), <=, zfs_rs_get_end(rs, rt) -
		    zfs_rs_get_start(rs, rt));
	} else {
		ASSERT3U(zfs_rs_get_fill(rs, rt), ==, zfs_rs_get_end(rs, rt) -
		    zfs_rs_get_start(rs, rt));
	}

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	zfs_range_tree_stat_incr(rt, rs);
	rt->rt_space += size + bridge_size;
}

void
zfs_range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	zfs_range_tree_add_impl(arg, start, size, size);
}
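/*
 * A sketch of the bridging behavior of zfs_range_tree_add_impl() on a
 * gap tree (assumed here to have rt_gap == 512, as in the sketch
 * further above): two adds separated by less than rt_gap collapse into
 * one segment whose fill tracks only the bytes actually added, while
 * rt_space also counts the bridged bytes.
 */
__attribute__((unused)) static void
zfs_range_tree_bridge_sketch(zfs_range_tree_t *gap_rt)
{
	zfs_range_tree_add(gap_rt, 0x1000, 0x100);
	zfs_range_tree_add(gap_rt, 0x1200, 0x100);	/* 0x100-byte gap */

	/* One segment [0x1000, 0x1300): 0x200 of fill, 0x100 bridged. */
	ASSERT3U(zfs_range_tree_numsegs(gap_rt), ==, 1);
	ASSERT3U(zfs_range_tree_space(gap_rt), ==, 0x300);
}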
static void
zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
    boolean_t do_fill)
{
	zfs_btree_index_t where;
	zfs_range_seg_t *rs;
	zfs_range_seg_max_t rsearch, rs_tmp;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);
	if (rt->rt_type == ZFS_RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	zfs_rs_set_start(&rsearch, rt, start);
	zfs_rs_set_end(&rsearch, rt, end);
	rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: removing nonexistent segment from "
		    "range tree (offset=%llx size=%llx)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/*
	 * Range trees with gap support must only remove complete segments
	 * from the tree. This allows us to maintain accurate fill accounting
	 * and to ensure that bridged sections are not leaked. If we need to
	 * remove less than the full segment, we can only adjust the fill
	 * count.
	 */
	if (rt->rt_gap != 0) {
		if (do_fill) {
			if (zfs_rs_get_fill(rs, rt) == size) {
				start = zfs_rs_get_start(rs, rt);
				end = zfs_rs_get_end(rs, rt);
				size = end - start;
			} else {
				zfs_range_tree_adjust_fill(rt, rs, -size);
				return;
			}
		} else if (zfs_rs_get_start(rs, rt) != start ||
		    zfs_rs_get_end(rs, rt) != end) {
			zfs_panic_recover("zfs: freeing partial segment of "
			    "gap tree (offset=%llx size=%llx) of "
			    "(offset=%llx size=%llx)",
			    (longlong_t)start, (longlong_t)size,
			    (longlong_t)zfs_rs_get_start(rs, rt),
			    (longlong_t)zfs_rs_get_end(rs, rt) -
			    zfs_rs_get_start(rs, rt));
			return;
		}
	}

	VERIFY3U(zfs_rs_get_start(rs, rt), <=, start);
	VERIFY3U(zfs_rs_get_end(rs, rt), >=, end);

	left_over = (zfs_rs_get_start(rs, rt) != start);
	right_over = (zfs_rs_get_end(rs, rt) != end);

	zfs_range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		zfs_range_seg_max_t newseg;
		zfs_rs_set_start(&newseg, rt, end);
		zfs_rs_set_end_raw(&newseg, rt, zfs_rs_get_end_raw(rs, rt));
		zfs_rs_set_fill(&newseg, rt, zfs_rs_get_end(rs, rt) - end);
		zfs_range_tree_stat_incr(rt, &newseg);

		// This modifies the buffer already inside the range tree
		zfs_rs_set_end(rs, rt, start);

		zfs_rs_copy(rs, &rs_tmp, rt);
		if (zfs_btree_next(&rt->rt_root, &where, &where) != NULL)
			zfs_btree_add_idx(&rt->rt_root, &newseg, &where);
		else
			zfs_btree_add(&rt->rt_root, &newseg);

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
			rt->rt_ops->rtop_add(rt, &newseg, rt->rt_arg);
	} else if (left_over) {
		// This modifies the buffer already inside the range tree
		zfs_rs_set_end(rs, rt, start);
		zfs_rs_copy(rs, &rs_tmp, rt);
	} else if (right_over) {
		// This modifies the buffer already inside the range tree
		zfs_rs_set_start(rs, rt, end);
		zfs_rs_copy(rs, &rs_tmp, rt);
	} else {
		zfs_btree_remove_idx(&rt->rt_root, &where);
		rs = NULL;
	}

	if (rs != NULL) {
		/*
		 * The fill of the leftover segment will always be equal to
		 * the size, since we do not support removing partial segments
		 * of range trees with gaps.
		 */
		zfs_zfs_rs_set_fill_raw(rs, rt, zfs_rs_get_end_raw(rs, rt) -
		    zfs_rs_get_start_raw(rs, rt));
		zfs_range_tree_stat_incr(rt, &rs_tmp);

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
			rt->rt_ops->rtop_add(rt, &rs_tmp, rt->rt_arg);
	}

	rt->rt_space -= size;
}

void
zfs_range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	zfs_range_tree_remove_impl(arg, start, size, B_FALSE);
}

void
zfs_range_tree_remove_fill(zfs_range_tree_t *rt, uint64_t start, uint64_t size)
{
	zfs_range_tree_remove_impl(rt, start, size, B_TRUE);
}
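/*
 * A sketch of zfs_range_tree_remove_fill() semantics on a gap tree,
 * assuming the bridged segment from the sketch above ([0x1000, 0x1300)
 * with fill 0x200): removing less than the segment's fill only
 * decrements the fill count; removing the last of the fill deletes the
 * whole segment, bridged bytes included.
 */
__attribute__((unused)) static void
zfs_range_tree_remove_fill_sketch(zfs_range_tree_t *gap_rt)
{
	/* Fill drops to 0x100; the segment itself is untouched. */
	zfs_range_tree_remove_fill(gap_rt, 0x1000, 0x100);
	ASSERT3U(zfs_range_tree_numsegs(gap_rt), ==, 1);

	/* The remaining fill matches, so the whole segment is removed. */
	zfs_range_tree_remove_fill(gap_rt, 0x1000, 0x100);
	ASSERT(zfs_range_tree_is_empty(gap_rt));
}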
void
zfs_range_tree_resize_segment(zfs_range_tree_t *rt, zfs_range_seg_t *rs,
    uint64_t newstart, uint64_t newsize)
{
	int64_t delta = newsize - (zfs_rs_get_end(rs, rt) -
	    zfs_rs_get_start(rs, rt));

	zfs_range_tree_stat_decr(rt, rs);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	zfs_rs_set_start(rs, rt, newstart);
	zfs_rs_set_end(rs, rt, newstart + newsize);

	zfs_range_tree_stat_incr(rt, rs);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	rt->rt_space += delta;
}

static zfs_range_seg_t *
zfs_range_tree_find_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size)
{
	zfs_range_seg_max_t rsearch;
	uint64_t end = start + size;

	VERIFY(size != 0);

	zfs_rs_set_start(&rsearch, rt, start);
	zfs_rs_set_end(&rsearch, rt, end);
	return (zfs_btree_find(&rt->rt_root, &rsearch, NULL));
}

zfs_range_seg_t *
zfs_range_tree_find(zfs_range_tree_t *rt, uint64_t start, uint64_t size)
{
	if (rt->rt_type == ZFS_RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	zfs_range_seg_t *rs = zfs_range_tree_find_impl(rt, start, size);
	if (rs != NULL && zfs_rs_get_start(rs, rt) <= start &&
	    zfs_rs_get_end(rs, rt) >= start + size) {
		return (rs);
	}
	return (NULL);
}

void
zfs_range_tree_verify_not_present(zfs_range_tree_t *rt, uint64_t off,
    uint64_t size)
{
	zfs_range_seg_t *rs = zfs_range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("segment already in tree; rs=%p", (void *)rs);
}

boolean_t
zfs_range_tree_contains(zfs_range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (zfs_range_tree_find(rt, start, size) != NULL);
}
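/*
 * A sketch of the lookup semantics just defined:
 * zfs_range_tree_contains() tests full containment of [start, start +
 * size), not mere overlap (for overlap, see zfs_range_tree_find_in()
 * below). Assumes an empty gapless tree; the function name is
 * illustrative.
 */
__attribute__((unused)) static void
zfs_range_tree_contains_sketch(zfs_range_tree_t *rt)
{
	zfs_range_tree_add(rt, 0x1000, 0x100);	/* [0x1000, 0x1100) */

	ASSERT(zfs_range_tree_contains(rt, 0x1000, 0x100));
	ASSERT(zfs_range_tree_contains(rt, 0x1040, 0x40));
	/* Overlaps the segment but extends past its end: */
	ASSERT(!zfs_range_tree_contains(rt, 0x10c0, 0x80));
}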
/*
 * Finds the first subset of the given range that overlaps a segment in
 * the tree and stores it in *ostart / *osize. Returns B_TRUE if such an
 * overlap exists and B_FALSE otherwise.
 */
boolean_t
zfs_range_tree_find_in(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
    uint64_t *ostart, uint64_t *osize)
{
	if (rt->rt_type == ZFS_RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	zfs_range_seg_max_t rsearch;
	zfs_rs_set_start(&rsearch, rt, start);
	zfs_rs_set_end_raw(&rsearch, rt, zfs_rs_get_start_raw(&rsearch, rt) +
	    1);

	zfs_btree_index_t where;
	zfs_range_seg_t *rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);
	if (rs != NULL) {
		*ostart = start;
		*osize = MIN(size, zfs_rs_get_end(rs, rt) - start);
		return (B_TRUE);
	}

	rs = zfs_btree_next(&rt->rt_root, &where, &where);
	if (rs == NULL || zfs_rs_get_start(rs, rt) > start + size)
		return (B_FALSE);

	*ostart = zfs_rs_get_start(rs, rt);
	*osize = MIN(start + size, zfs_rs_get_end(rs, rt)) -
	    zfs_rs_get_start(rs, rt);
	return (B_TRUE);
}

/*
 * Ensure that this range is not in the tree, regardless of whether
 * some, all, or none of it was in the tree to begin with.
 */
void
zfs_range_tree_clear(zfs_range_tree_t *rt, uint64_t start, uint64_t size)
{
	zfs_range_seg_t *rs;

	if (size == 0)
		return;

	if (rt->rt_type == ZFS_RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	while ((rs = zfs_range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(zfs_rs_get_start(rs, rt), start);
		uint64_t free_end = MIN(zfs_rs_get_end(rs, rt), start + size);
		zfs_range_tree_remove(rt, free_start, free_end - free_start);
	}
}

void
zfs_range_tree_swap(zfs_range_tree_t **rtsrc, zfs_range_tree_t **rtdst)
{
	zfs_range_tree_t *rt;

	ASSERT0(zfs_range_tree_space(*rtdst));
	ASSERT0(zfs_btree_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}
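/*
 * A sketch of using zfs_range_tree_find_in() as a cursor to visit
 * every present sub-range of a window [start, start + size); the
 * function name is illustrative.
 */
__attribute__((unused)) static void
zfs_range_tree_overlap_walk_sketch(zfs_range_tree_t *rt, uint64_t start,
    uint64_t size)
{
	uint64_t ostart, osize;

	while (size > 0 &&
	    zfs_range_tree_find_in(rt, start, size, &ostart, &osize)) {
		/* [ostart, ostart + osize) is present in rt. */
		size -= (ostart + osize) - start;
		start = ostart + osize;
	}
}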
void
zfs_range_tree_vacate(zfs_range_tree_t *rt, zfs_range_tree_func_t *func,
    void *arg)
{
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	if (func != NULL) {
		zfs_range_seg_t *rs;
		zfs_btree_index_t *cookie = NULL;

		while ((rs = zfs_btree_destroy_nodes(&rt->rt_root, &cookie)) !=
		    NULL) {
			func(arg, zfs_rs_get_start(rs, rt),
			    zfs_rs_get_end(rs, rt) - zfs_rs_get_start(rs, rt));
		}
	} else {
		zfs_btree_clear(&rt->rt_root);
	}

	memset(rt->rt_histogram, 0, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}

void
zfs_range_tree_walk(zfs_range_tree_t *rt, zfs_range_tree_func_t *func,
    void *arg)
{
	zfs_btree_index_t where;
	for (zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where);
	    rs != NULL; rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		func(arg, zfs_rs_get_start(rs, rt), zfs_rs_get_end(rs, rt) -
		    zfs_rs_get_start(rs, rt));
	}
}

zfs_range_seg_t *
zfs_range_tree_first(zfs_range_tree_t *rt)
{
	return (zfs_btree_first(&rt->rt_root, NULL));
}

uint64_t
zfs_range_tree_space(zfs_range_tree_t *rt)
{
	return (rt->rt_space);
}

uint64_t
zfs_range_tree_numsegs(zfs_range_tree_t *rt)
{
	return ((rt == NULL) ? 0 : zfs_btree_numnodes(&rt->rt_root));
}

boolean_t
zfs_range_tree_is_empty(zfs_range_tree_t *rt)
{
	ASSERT(rt != NULL);
	return (zfs_range_tree_space(rt) == 0);
}
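/*
 * A sketch of traversal with zfs_range_tree_walk(): the callback
 * receives (arg, start, size) for each segment in ascending order.
 * Both function names are illustrative.
 */
static void
zfs_range_tree_dump_cb(void *arg, uint64_t start, uint64_t size)
{
	(void) arg;
	zfs_dbgmsg("segment [%llx, %llx)", (u_longlong_t)start,
	    (u_longlong_t)(start + size));
}

__attribute__((unused)) static void
zfs_range_tree_dump_sketch(zfs_range_tree_t *rt)
{
	zfs_range_tree_walk(rt, zfs_range_tree_dump_cb, NULL);
}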
/*
 * Remove any overlapping ranges between the given segment [start, end)
 * from removefrom. Add non-overlapping leftovers to addto.
 */
void
zfs_range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
    zfs_range_tree_t *removefrom, zfs_range_tree_t *addto)
{
	zfs_btree_index_t where;
	zfs_range_seg_max_t starting_rs;
	zfs_rs_set_start(&starting_rs, removefrom, start);
	zfs_rs_set_end_raw(&starting_rs, removefrom,
	    zfs_rs_get_start_raw(&starting_rs, removefrom) + 1);

	zfs_range_seg_t *curr = zfs_btree_find(&removefrom->rt_root,
	    &starting_rs, &where);

	if (curr == NULL)
		curr = zfs_btree_next(&removefrom->rt_root, &where, &where);

	zfs_range_seg_t *next;
	for (; curr != NULL; curr = next) {
		if (start == end)
			return;
		VERIFY3U(start, <, end);

		/* there is no overlap */
		if (end <= zfs_rs_get_start(curr, removefrom)) {
			zfs_range_tree_add(addto, start, end - start);
			return;
		}

		uint64_t overlap_start = MAX(zfs_rs_get_start(curr,
		    removefrom), start);
		uint64_t overlap_end = MIN(zfs_rs_get_end(curr, removefrom),
		    end);
		uint64_t overlap_size = overlap_end - overlap_start;
		ASSERT3S(overlap_size, >, 0);
		zfs_range_seg_max_t rs;
		zfs_rs_copy(curr, &rs, removefrom);

		zfs_range_tree_remove(removefrom, overlap_start, overlap_size);

		if (start < overlap_start)
			zfs_range_tree_add(addto, start, overlap_start - start);

		start = overlap_end;
		next = zfs_btree_find(&removefrom->rt_root, &rs, &where);
		/*
		 * If we find something here, we only removed part of the
		 * curr segment. Either there's some left at the end
		 * because we've reached the end of the range we're removing,
		 * or there's some left at the start because we started
		 * partway through the range. Either way, we continue with
		 * the loop. If it's the former, we'll return at the start of
		 * the loop, and if it's the latter we'll see if there is more
		 * area to process.
		 */
		if (next != NULL) {
			ASSERT(start == end || start == zfs_rs_get_end(&rs,
			    removefrom));
		}

		next = zfs_btree_next(&removefrom->rt_root, &where, &where);
	}
	VERIFY3P(curr, ==, NULL);

	if (start != end) {
		VERIFY3U(start, <, end);
		zfs_range_tree_add(addto, start, end - start);
	} else {
		VERIFY3U(start, ==, end);
	}
}

/*
 * For each entry in rt, if it exists in removefrom, remove it
 * from removefrom. Otherwise, add it to addto.
 */
void
zfs_range_tree_remove_xor_add(zfs_range_tree_t *rt,
    zfs_range_tree_t *removefrom, zfs_range_tree_t *addto)
{
	zfs_btree_index_t where;
	for (zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs;
	    rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		zfs_range_tree_remove_xor_add_segment(zfs_rs_get_start(rs, rt),
		    zfs_rs_get_end(rs, rt), removefrom, addto);
	}
}

uint64_t
zfs_range_tree_min(zfs_range_tree_t *rt)
{
	zfs_range_seg_t *rs = zfs_btree_first(&rt->rt_root, NULL);
	return (rs != NULL ? zfs_rs_get_start(rs, rt) : 0);
}

uint64_t
zfs_range_tree_max(zfs_range_tree_t *rt)
{
	zfs_range_seg_t *rs = zfs_btree_last(&rt->rt_root, NULL);
	return (rs != NULL ? zfs_rs_get_end(rs, rt) : 0);
}

uint64_t
zfs_range_tree_span(zfs_range_tree_t *rt)
{
	return (zfs_range_tree_max(rt) - zfs_range_tree_min(rt));
}
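/*
 * A sketch of the set semantics of zfs_range_tree_remove_xor_add():
 * with rt = {[0, 10)} and removefrom = {[4, 6)}, the overlap [4, 6) is
 * removed from removefrom and the leftovers [0, 4) and [6, 10) are
 * added to addto. All three trees are assumed gapless; the function
 * name is illustrative.
 */
__attribute__((unused)) static void
zfs_range_tree_xor_add_sketch(zfs_range_tree_t *rt,
    zfs_range_tree_t *removefrom, zfs_range_tree_t *addto)
{
	zfs_range_tree_add(rt, 0, 10);
	zfs_range_tree_add(removefrom, 4, 2);

	zfs_range_tree_remove_xor_add(rt, removefrom, addto);

	ASSERT(zfs_range_tree_is_empty(removefrom));
	ASSERT3U(zfs_range_tree_space(addto), ==, 8);
}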