/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

/*
 * Range trees are tree-based data structures that can be used to
 * track free space or generally any space allocation information.
 * A range tree keeps track of individual segments and automatically
 * provides facilities such as adjacent extent merging and extent
 * splitting in response to range add/remove requests.
 *
 * A range tree starts out completely empty, with no segments in it.
 * Adding an allocation via range_tree_add to the range tree can either:
 * 1) create a new extent
 * 2) extend an adjacent extent
 * 3) merge two adjacent extents
 * Conversely, removing an allocation via range_tree_remove can:
 * 1) completely remove an extent
 * 2) shorten an extent (if the allocation was near one of its ends)
 * 3) split an extent into two extents, in effect punching a hole
 *
 * A range tree is also capable of 'bridging' gaps when adding
 * allocations. This is useful for cases when close proximity of
 * allocations is an important detail that needs to be represented
 * in the range tree. See the gap parameter of range_tree_create_impl().
 * The default behavior is not to bridge gaps (i.e. the maximum allowed
 * gap size is 0).
 *
 * In order to traverse a range tree, use either the range_tree_walk()
 * or range_tree_vacate() functions.
 *
 * To obtain more accurate information on individual segment
 * operations that the range tree performs "under the hood", you can
 * specify a set of callbacks by passing a range_tree_ops_t structure
 * to the range_tree_create function. Any callbacks that are non-NULL
 * are then called at the appropriate times.
 *
 * The range tree code also supports a special variant of range trees
 * that can bridge small gaps between segments. This kind of tree is used
 * by the dsl scanning code to group I/Os into mostly sequential chunks to
 * optimize disk performance. The code here attempts to do this with as
 * little memory and computational overhead as possible. One limitation of
 * this implementation is that segments of range trees with gaps can only
 * support removing complete segments.
 */
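/*
 * Illustrative usage sketch (hypothetical caller, not part of this
 * file): create a tree, populate it, traverse it, and tear it down.
 * The dump_seg() callback is an assumption made for the example.
 *
 *	static void
 *	dump_seg(void *arg, uint64_t start, uint64_t size)
 *	{
 *		zfs_dbgmsg("seg [%llx, %llx)", (longlong_t)start,
 *		    (longlong_t)(start + size));
 *	}
 *
 *	range_tree_t *rt = range_tree_create(NULL, RANGE_SEG64, NULL, 0, 0);
 *	range_tree_add(rt, 0x1000, 0x1000);	// extent [0x1000, 0x2000)
 *	range_tree_add(rt, 0x2000, 0x1000);	// merged: [0x1000, 0x3000)
 *	range_tree_walk(rt, dump_seg, NULL);	// non-destructive traversal
 *	range_tree_vacate(rt, NULL, NULL);	// empty the tree
 *	range_tree_destroy(rt);
 */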
static inline void
rs_copy(range_seg_t *src, range_seg_t *dest, range_tree_t *rt)
{
	ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
	size_t size = 0;
	switch (rt->rt_type) {
	case RANGE_SEG32:
		size = sizeof (range_seg32_t);
		break;
	case RANGE_SEG64:
		size = sizeof (range_seg64_t);
		break;
	case RANGE_SEG_GAP:
		size = sizeof (range_seg_gap_t);
		break;
	default:
		VERIFY(0);
	}
	bcopy(src, dest, size);
}

void
range_tree_stat_verify(range_tree_t *rt)
{
	range_seg_t *rs;
	zfs_btree_index_t where;
	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = zfs_btree_first(&rt->rt_root, &where); rs != NULL;
	    rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%px, hist[i]=%llu, "
			    "rt_hist[i]=%llu",
			    i, hist, hist[i], rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}

static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs_get_end(rs, rt) - rs_get_start(rs, rt);
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}
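/*
 * The three comparators below share one idiom: subtracting the two
 * boolean tests yields -1, 0, or 1, and any two overlapping segments
 * compare as equal. For example, with r1 = [5, 10) and r2 = [8, 12),
 * (r1->rs_start >= r2->rs_end) is 0 and (r1->rs_end <= r2->rs_start)
 * is 0, so the comparator returns 0 and zfs_btree_find() will return
 * r1 when searching for r2. Adjacent but non-overlapping segments
 * (e.g. [5, 8) and [8, 12)) still compare as unequal.
 */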
static int
range_tree_seg32_compare(const void *x1, const void *x2)
{
	const range_seg32_t *r1 = x1;
	const range_seg32_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

static int
range_tree_seg64_compare(const void *x1, const void *x2)
{
	const range_seg64_t *r1 = x1;
	const range_seg64_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}

static int
range_tree_seg_gap_compare(const void *x1, const void *x2)
{
	const range_seg_gap_t *r1 = x1;
	const range_seg_gap_t *r2 = x2;

	ASSERT3U(r1->rs_start, <=, r1->rs_end);
	ASSERT3U(r2->rs_start, <=, r2->rs_end);

	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
range_tree_t *
range_tree_create_impl(range_tree_ops_t *ops, range_seg_type_t type, void *arg,
    uint64_t start, uint64_t shift,
    int (*zfs_btree_compare) (const void *, const void *),
    uint64_t gap)
{
	range_tree_t *rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

	ASSERT3U(shift, <, 64);
	ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES);
	size_t size;
	int (*compare) (const void *, const void *);
	switch (type) {
	case RANGE_SEG32:
		size = sizeof (range_seg32_t);
		compare = range_tree_seg32_compare;
		break;
	case RANGE_SEG64:
		size = sizeof (range_seg64_t);
		compare = range_tree_seg64_compare;
		break;
	case RANGE_SEG_GAP:
		size = sizeof (range_seg_gap_t);
		compare = range_tree_seg_gap_compare;
		break;
	default:
		panic("Invalid range seg type %d", type);
	}
	zfs_btree_create(&rt->rt_root, compare, size);

	rt->rt_ops = ops;
	rt->rt_gap = gap;
	rt->rt_arg = arg;
	rt->rt_type = type;
	rt->rt_start = start;
	rt->rt_shift = shift;
	rt->rt_btree_compare = zfs_btree_compare;

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_create != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

range_tree_t *
range_tree_create(range_tree_ops_t *ops, range_seg_type_t type,
    void *arg, uint64_t start, uint64_t shift)
{
	return (range_tree_create_impl(ops, type, arg, start, shift, NULL, 0));
}

void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	zfs_btree_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}

void
range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta)
{
	if (delta < 0 && delta * -1 >= rs_get_fill(rs, rt)) {
		zfs_panic_recover("zfs: attempting to decrease fill to or "
		    "below 0; probable double remove in segment [%llx:%llx]",
		    (longlong_t)rs_get_start(rs, rt),
		    (longlong_t)rs_get_end(rs, rt));
	}
	if (rs_get_fill(rs, rt) + delta > rs_get_end(rs, rt) -
	    rs_get_start(rs, rt)) {
		zfs_panic_recover("zfs: attempting to increase fill beyond "
		    "max; probable double add in segment [%llx:%llx]",
		    (longlong_t)rs_get_start(rs, rt),
		    (longlong_t)rs_get_end(rs, rt));
	}

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);
	rs_set_fill(rs, rt, rs_get_fill(rs, rt) + delta);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
}
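/*
 * Illustrative sketch of a gap-bridging tree (hypothetical offsets;
 * see range_tree_add_impl() below): with gap = 0x200, segments that
 * lie within 0x200 bytes of each other are bridged into one segment,
 * while the fill keeps tracking only the space actually added.
 *
 *	range_tree_t *gt = range_tree_create_impl(NULL, RANGE_SEG_GAP,
 *	    NULL, 0, 0, NULL, 0x200);
 *	range_tree_add(gt, 0x1000, 0x100);	// segment [0x1000, 0x1100)
 *	range_tree_add(gt, 0x1200, 0x100);	// bridged: [0x1000, 0x1300)
 *	// range_tree_space(gt) is now 0x300: 0x200 of fill plus a
 *	// 0x100 bridge; the segment's fill is 0x200.
 */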
static void
range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
{
	range_tree_t *rt = arg;
	zfs_btree_index_t where;
	range_seg_t *rs_before, *rs_after, *rs;
	range_seg_max_t tmp, rsearch;
	uint64_t end = start + size, gap = rt->rt_gap;
	uint64_t bridge_size = 0;
	boolean_t merge_before, merge_after;

	ASSERT3U(size, !=, 0);
	ASSERT3U(fill, <=, size);
	ASSERT3U(start + size, >, start);

	rs_set_start(&rsearch, rt, start);
	rs_set_end(&rsearch, rt, end);
	rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);

	/*
	 * If this is a gap-supporting range tree, it is possible that we
	 * are inserting into an existing segment. In this case simply
	 * bump the fill count and call the remove / add callbacks. If the
	 * new range will extend an existing segment, we remove the
	 * existing one, apply the new extent to it and re-insert it using
	 * the normal code paths.
	 */
	if (rs != NULL) {
		if (gap == 0) {
			zfs_panic_recover("zfs: adding existent segment to "
			    "range tree (offset=%llx size=%llx)",
			    (longlong_t)start, (longlong_t)size);
			return;
		}
		uint64_t rstart = rs_get_start(rs, rt);
		uint64_t rend = rs_get_end(rs, rt);
		if (rstart <= start && rend >= end) {
			range_tree_adjust_fill(rt, rs, fill);
			return;
		}

		zfs_btree_remove(&rt->rt_root, rs);
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

		range_tree_stat_decr(rt, rs);
		rt->rt_space -= rend - rstart;

		fill += rs_get_fill(rs, rt);
		start = MIN(start, rstart);
		end = MAX(end, rend);
		size = end - start;

		range_tree_add_impl(rt, start, size, fill);
		return;
	}

	ASSERT3P(rs, ==, NULL);

	/*
	 * Determine whether or not we will have to merge with our neighbors.
	 * If gap != 0, we might need to merge with our neighbors even if we
	 * aren't directly touching.
	 */
	zfs_btree_index_t where_before, where_after;
	rs_before = zfs_btree_prev(&rt->rt_root, &where, &where_before);
	rs_after = zfs_btree_next(&rt->rt_root, &where, &where_after);

	merge_before = (rs_before != NULL && rs_get_end(rs_before, rt) >=
	    start - gap);
	merge_after = (rs_after != NULL && rs_get_start(rs_after, rt) <= end +
	    gap);

	if (merge_before && gap != 0)
		bridge_size += start - rs_get_end(rs_before, rt);
	if (merge_after && gap != 0)
		bridge_size += rs_get_start(rs_after, rt) - end;

	if (merge_before && merge_after) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_copy(rs_after, &tmp, rt);
		uint64_t before_start = rs_get_start_raw(rs_before, rt);
		uint64_t before_fill = rs_get_fill(rs_before, rt);
		uint64_t after_fill = rs_get_fill(rs_after, rt);
		zfs_btree_remove_idx(&rt->rt_root, &where_before);

		/*
		 * We have to re-find the node because our old reference is
		 * invalid as soon as we do any mutating btree operations.
		 */
		rs_after = zfs_btree_find(&rt->rt_root, &tmp, &where_after);
		rs_set_start_raw(rs_after, rt, before_start);
		rs_set_fill(rs_after, rt, after_fill + before_fill + fill);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		uint64_t before_fill = rs_get_fill(rs_before, rt);
		rs_set_end(rs_before, rt, end);
		rs_set_fill(rs_before, rt, before_fill + fill);
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		uint64_t after_fill = rs_get_fill(rs_after, rt);
		rs_set_start(rs_after, rt, start);
		rs_set_fill(rs_after, rt, after_fill + fill);
		rs = rs_after;
	} else {
		rs = &tmp;

		rs_set_start(rs, rt, start);
		rs_set_end(rs, rt, end);
		rs_set_fill(rs, rt, fill);
		zfs_btree_add_idx(&rt->rt_root, rs, &where);
	}

	if (gap != 0) {
		ASSERT3U(rs_get_fill(rs, rt), <=, rs_get_end(rs, rt) -
		    rs_get_start(rs, rt));
	} else {
		ASSERT3U(rs_get_fill(rs, rt), ==, rs_get_end(rs, rt) -
		    rs_get_start(rs, rt));
	}

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size + bridge_size;
}

void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_add_impl(arg, start, size, size);
}
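/*
 * Worked example of the merge cases above, for a tree with gap == 0
 * (hypothetical offsets):
 *
 *	range_tree_add(rt, 0x1000, 0x1000);	// new extent [0x1000, 0x2000)
 *	range_tree_add(rt, 0x3000, 0x1000);	// new extent [0x3000, 0x4000)
 *	range_tree_add(rt, 0x2000, 0x1000);	// merge_before and merge_after:
 *						// one extent [0x1000, 0x4000)
 */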
static void
range_tree_remove_impl(range_tree_t *rt, uint64_t start, uint64_t size,
    boolean_t do_fill)
{
	zfs_btree_index_t where;
	range_seg_t *rs;
	range_seg_max_t rsearch, rs_tmp;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);
	if (rt->rt_type == RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	rs_set_start(&rsearch, rt, start);
	rs_set_end(&rsearch, rt, end);
	rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: removing nonexistent segment from "
		    "range tree (offset=%llx size=%llx)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/*
	 * Range trees with gap support must only remove complete segments
	 * from the tree. This allows us to maintain accurate fill accounting
	 * and to ensure that bridged sections are not leaked. If we need to
	 * remove less than the full segment, we can only adjust the fill
	 * count.
	 */
	if (rt->rt_gap != 0) {
		if (do_fill) {
			if (rs_get_fill(rs, rt) == size) {
				start = rs_get_start(rs, rt);
				end = rs_get_end(rs, rt);
				size = end - start;
			} else {
				range_tree_adjust_fill(rt, rs, -size);
				return;
			}
		} else if (rs_get_start(rs, rt) != start ||
		    rs_get_end(rs, rt) != end) {
			zfs_panic_recover("zfs: freeing partial segment of "
			    "gap tree (offset=%llx size=%llx) of "
			    "(offset=%llx size=%llx)",
			    (longlong_t)start, (longlong_t)size,
			    (longlong_t)rs_get_start(rs, rt),
			    (longlong_t)rs_get_end(rs, rt) - rs_get_start(rs,
			    rt));
			return;
		}
	}

	VERIFY3U(rs_get_start(rs, rt), <=, start);
	VERIFY3U(rs_get_end(rs, rt), >=, end);

	left_over = (rs_get_start(rs, rt) != start);
	right_over = (rs_get_end(rs, rt) != end);

	range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		range_seg_max_t newseg;
		rs_set_start(&newseg, rt, end);
		rs_set_end_raw(&newseg, rt, rs_get_end_raw(rs, rt));
		rs_set_fill(&newseg, rt, rs_get_end(rs, rt) - end);
		range_tree_stat_incr(rt, &newseg);

		/* This modifies the buffer already inside the range tree */
		rs_set_end(rs, rt, start);

		rs_copy(rs, &rs_tmp, rt);
		if (zfs_btree_next(&rt->rt_root, &where, &where) != NULL)
			zfs_btree_add_idx(&rt->rt_root, &newseg, &where);
		else
			zfs_btree_add(&rt->rt_root, &newseg);

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
			rt->rt_ops->rtop_add(rt, &newseg, rt->rt_arg);
	} else if (left_over) {
		/* This modifies the buffer already inside the range tree */
		rs_set_end(rs, rt, start);
		rs_copy(rs, &rs_tmp, rt);
	} else if (right_over) {
		/* This modifies the buffer already inside the range tree */
		rs_set_start(rs, rt, end);
		rs_copy(rs, &rs_tmp, rt);
	} else {
		zfs_btree_remove_idx(&rt->rt_root, &where);
		rs = NULL;
	}

	if (rs != NULL) {
		/*
		 * The fill of the leftover segment will always be equal to
		 * the size, since we do not support removing partial segments
		 * of range trees with gaps.
		 */
		rs_set_fill_raw(rs, rt, rs_get_end_raw(rs, rt) -
		    rs_get_start_raw(rs, rt));
		range_tree_stat_incr(rt, &rs_tmp);

		if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
			rt->rt_ops->rtop_add(rt, &rs_tmp, rt->rt_arg);
	}

	rt->rt_space -= size;
}
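/*
 * Worked example of the split case above (hypothetical offsets):
 * removing the middle of an extent punches a hole, leaving two
 * extents behind.
 *
 *	range_tree_add(rt, 0x1000, 0x3000);	// one extent [0x1000, 0x4000)
 *	range_tree_remove(rt, 0x2000, 0x1000);	// left_over and right_over:
 *						// [0x1000, 0x2000) and
 *						// [0x3000, 0x4000) remain
 */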
void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_remove_impl(arg, start, size, B_FALSE);
}

void
range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_tree_remove_impl(rt, start, size, B_TRUE);
}

void
range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
    uint64_t newstart, uint64_t newsize)
{
	int64_t delta = newsize - (rs_get_end(rs, rt) - rs_get_start(rs, rt));

	range_tree_stat_decr(rt, rs);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_remove != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	rs_set_start(rs, rt, newstart);
	rs_set_end(rs, rt, newstart + newsize);

	range_tree_stat_incr(rt, rs);
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_add != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	rt->rt_space += delta;
}

static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_max_t rsearch;
	uint64_t end = start + size;

	VERIFY(size != 0);

	rs_set_start(&rsearch, rt, start);
	rs_set_end(&rsearch, rt, end);
	return (zfs_btree_find(&rt->rt_root, &rsearch, NULL));
}
range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
	if (rt->rt_type == RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	range_seg_t *rs = range_tree_find_impl(rt, start, size);
	if (rs != NULL && rs_get_start(rs, rt) <= start &&
	    rs_get_end(rs, rt) >= start + size) {
		return (rs);
	}
	return (NULL);
}

void
range_tree_verify_not_present(range_tree_t *rt, uint64_t off, uint64_t size)
{
	range_seg_t *rs = range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("segment already in tree; rs=%p", (void *)rs);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (range_tree_find(rt, start, size) != NULL);
}

/*
 * Returns the first subset of the given range which overlaps with the range
 * tree. Returns true if there is a segment in the range, and false if there
 * isn't.
 */
boolean_t
range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
    uint64_t *ostart, uint64_t *osize)
{
	if (rt->rt_type == RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	range_seg_max_t rsearch;
	rs_set_start(&rsearch, rt, start);
	rs_set_end_raw(&rsearch, rt, rs_get_start_raw(&rsearch, rt) + 1);

	zfs_btree_index_t where;
	range_seg_t *rs = zfs_btree_find(&rt->rt_root, &rsearch, &where);
	if (rs != NULL) {
		*ostart = start;
		*osize = MIN(size, rs_get_end(rs, rt) - start);
		return (B_TRUE);
	}

	rs = zfs_btree_next(&rt->rt_root, &where, &where);
	if (rs == NULL || rs_get_start(rs, rt) > start + size)
		return (B_FALSE);

	*ostart = rs_get_start(rs, rt);
	*osize = MIN(start + size, rs_get_end(rs, rt)) -
	    rs_get_start(rs, rt);
	return (B_TRUE);
}
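/*
 * Worked example for range_tree_find_in() (hypothetical offsets):
 * with a single segment [0x2000, 0x3000) in the tree, searching the
 * range [0x1000, 0x5000) sets *ostart = 0x2000 and *osize = 0x1000,
 * i.e. only the overlapping subset is reported. By contrast,
 * range_tree_contains() returns B_FALSE for the same query, since it
 * requires the whole range to be covered by one segment.
 */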
/*
 * Ensure that this range is not in the tree, regardless of whether
 * it is currently in the tree.
 */
void
range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs;

	if (size == 0)
		return;

	if (rt->rt_type == RANGE_SEG64)
		ASSERT3U(start + size, >, start);

	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(rs_get_start(rs, rt), start);
		uint64_t free_end = MIN(rs_get_end(rs, rt), start + size);
		range_tree_remove(rt, free_start, free_end - free_start);
	}
}

void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
	range_tree_t *rt;

	ASSERT0(range_tree_space(*rtdst));
	ASSERT0(zfs_btree_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}

void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	if (rt->rt_ops != NULL && rt->rt_ops->rtop_vacate != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	if (func != NULL) {
		range_seg_t *rs;
		zfs_btree_index_t *cookie = NULL;

		while ((rs = zfs_btree_destroy_nodes(&rt->rt_root, &cookie)) !=
		    NULL) {
			func(arg, rs_get_start(rs, rt), rs_get_end(rs, rt) -
			    rs_get_start(rs, rt));
		}
	} else {
		zfs_btree_clear(&rt->rt_root);
	}

	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}

void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	zfs_btree_index_t where;
	for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where);
	    rs != NULL; rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		func(arg, rs_get_start(rs, rt), rs_get_end(rs, rt) -
		    rs_get_start(rs, rt));
	}
}

range_seg_t *
range_tree_first(range_tree_t *rt)
{
	return (zfs_btree_first(&rt->rt_root, NULL));
}

uint64_t
range_tree_space(range_tree_t *rt)
{
	return (rt->rt_space);
}
uint64_t
range_tree_numsegs(range_tree_t *rt)
{
	return ((rt == NULL) ? 0 : zfs_btree_numnodes(&rt->rt_root));
}

boolean_t
range_tree_is_empty(range_tree_t *rt)
{
	ASSERT(rt != NULL);
	return (range_tree_space(rt) == 0);
}

/* ARGSUSED */
void
rt_btree_create(range_tree_t *rt, void *arg)
{
	zfs_btree_t *size_tree = arg;

	size_t size;
	switch (rt->rt_type) {
	case RANGE_SEG32:
		size = sizeof (range_seg32_t);
		break;
	case RANGE_SEG64:
		size = sizeof (range_seg64_t);
		break;
	case RANGE_SEG_GAP:
		size = sizeof (range_seg_gap_t);
		break;
	default:
		panic("Invalid range seg type %d", rt->rt_type);
	}
	zfs_btree_create(size_tree, rt->rt_btree_compare, size);
}

/* ARGSUSED */
void
rt_btree_destroy(range_tree_t *rt, void *arg)
{
	zfs_btree_t *size_tree = arg;
	ASSERT0(zfs_btree_numnodes(size_tree));

	zfs_btree_destroy(size_tree);
}

/* ARGSUSED */
void
rt_btree_add(range_tree_t *rt, range_seg_t *rs, void *arg)
{
	zfs_btree_t *size_tree = arg;

	zfs_btree_add(size_tree, rs);
}

/* ARGSUSED */
void
rt_btree_remove(range_tree_t *rt, range_seg_t *rs, void *arg)
{
	zfs_btree_t *size_tree = arg;

	zfs_btree_remove(size_tree, rs);
}

/* ARGSUSED */
void
rt_btree_vacate(range_tree_t *rt, void *arg)
{
	zfs_btree_t *size_tree = arg;
	zfs_btree_clear(size_tree);
	zfs_btree_destroy(size_tree);

	rt_btree_create(rt, arg);
}

range_tree_ops_t rt_btree_ops = {
	.rtop_create = rt_btree_create,
	.rtop_destroy = rt_btree_destroy,
	.rtop_add = rt_btree_add,
	.rtop_remove = rt_btree_remove,
	.rtop_vacate = rt_btree_vacate
};
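/*
 * Illustrative sketch for rt_btree_ops (hypothetical comparator): the
 * callbacks above mirror every segment into a second btree, typically
 * sorted by segment size, so a caller can find e.g. the largest
 * segment without scanning the offset-sorted tree. The size tree is
 * created by the rtop_create callback; my_size_compare stands in for
 * a caller-supplied comparator.
 *
 *	zfs_btree_t size_tree;
 *	range_tree_t *rt = range_tree_create_impl(&rt_btree_ops,
 *	    RANGE_SEG64, &size_tree, 0, 0, my_size_compare, 0);
 *	...
 *	range_seg_t *largest = zfs_btree_last(&size_tree, NULL);
 */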
/*
 * Remove any overlapping ranges between the given segment [start, end)
 * from removefrom. Add non-overlapping leftovers to addto.
 */
void
range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
    range_tree_t *removefrom, range_tree_t *addto)
{
	zfs_btree_index_t where;
	range_seg_max_t starting_rs;
	rs_set_start(&starting_rs, removefrom, start);
	rs_set_end_raw(&starting_rs, removefrom, rs_get_start_raw(&starting_rs,
	    removefrom) + 1);

	range_seg_t *curr = zfs_btree_find(&removefrom->rt_root,
	    &starting_rs, &where);

	if (curr == NULL)
		curr = zfs_btree_next(&removefrom->rt_root, &where, &where);

	range_seg_t *next;
	for (; curr != NULL; curr = next) {
		if (start == end)
			return;
		VERIFY3U(start, <, end);

		/* there is no overlap */
		if (end <= rs_get_start(curr, removefrom)) {
			range_tree_add(addto, start, end - start);
			return;
		}

		uint64_t overlap_start = MAX(rs_get_start(curr, removefrom),
		    start);
		uint64_t overlap_end = MIN(rs_get_end(curr, removefrom),
		    end);
		uint64_t overlap_size = overlap_end - overlap_start;
		ASSERT3S(overlap_size, >, 0);
		range_seg_max_t rs;
		rs_copy(curr, &rs, removefrom);

		range_tree_remove(removefrom, overlap_start, overlap_size);

		if (start < overlap_start)
			range_tree_add(addto, start, overlap_start - start);

		start = overlap_end;
		next = zfs_btree_find(&removefrom->rt_root, &rs, &where);
		/*
		 * If we find something here, we only removed part of the
		 * curr segment. Either there's some left at the end
		 * because we've reached the end of the range we're removing,
		 * or there's some left at the start because we started
		 * partway through the range. Either way, we continue with
		 * the loop. If it's the former, we'll return at the start of
		 * the loop, and if it's the latter we'll see if there is more
		 * area to process.
		 */
		if (next != NULL) {
			ASSERT(start == end || start == rs_get_end(&rs,
			    removefrom));
		}

		next = zfs_btree_next(&removefrom->rt_root, &where, &where);
	}
	VERIFY3P(curr, ==, NULL);

	if (start != end) {
		VERIFY3U(start, <, end);
		range_tree_add(addto, start, end - start);
	} else {
		VERIFY3U(start, ==, end);
	}
}
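/*
 * Worked example (hypothetical offsets): if removefrom contains the
 * single segment [0x1200, 0x1500), then
 *
 *	range_tree_remove_xor_add_segment(0x1000, 0x2000, removefrom, addto);
 *
 * removes [0x1200, 0x1500) from removefrom and adds the two
 * non-overlapping leftovers, [0x1000, 0x1200) and [0x1500, 0x2000),
 * to addto.
 */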
/*
 * For each entry in rt, if it exists in removefrom, remove it
 * from removefrom. Otherwise, add it to addto.
 */
void
range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
    range_tree_t *addto)
{
	zfs_btree_index_t where;
	for (range_seg_t *rs = zfs_btree_first(&rt->rt_root, &where); rs;
	    rs = zfs_btree_next(&rt->rt_root, &where, &where)) {
		range_tree_remove_xor_add_segment(rs_get_start(rs, rt),
		    rs_get_end(rs, rt), removefrom, addto);
	}
}

uint64_t
range_tree_min(range_tree_t *rt)
{
	range_seg_t *rs = zfs_btree_first(&rt->rt_root, NULL);
	return (rs != NULL ? rs_get_start(rs, rt) : 0);
}

uint64_t
range_tree_max(range_tree_t *rt)
{
	range_seg_t *rs = zfs_btree_last(&rt->rt_root, NULL);
	return (rs != NULL ? rs_get_end(rs, rt) : 0);
}

uint64_t
range_tree_span(range_tree_t *rt)
{
	return (range_tree_max(rt) - range_tree_min(rt));
}