/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

kmem_cache_t *range_seg_cache;

void
range_tree_init(void)
{
	ASSERT(range_seg_cache == NULL);
	range_seg_cache = kmem_cache_create("range_seg_cache",
	    sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
range_tree_fini(void)
{
	kmem_cache_destroy(range_seg_cache);
	range_seg_cache = NULL;
}

void
range_tree_stat_verify(range_tree_t *rt)
{
	range_seg_t *rs;
	uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
	int i;

	for (rs = avl_first(&rt->rt_root); rs != NULL;
	    rs = AVL_NEXT(&rt->rt_root, rs)) {
		uint64_t size = rs->rs_end - rs->rs_start;
		int idx = highbit64(size) - 1;

		hist[idx]++;
		ASSERT3U(hist[idx], !=, 0);
	}

	for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
		if (hist[i] != rt->rt_histogram[i]) {
			zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
			    i, hist, hist[i], rt->rt_histogram[i]);
		}
		VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
	}
}

static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	rt->rt_histogram[idx]++;
	ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
	uint64_t size = rs->rs_end - rs->rs_start;
	int idx = highbit64(size) - 1;

	ASSERT(size != 0);
	ASSERT3U(idx, <,
	    sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

	ASSERT3U(rt->rt_histogram[idx], !=, 0);
	rt->rt_histogram[idx]--;
}

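/*
 * AVL comparator for range segments: overlapping segments compare
 * equal, so avl_find() with a query segment locates any segment that
 * overlaps the query range (or yields an insertion point when none
 * does).
 */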
/*
 * NOTE: caller is responsible for all locking.
 */
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
	const range_seg_t *r1 = x1;
	const range_seg_t *r2 = x2;

	if (r1->rs_start < r2->rs_start) {
		if (r1->rs_end > r2->rs_start)
			return (0);
		return (-1);
	}
	if (r1->rs_start > r2->rs_start) {
		if (r1->rs_start < r2->rs_end)
			return (0);
		return (1);
	}
	return (0);
}

range_tree_t *
range_tree_create(range_tree_ops_t *ops, void *arg)
{
	range_tree_t *rt;

	rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

	avl_create(&rt->rt_root, range_tree_seg_compare,
	    sizeof (range_seg_t), offsetof(range_seg_t, rs_node));

	rt->rt_ops = ops;
	rt->rt_arg = arg;

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_create(rt, rt->rt_arg);

	return (rt);
}

void
range_tree_destroy(range_tree_t *rt)
{
	VERIFY0(rt->rt_space);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

	avl_destroy(&rt->rt_root);
	kmem_free(rt, sizeof (*rt));
}

/*
 * Add the range [start, start + size) to the tree, coalescing it with
 * any immediately adjacent segments.
 */
void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs_before, *rs_after, *rs;
	uint64_t end = start + size;
	boolean_t merge_before, merge_after;

	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
		zfs_panic_recover("zfs: allocating allocated segment "
		    "(offset=%llu size=%llu)\n",
		    (longlong_t)start, (longlong_t)size);
		return;
	}

	/* Make sure we don't overlap with either of our neighbors */
	VERIFY3P(rs, ==, NULL);

	rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
	rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

	merge_before = (rs_before != NULL && rs_before->rs_end == start);
	merge_after = (rs_after != NULL && rs_after->rs_start == end);

	if (merge_before && merge_after) {
		avl_remove(&rt->rt_root, rs_before);
		if (rt->rt_ops != NULL) {
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
		}

		range_tree_stat_decr(rt, rs_before);
		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = rs_before->rs_start;
		kmem_cache_free(range_seg_cache, rs_before);
		rs = rs_after;
	} else if (merge_before) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

		range_tree_stat_decr(rt, rs_before);

		rs_before->rs_end = end;
		rs = rs_before;
	} else if (merge_after) {
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

		range_tree_stat_decr(rt, rs_after);

		rs_after->rs_start = start;
		rs = rs_after;
	} else {
		rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		rs->rs_start = start;
		rs->rs_end = end;
		avl_insert(&rt->rt_root, rs, where);
	}

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

	range_tree_stat_incr(rt, rs);
	rt->rt_space += size;
}

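/*
 * Remove the range [start, start + size) from the tree. The range must
 * lie entirely within a single existing segment: removing the middle
 * of a segment splits it in two, removing from either edge shrinks it
 * in place, and removing an exact match frees the segment.
 */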
void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
	range_tree_t *rt = arg;
	avl_index_t where;
	range_seg_t rsearch, *rs, *newseg;
	uint64_t end = start + size;
	boolean_t left_over, right_over;

	VERIFY3U(size, !=, 0);
	VERIFY3U(size, <=, rt->rt_space);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	rs = avl_find(&rt->rt_root, &rsearch, &where);

	/* Make sure we completely overlap with someone */
	if (rs == NULL) {
		zfs_panic_recover("zfs: freeing free segment "
		    "(offset=%llu size=%llu)",
		    (longlong_t)start, (longlong_t)size);
		return;
	}
	VERIFY3U(rs->rs_start, <=, start);
	VERIFY3U(rs->rs_end, >=, end);

	left_over = (rs->rs_start != start);
	right_over = (rs->rs_end != end);

	range_tree_stat_decr(rt, rs);

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

	if (left_over && right_over) {
		newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
		newseg->rs_start = end;
		newseg->rs_end = rs->rs_end;
		range_tree_stat_incr(rt, newseg);

		rs->rs_end = start;

		avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
	} else if (left_over) {
		rs->rs_end = start;
	} else if (right_over) {
		rs->rs_start = end;
	} else {
		avl_remove(&rt->rt_root, rs);
		kmem_cache_free(range_seg_cache, rs);
		rs = NULL;
	}

	if (rs != NULL) {
		range_tree_stat_incr(rt, rs);

		if (rt->rt_ops != NULL)
			rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
	}

	rt->rt_space -= size;
}

/* Return any segment overlapping [start, start + size), or NULL. */
static range_seg_t *
range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t rsearch;
	uint64_t end = start + size;

	VERIFY(size != 0);

	rsearch.rs_start = start;
	rsearch.rs_end = end;
	return (avl_find(&rt->rt_root, &rsearch, NULL));
}

/* Return the segment fully containing [start, start + size), or NULL. */
static range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs = range_tree_find_impl(rt, start, size);
	if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
		return (rs);
	return (NULL);
}

void
range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
{
	range_seg_t *rs;

	rs = range_tree_find(rt, off, size);
	if (rs != NULL)
		panic("freeing free block; rs=%p", (void *)rs);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
	return (range_tree_find(rt, start, size) != NULL);
}

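/*
 * Note that range_tree_clear() below relies on range_tree_find_impl(),
 * which matches any overlapping segment, rather than range_tree_find(),
 * which requires full containment; each loop iteration trims away one
 * partial overlap.
 */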
/*
 * Ensure that this range is not in the tree, regardless of whether
 * it is currently in the tree.
 */
void
range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
{
	range_seg_t *rs;

	if (size == 0)
		return;

	while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
		uint64_t free_start = MAX(rs->rs_start, start);
		uint64_t free_end = MIN(rs->rs_end, start + size);
		range_tree_remove(rt, free_start, free_end - free_start);
	}
}

void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
	range_tree_t *rt;

	ASSERT0(range_tree_space(*rtdst));
	ASSERT0(avl_numnodes(&(*rtdst)->rt_root));

	rt = *rtsrc;
	*rtsrc = *rtdst;
	*rtdst = rt;
}

void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;
	void *cookie = NULL;

	if (rt->rt_ops != NULL)
		rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

	while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
		if (func != NULL)
			func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
		kmem_cache_free(range_seg_cache, rs);
	}

	bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
	rt->rt_space = 0;
}

void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
	range_seg_t *rs;

	for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
		func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
}

uint64_t
range_tree_space(range_tree_t *rt)
{
	return (rt->rt_space);
}

boolean_t
range_tree_is_empty(range_tree_t *rt)
{
	ASSERT(rt != NULL);
	return (range_tree_space(rt) == 0);
}

uint64_t
range_tree_min(range_tree_t *rt)
{
	range_seg_t *rs = avl_first(&rt->rt_root);
	return (rs != NULL ? rs->rs_start : 0);
}

uint64_t
range_tree_max(range_tree_t *rt)
{
	range_seg_t *rs = avl_last(&rt->rt_root);
	return (rs != NULL ? rs->rs_end : 0);
}

uint64_t
range_tree_span(range_tree_t *rt)
{
	return (range_tree_max(rt) - range_tree_min(rt));
}
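
/*
 * Illustrative usage sketch (caller-supplied locking assumed, per the
 * NOTE above; the constants are arbitrary):
 *
 *	range_tree_t *rt = range_tree_create(NULL, NULL);
 *	range_tree_add(rt, 0x1000, 0x2000);	// tree: [0x1000, 0x3000)
 *	range_tree_remove(rt, 0x1800, 0x100);	// splits the segment in two
 *	ASSERT(!range_tree_contains(rt, 0x1800, 0x100));
 *	range_tree_vacate(rt, NULL, NULL);	// free all segments
 *	range_tree_destroy(rt);
 */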