1 /*- 2 * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa 3 * All rights reserved 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * Binary heap and hash tables, used in dummynet 29 * 30 * $FreeBSD$ 31 */ 32 33 #include <sys/cdefs.h> 34 #include <sys/param.h> 35 #ifdef _KERNEL 36 __FBSDID("$FreeBSD$"); 37 #include <sys/systm.h> 38 #include <sys/malloc.h> 39 #include <sys/kernel.h> 40 #include <netpfil/ipfw/dn_heap.h> 41 #ifndef log 42 #define log(x, arg...) 43 #endif 44 45 #else /* !_KERNEL */ 46 47 #include <stdio.h> 48 #include <dn_test.h> 49 #include <strings.h> 50 #include <stdlib.h> 51 52 #include "dn_heap.h" 53 #define log(x, arg...) fprintf(stderr, ## arg) 54 #define panic(x...) fprintf(stderr, ## x), exit(1) 55 #define MALLOC_DEFINE(a, b, c) volatile int __dummy__ ## a __attribute__((__unused__)) 56 static void *my_malloc(int s) { return malloc(s); } 57 static void my_free(void *p) { free(p); } 58 #define malloc(s, t, w) my_malloc(s) 59 #define free(p, t) my_free(p) 60 #endif /* !_KERNEL */ 61 62 static MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap"); 63 64 /* 65 * Heap management functions. 66 * 67 * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2. 68 * Some macros help finding parent/children so we can optimize them. 69 * 70 * heap_init() is called to expand the heap when needed. 71 * Increment size in blocks of 16 entries. 72 * Returns 1 on error, 0 on success 73 */ 74 #define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) 75 #define HEAP_LEFT(x) ( (x)+(x) + 1 ) 76 #define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } 77 #define HEAP_INCREMENT 15 78 79 static int 80 heap_resize(struct dn_heap *h, unsigned int new_size) 81 { 82 struct dn_heap_entry *p; 83 84 if ((unsigned int)h->size >= new_size ) /* have enough room */ 85 return 0; 86 #if 1 /* round to the next power of 2 */ 87 new_size |= new_size >> 1; 88 new_size |= new_size >> 2; 89 new_size |= new_size >> 4; 90 new_size |= new_size >> 8; 91 new_size |= new_size >> 16; 92 #else 93 new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT; 94 #endif 95 p = malloc(new_size * sizeof(*p), M_DN_HEAP, M_NOWAIT); 96 if (p == NULL) { 97 printf("--- %s, resize %d failed\n", __func__, new_size ); 98 return 1; /* error */ 99 } 100 if (h->size > 0) { 101 bcopy(h->p, p, h->size * sizeof(*p) ); 102 free(h->p, M_DN_HEAP); 103 } 104 h->p = p; 105 h->size = new_size; 106 return 0; 107 } 108 109 int 110 heap_init(struct dn_heap *h, int size, int ofs) 111 { 112 if (heap_resize(h, size)) 113 return 1; 114 h->elements = 0; 115 h->ofs = ofs; 116 return 0; 117 } 118 119 /* 120 * Insert element in heap. Normally, p != NULL, we insert p in 121 * a new position and bubble up. If p == NULL, then the element is 122 * already in place, and key is the position where to start the 123 * bubble-up. 124 * Returns 1 on failure (cannot allocate new heap entry) 125 * 126 * If ofs > 0 the position (index, int) of the element in the heap is 127 * also stored in the element itself at the given offset in bytes. 128 */ 129 #define SET_OFFSET(h, i) do { \ 130 if (h->ofs > 0) \ 131 *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = i; \ 132 } while (0) 133 /* 134 * RESET_OFFSET is used for sanity checks. It sets ofs 135 * to an invalid value. 136 */ 137 #define RESET_OFFSET(h, i) do { \ 138 if (h->ofs > 0) \ 139 *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = -16; \ 140 } while (0) 141 142 int 143 heap_insert(struct dn_heap *h, uint64_t key1, void *p) 144 { 145 int son = h->elements; 146 147 //log("%s key %llu p %p\n", __FUNCTION__, key1, p); 148 if (p == NULL) { /* data already there, set starting point */ 149 son = key1; 150 } else { /* insert new element at the end, possibly resize */ 151 son = h->elements; 152 if (son == h->size) /* need resize... */ 153 // XXX expand by 16 or so 154 if (heap_resize(h, h->elements+16) ) 155 return 1; /* failure... */ 156 h->p[son].object = p; 157 h->p[son].key = key1; 158 h->elements++; 159 } 160 /* make sure that son >= father along the path */ 161 while (son > 0) { 162 int father = HEAP_FATHER(son); 163 struct dn_heap_entry tmp; 164 165 if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) 166 break; /* found right position */ 167 /* son smaller than father, swap and repeat */ 168 HEAP_SWAP(h->p[son], h->p[father], tmp); 169 SET_OFFSET(h, son); 170 son = father; 171 } 172 SET_OFFSET(h, son); 173 return 0; 174 } 175 176 /* 177 * remove top element from heap, or obj if obj != NULL 178 */ 179 void 180 heap_extract(struct dn_heap *h, void *obj) 181 { 182 int child, father, max = h->elements - 1; 183 184 if (max < 0) { 185 printf("--- %s: empty heap 0x%p\n", __FUNCTION__, h); 186 return; 187 } 188 if (obj == NULL) 189 father = 0; /* default: move up smallest child */ 190 else { /* extract specific element, index is at offset */ 191 if (h->ofs <= 0) 192 panic("%s: extract from middle not set on %p\n", 193 __FUNCTION__, h); 194 father = *((int *)((char *)obj + h->ofs)); 195 if (father < 0 || father >= h->elements) { 196 panic("%s: father %d out of bound 0..%d\n", 197 __FUNCTION__, father, h->elements); 198 } 199 } 200 /* 201 * below, father is the index of the empty element, which 202 * we replace at each step with the smallest child until we 203 * reach the bottom level. 204 */ 205 // XXX why removing RESET_OFFSET increases runtime by 10% ? 206 RESET_OFFSET(h, father); 207 while ( (child = HEAP_LEFT(father)) <= max ) { 208 if (child != max && 209 DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) 210 child++; /* take right child, otherwise left */ 211 h->p[father] = h->p[child]; 212 SET_OFFSET(h, father); 213 father = child; 214 } 215 h->elements--; 216 if (father != max) { 217 /* 218 * Fill hole with last entry and bubble up, 219 * reusing the insert code 220 */ 221 h->p[father] = h->p[max]; 222 heap_insert(h, father, NULL); 223 } 224 } 225 226 #if 0 227 /* 228 * change object position and update references 229 * XXX this one is never used! 230 */ 231 static void 232 heap_move(struct dn_heap *h, uint64_t new_key, void *object) 233 { 234 int temp, i, max = h->elements-1; 235 struct dn_heap_entry *p, buf; 236 237 if (h->ofs <= 0) 238 panic("cannot move items on this heap"); 239 p = h->p; /* shortcut */ 240 241 i = *((int *)((char *)object + h->ofs)); 242 if (DN_KEY_LT(new_key, p[i].key) ) { /* must move up */ 243 p[i].key = new_key; 244 for (; i>0 && 245 DN_KEY_LT(new_key, p[(temp = HEAP_FATHER(i))].key); 246 i = temp ) { /* bubble up */ 247 HEAP_SWAP(p[i], p[temp], buf); 248 SET_OFFSET(h, i); 249 } 250 } else { /* must move down */ 251 p[i].key = new_key; 252 while ( (temp = HEAP_LEFT(i)) <= max ) { 253 /* found left child */ 254 if (temp != max && 255 DN_KEY_LT(p[temp+1].key, p[temp].key)) 256 temp++; /* select child with min key */ 257 if (DN_KEY_LT(>p[temp].key, new_key)) { 258 /* go down */ 259 HEAP_SWAP(p[i], p[temp], buf); 260 SET_OFFSET(h, i); 261 } else 262 break; 263 i = temp; 264 } 265 } 266 SET_OFFSET(h, i); 267 } 268 #endif /* heap_move, unused */ 269 270 /* 271 * heapify() will reorganize data inside an array to maintain the 272 * heap property. It is needed when we delete a bunch of entries. 273 */ 274 static void 275 heapify(struct dn_heap *h) 276 { 277 int i; 278 279 for (i = 0; i < h->elements; i++ ) 280 heap_insert(h, i , NULL); 281 } 282 283 int 284 heap_scan(struct dn_heap *h, int (*fn)(void *, uintptr_t), 285 uintptr_t arg) 286 { 287 int i, ret, found; 288 289 for (i = found = 0 ; i < h->elements ;) { 290 ret = fn(h->p[i].object, arg); 291 if (ret & HEAP_SCAN_DEL) { 292 h->elements-- ; 293 h->p[i] = h->p[h->elements] ; 294 found++ ; 295 } else 296 i++ ; 297 if (ret & HEAP_SCAN_END) 298 break; 299 } 300 if (found) 301 heapify(h); 302 return found; 303 } 304 305 /* 306 * cleanup the heap and free data structure 307 */ 308 void 309 heap_free(struct dn_heap *h) 310 { 311 if (h->size >0 ) 312 free(h->p, M_DN_HEAP); 313 bzero(h, sizeof(*h) ); 314 } 315 316 /* 317 * hash table support. 318 */ 319 320 struct dn_ht { 321 int buckets; /* how many buckets, really buckets - 1*/ 322 int entries; /* how many entries */ 323 int ofs; /* offset of link field */ 324 uint32_t (*hash)(uintptr_t, int, void *arg); 325 int (*match)(void *_el, uintptr_t key, int, void *); 326 void *(*newh)(uintptr_t, int, void *); 327 void **ht; /* bucket heads */ 328 }; 329 /* 330 * Initialize, allocating bucket pointers inline. 331 * Recycle previous record if possible. 332 * If the 'newh' function is not supplied, we assume that the 333 * key passed to ht_find is the same object to be stored in. 334 */ 335 struct dn_ht * 336 dn_ht_init(struct dn_ht *ht, int buckets, int ofs, 337 uint32_t (*h)(uintptr_t, int, void *), 338 int (*match)(void *, uintptr_t, int, void *), 339 void *(*newh)(uintptr_t, int, void *)) 340 { 341 int l; 342 343 /* 344 * Notes about rounding bucket size to a power of two. 345 * Given the original bucket size, we compute the nearest lower and 346 * higher power of two, minus 1 (respectively b_min and b_max) because 347 * this value will be used to do an AND with the index returned 348 * by hash function. 349 * To choice between these two values, the original bucket size is 350 * compared with b_min. If the original size is greater than 4/3 b_min, 351 * we round the bucket size to b_max, else to b_min. 352 * This ratio try to round to the nearest power of two, advantaging 353 * the greater size if the different between two power is relatively 354 * big. 355 * Rounding the bucket size to a power of two avoid the use of 356 * module when calculating the correct bucket. 357 * The ht->buckets variable store the bucket size - 1 to simply 358 * do an AND between the index returned by hash function and ht->bucket 359 * instead of a module. 360 */ 361 int b_min; /* min buckets */ 362 int b_max; /* max buckets */ 363 int b_ori; /* original buckets */ 364 365 if (h == NULL || match == NULL) { 366 printf("--- missing hash or match function"); 367 return NULL; 368 } 369 if (buckets < 1 || buckets > 65536) 370 return NULL; 371 372 b_ori = buckets; 373 /* calculate next power of 2, - 1*/ 374 buckets |= buckets >> 1; 375 buckets |= buckets >> 2; 376 buckets |= buckets >> 4; 377 buckets |= buckets >> 8; 378 buckets |= buckets >> 16; 379 380 b_max = buckets; /* Next power */ 381 b_min = buckets >> 1; /* Previous power */ 382 383 /* Calculate the 'nearest' bucket size */ 384 if (b_min * 4000 / 3000 < b_ori) 385 buckets = b_max; 386 else 387 buckets = b_min; 388 389 if (ht) { /* see if we can reuse */ 390 if (buckets <= ht->buckets) { 391 ht->buckets = buckets; 392 } else { 393 /* free pointers if not allocated inline */ 394 if (ht->ht != (void *)(ht + 1)) 395 free(ht->ht, M_DN_HEAP); 396 free(ht, M_DN_HEAP); 397 ht = NULL; 398 } 399 } 400 if (ht == NULL) { 401 /* Allocate buckets + 1 entries because buckets is use to 402 * do the AND with the index returned by hash function 403 */ 404 l = sizeof(*ht) + (buckets + 1) * sizeof(void **); 405 ht = malloc(l, M_DN_HEAP, M_NOWAIT | M_ZERO); 406 } 407 if (ht) { 408 ht->ht = (void **)(ht + 1); 409 ht->buckets = buckets; 410 ht->ofs = ofs; 411 ht->hash = h; 412 ht->match = match; 413 ht->newh = newh; 414 } 415 return ht; 416 } 417 418 /* dummy callback for dn_ht_free to unlink all */ 419 static int 420 do_del(void *obj, void *arg) 421 { 422 (void)obj; 423 (void)arg; 424 return DNHT_SCAN_DEL; 425 } 426 427 void 428 dn_ht_free(struct dn_ht *ht, int flags) 429 { 430 if (ht == NULL) 431 return; 432 if (flags & DNHT_REMOVE) { 433 (void)dn_ht_scan(ht, do_del, NULL); 434 } else { 435 if (ht->ht && ht->ht != (void *)(ht + 1)) 436 free(ht->ht, M_DN_HEAP); 437 free(ht, M_DN_HEAP); 438 } 439 } 440 441 int 442 dn_ht_entries(struct dn_ht *ht) 443 { 444 return ht ? ht->entries : 0; 445 } 446 447 /* lookup and optionally create or delete element */ 448 void * 449 dn_ht_find(struct dn_ht *ht, uintptr_t key, int flags, void *arg) 450 { 451 int i; 452 void **pp, *p; 453 454 if (ht == NULL) /* easy on an empty hash */ 455 return NULL; 456 i = (ht->buckets == 1) ? 0 : 457 (ht->hash(key, flags, arg) & ht->buckets); 458 459 for (pp = &ht->ht[i]; (p = *pp); pp = (void **)((char *)p + ht->ofs)) { 460 if (flags & DNHT_MATCH_PTR) { 461 if (key == (uintptr_t)p) 462 break; 463 } else if (ht->match(p, key, flags, arg)) /* found match */ 464 break; 465 } 466 if (p) { 467 if (flags & DNHT_REMOVE) { 468 /* link in the next element */ 469 *pp = *(void **)((char *)p + ht->ofs); 470 *(void **)((char *)p + ht->ofs) = NULL; 471 ht->entries--; 472 } 473 } else if (flags & DNHT_INSERT) { 474 // printf("%s before calling new, bucket %d ofs %d\n", 475 // __FUNCTION__, i, ht->ofs); 476 p = ht->newh ? ht->newh(key, flags, arg) : (void *)key; 477 // printf("%s newh returns %p\n", __FUNCTION__, p); 478 if (p) { 479 ht->entries++; 480 *(void **)((char *)p + ht->ofs) = ht->ht[i]; 481 ht->ht[i] = p; 482 } 483 } 484 return p; 485 } 486 487 /* 488 * do a scan with the option to delete the object. Extract next before 489 * running the callback because the element may be destroyed there. 490 */ 491 int 492 dn_ht_scan(struct dn_ht *ht, int (*fn)(void *, void *), void *arg) 493 { 494 int i, ret, found = 0; 495 void **curp, *cur, *next; 496 497 if (ht == NULL || fn == NULL) 498 return 0; 499 for (i = 0; i <= ht->buckets; i++) { 500 curp = &ht->ht[i]; 501 while ( (cur = *curp) != NULL) { 502 next = *(void **)((char *)cur + ht->ofs); 503 ret = fn(cur, arg); 504 if (ret & DNHT_SCAN_DEL) { 505 found++; 506 ht->entries--; 507 *curp = next; 508 } else { 509 curp = (void **)((char *)cur + ht->ofs); 510 } 511 if (ret & DNHT_SCAN_END) 512 return found; 513 } 514 } 515 return found; 516 } 517 518 /* 519 * Similar to dn_ht_scan(), except that the scan is performed only 520 * in the bucket 'bucket'. The function returns a correct bucket number if 521 * the original is invalid. 522 * If the callback returns DNHT_SCAN_END, the function move the ht->ht[i] 523 * pointer to the last entry processed. Moreover, the bucket number passed 524 * by caller is decremented, because usually the caller increment it. 525 */ 526 int 527 dn_ht_scan_bucket(struct dn_ht *ht, int *bucket, int (*fn)(void *, void *), 528 void *arg) 529 { 530 int i, ret, found = 0; 531 void **curp, *cur, *next; 532 533 if (ht == NULL || fn == NULL) 534 return 0; 535 if (*bucket > ht->buckets) 536 *bucket = 0; 537 i = *bucket; 538 539 curp = &ht->ht[i]; 540 while ( (cur = *curp) != NULL) { 541 next = *(void **)((char *)cur + ht->ofs); 542 ret = fn(cur, arg); 543 if (ret & DNHT_SCAN_DEL) { 544 found++; 545 ht->entries--; 546 *curp = next; 547 } else { 548 curp = (void **)((char *)cur + ht->ofs); 549 } 550 if (ret & DNHT_SCAN_END) 551 return found; 552 } 553 return found; 554 } 555