/*-
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <machine/stdarg.h>

#include <linux/bitmap.h>
#include <linux/kobject.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/err.h>

#define	MAX_IDR_LEVEL	((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS)
#define	MAX_IDR_FREE	(MAX_IDR_LEVEL * 2)

struct linux_idr_cache {
	spinlock_t lock;
	struct idr_layer *head;
	unsigned count;
};

DPCPU_DEFINE_STATIC(struct linux_idr_cache, linux_idr_cache);

/*
 * IDR Implementation.
 *
 * This is a quick and dirty implementation that is not as re-entrant as
 * the Linux version; however, it should be fairly fast.  It is basically
 * a radix tree with a built-in bitmap for allocation.
 */
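
/*
 * Typical consumer usage (an illustrative sketch only; "example_idr" and
 * "obj" are placeholders and error handling is abbreviated).  idr_alloc()
 * returns the new ID on success and a negative errno on failure:
 *
 *	struct idr example_idr;
 *	int id;
 *
 *	idr_init(&example_idr);
 *	id = idr_alloc(&example_idr, obj, 1, 0, GFP_KERNEL);
 *	if (id < 0)
 *		return (id);
 *	...
 *	obj = idr_find(&example_idr, id);
 *	idr_remove(&example_idr, id);
 *	idr_destroy(&example_idr);
 */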

static MALLOC_DEFINE(M_IDR, "idr", "Linux IDR compat");

static struct idr_layer *
idr_preload_dequeue_locked(struct linux_idr_cache *lic)
{
	struct idr_layer *retval;

	/* Check if the wrong thread is trying to dequeue. */
	if (mtx_owned(&lic->lock.m) == 0)
		return (NULL);

	retval = lic->head;
	if (likely(retval != NULL)) {
		lic->head = retval->ary[0];
		lic->count--;
		retval->ary[0] = NULL;
	}
	return (retval);
}

static void
idr_preload_init(void *arg)
{
	int cpu;

	CPU_FOREACH(cpu) {
		struct linux_idr_cache *lic =
		    DPCPU_ID_PTR(cpu, linux_idr_cache);

		spin_lock_init(&lic->lock);
	}
}
SYSINIT(idr_preload_init, SI_SUB_CPU, SI_ORDER_ANY, idr_preload_init, NULL);

static void
idr_preload_uninit(void *arg)
{
	int cpu;

	CPU_FOREACH(cpu) {
		struct idr_layer *cacheval;
		struct linux_idr_cache *lic =
		    DPCPU_ID_PTR(cpu, linux_idr_cache);

		while (1) {
			spin_lock(&lic->lock);
			cacheval = idr_preload_dequeue_locked(lic);
			spin_unlock(&lic->lock);

			if (cacheval == NULL)
				break;
			free(cacheval, M_IDR);
		}
		spin_lock_destroy(&lic->lock);
	}
}
SYSUNINIT(idr_preload_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, idr_preload_uninit, NULL);

/*
 * Pre-allocate IDR layers into the per-CPU cache.  The current thread is
 * pinned and the per-CPU lock is left held until idr_preload_end() is
 * called.
 */
void
idr_preload(gfp_t gfp_mask)
{
	struct linux_idr_cache *lic;
	struct idr_layer *cacheval;

	sched_pin();

	lic = &DPCPU_GET(linux_idr_cache);

	/* Fill up the cache. */
	spin_lock(&lic->lock);
	while (lic->count < MAX_IDR_FREE) {
		spin_unlock(&lic->lock);
		cacheval = malloc(sizeof(*cacheval), M_IDR, M_ZERO | gfp_mask);
		spin_lock(&lic->lock);
		if (cacheval == NULL)
			break;
		cacheval->ary[0] = lic->head;
		lic->head = cacheval;
		lic->count++;
	}
}

void
idr_preload_end(void)
{
	struct linux_idr_cache *lic;

	lic = &DPCPU_GET(linux_idr_cache);
	spin_unlock(&lic->lock);
	sched_unpin();
}
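
/*
 * The preload pair brackets an allocation that must not sleep, mirroring
 * the Linux usage pattern (an illustrative sketch only; "example_idr" and
 * "obj" are placeholders):
 *
 *	idr_preload(GFP_KERNEL);
 *	id = idr_alloc(&example_idr, obj, 0, 0, GFP_NOWAIT);
 *	idr_preload_end();
 */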

static inline int
idr_max(struct idr *idr)
{
	return (1 << (idr->layers * IDR_BITS)) - 1;
}

static inline int
idr_pos(int id, int layer)
{
	return (id >> (IDR_BITS * layer)) & IDR_MASK;
}

void
idr_init(struct idr *idr)
{
	bzero(idr, sizeof(*idr));
	mtx_init(&idr->lock, "idr", NULL, MTX_DEF);
}

/* Only frees cached pages. */
void
idr_destroy(struct idr *idr)
{
	struct idr_layer *il, *iln;

	idr_remove_all(idr);
	mtx_lock(&idr->lock);
	for (il = idr->free; il != NULL; il = iln) {
		iln = il->ary[0];
		free(il, M_IDR);
	}
	mtx_unlock(&idr->lock);
	mtx_destroy(&idr->lock);
}

static void
idr_remove_layer(struct idr_layer *il, int layer)
{
	int i;

	if (il == NULL)
		return;
	if (layer == 0) {
		free(il, M_IDR);
		return;
	}
	for (i = 0; i < IDR_SIZE; i++)
		if (il->ary[i])
			idr_remove_layer(il->ary[i], layer - 1);
	/* Free the intermediate layer itself once its children are gone. */
	free(il, M_IDR);
}

void
idr_remove_all(struct idr *idr)
{

	mtx_lock(&idr->lock);
	idr_remove_layer(idr->top, idr->layers - 1);
	idr->top = NULL;
	idr->layers = 0;
	mtx_unlock(&idr->lock);
}

static void *
idr_remove_locked(struct idr *idr, int id)
{
	struct idr_layer *il;
	void *res;
	int layer;
	int idx;

	id &= MAX_ID_MASK;
	il = idr->top;
	layer = idr->layers - 1;
	if (il == NULL || id > idr_max(idr))
		return (NULL);
	/*
	 * Walk down the tree to this item, setting bitmaps along the way,
	 * since we know at least one item will be free along this path.
	 */
	while (layer && il) {
		idx = idr_pos(id, layer);
		il->bitmap |= 1 << idx;
		il = il->ary[idx];
		layer--;
	}
	idx = id & IDR_MASK;
	/*
	 * At this point we've set free space bitmaps up the whole tree.
	 * We could make this non-fatal and unwind, but Linux dumps a stack
	 * trace and a warning, so it doesn't seem necessary.
	 */
	if (il == NULL || (il->bitmap & (1 << idx)) != 0)
		panic("idr_remove: Item %d not allocated (%p, %p)\n",
		    id, idr, il);
	res = il->ary[idx];
	il->ary[idx] = NULL;
	il->bitmap |= 1 << idx;

	return (res);
}

void *
idr_remove(struct idr *idr, int id)
{
	void *res;

	mtx_lock(&idr->lock);
	res = idr_remove_locked(idr, id);
	mtx_unlock(&idr->lock);

	return (res);
}

static inline struct idr_layer *
idr_find_layer_locked(struct idr *idr, int id)
{
	struct idr_layer *il;
	int layer;

	id &= MAX_ID_MASK;
	il = idr->top;
	layer = idr->layers - 1;
	if (il == NULL || id > idr_max(idr))
		return (NULL);
	while (layer && il) {
		il = il->ary[idr_pos(id, layer)];
		layer--;
	}
	return (il);
}

void *
idr_replace(struct idr *idr, void *ptr, int id)
{
	struct idr_layer *il;
	void *res;
	int idx;

	mtx_lock(&idr->lock);
	il = idr_find_layer_locked(idr, id);
	idx = id & IDR_MASK;

	/* Replace still returns an error if the item was not allocated. */
	if (il == NULL || (il->bitmap & (1 << idx))) {
		res = ERR_PTR(-ENOENT);
	} else {
		res = il->ary[idx];
		il->ary[idx] = ptr;
	}
	mtx_unlock(&idr->lock);
	return (res);
}

static inline void *
idr_find_locked(struct idr *idr, int id)
{
	struct idr_layer *il;
	void *res;

	mtx_assert(&idr->lock, MA_OWNED);
	il = idr_find_layer_locked(idr, id);
	if (il != NULL)
		res = il->ary[id & IDR_MASK];
	else
		res = NULL;
	return (res);
}

void *
idr_find(struct idr *idr, int id)
{
	void *res;

	mtx_lock(&idr->lock);
	res = idr_find_locked(idr, id);
	mtx_unlock(&idr->lock);
	return (res);
}

void *
idr_get_next(struct idr *idr, int *nextidp)
{
	void *res = NULL;
	int id = *nextidp;

	mtx_lock(&idr->lock);
	for (; id <= idr_max(idr); id++) {
		res = idr_find_locked(idr, id);
		if (res == NULL)
			continue;
		*nextidp = id;
		break;
	}
	mtx_unlock(&idr->lock);
	return (res);
}

int
idr_pre_get(struct idr *idr, gfp_t gfp_mask)
{
	struct idr_layer *il, *iln;
	struct idr_layer *head;
	int need;

	mtx_lock(&idr->lock);
	for (;;) {
		need = idr->layers + 1;
		for (il = idr->free; il != NULL; il = il->ary[0])
			need--;
		mtx_unlock(&idr->lock);
		if (need <= 0)
			break;
		for (head = NULL; need; need--) {
			iln = malloc(sizeof(*il), M_IDR, M_ZERO | gfp_mask);
			if (iln == NULL)
				break;
			bitmap_fill(&iln->bitmap, IDR_SIZE);
			if (head != NULL) {
				il->ary[0] = iln;
				il = iln;
			} else
				head = il = iln;
		}
		if (head == NULL)
			return (0);
		mtx_lock(&idr->lock);
		il->ary[0] = idr->free;
		idr->free = head;
	}
	return (1);
}

static struct idr_layer *
idr_free_list_get(struct idr *idp)
{
	struct idr_layer *il;

	if ((il = idp->free) != NULL) {
		idp->free = il->ary[0];
		il->ary[0] = NULL;
	}
	return (il);
}

/*
 * Get a free IDR layer: first from the IDR's own free list, then from a
 * non-sleeping malloc(), and finally from the per-CPU preload cache.
 */
static inline struct idr_layer *
idr_get(struct idr *idp)
{
	struct idr_layer *il;

	if ((il = idr_free_list_get(idp)) != NULL) {
		MPASS(il->bitmap != 0);
	} else if ((il = malloc(sizeof(*il), M_IDR, M_ZERO | M_NOWAIT)) != NULL) {
		bitmap_fill(&il->bitmap, IDR_SIZE);
	} else if ((il = idr_preload_dequeue_locked(&DPCPU_GET(linux_idr_cache))) != NULL) {
		bitmap_fill(&il->bitmap, IDR_SIZE);
	} else {
		return (NULL);
	}
	return (il);
}

/*
 * Could be implemented as get_new_above(idr, ptr, 0, idp) but written
 * first for simplicity's sake.
 */
static int
idr_get_new_locked(struct idr *idr, void *ptr, int *idp)
{
	struct idr_layer *stack[MAX_LEVEL];
	struct idr_layer *il;
	int error;
	int layer;
	int idx;
	int id;

	mtx_assert(&idr->lock, MA_OWNED);

	error = -EAGAIN;
	/*
	 * Expand the tree until there is free space.
	 */
	if (idr->top == NULL || idr->top->bitmap == 0) {
		if (idr->layers == MAX_LEVEL + 1) {
			error = -ENOSPC;
			goto out;
		}
		il = idr_get(idr);
		if (il == NULL)
			goto out;
		il->ary[0] = idr->top;
		if (idr->top)
			il->bitmap &= ~1;
		idr->top = il;
		idr->layers++;
	}
	il = idr->top;
	id = 0;
	/*
	 * Walk the tree following free bitmaps, record our path.
	 */
	for (layer = idr->layers - 1;; layer--) {
		stack[layer] = il;
		idx = ffsl(il->bitmap);
		if (idx == 0)
			panic("idr_get_new: Invalid leaf state (%p, %p)\n",
			    idr, il);
		idx--;
		id |= idx << (layer * IDR_BITS);
		if (layer == 0)
			break;
		if (il->ary[idx] == NULL) {
			il->ary[idx] = idr_get(idr);
			if (il->ary[idx] == NULL)
				goto out;
		}
		il = il->ary[idx];
	}
	/*
	 * Allocate the leaf to the consumer.
	 */
	il->bitmap &= ~(1 << idx);
	il->ary[idx] = ptr;
	*idp = id;
	/*
	 * Clear bitmaps potentially up to the root.
	 */
	while (il->bitmap == 0 && ++layer < idr->layers) {
		il = stack[layer];
		il->bitmap &= ~(1 << idr_pos(id, layer));
	}
	error = 0;
out:
#ifdef INVARIANTS
	if (error == 0 && idr_find_locked(idr, id) != ptr) {
		panic("idr_get_new: Failed for idr %p, id %d, ptr %p\n",
		    idr, id, ptr);
	}
#endif
	return (error);
}

int
idr_get_new(struct idr *idr, void *ptr, int *idp)
{
	int retval;

	mtx_lock(&idr->lock);
	retval = idr_get_new_locked(idr, ptr, idp);
	mtx_unlock(&idr->lock);
	return (retval);
}

static int
idr_get_new_above_locked(struct idr *idr, void *ptr, int starting_id, int *idp)
{
	struct idr_layer *stack[MAX_LEVEL];
	struct idr_layer *il;
	int error;
	int layer;
	int idx, sidx;
	int id;

	mtx_assert(&idr->lock, MA_OWNED);

	error = -EAGAIN;
	/*
	 * Compute the layers required to support starting_id and the mask
	 * at the top layer.
	 */
restart:
	idx = starting_id;
	layer = 0;
	while (idx & ~IDR_MASK) {
		layer++;
		idx >>= IDR_BITS;
	}
	if (layer == MAX_LEVEL + 1) {
		error = -ENOSPC;
		goto out;
	}
	/*
	 * Expand the tree until there is free space at or beyond starting_id.
	 */
	while (idr->layers <= layer ||
	    idr->top->bitmap < (1 << idr_pos(starting_id, idr->layers - 1))) {
		if (idr->layers == MAX_LEVEL + 1) {
			error = -ENOSPC;
			goto out;
		}
		il = idr_get(idr);
		if (il == NULL)
			goto out;
		il->ary[0] = idr->top;
		if (idr->top && idr->top->bitmap == 0)
			il->bitmap &= ~1;
		idr->top = il;
		idr->layers++;
	}
	il = idr->top;
	id = 0;
	/*
	 * Walk the tree following free bitmaps, record our path.
	 */
	for (layer = idr->layers - 1;; layer--) {
		stack[layer] = il;
		sidx = idr_pos(starting_id, layer);
		/* Returns an index numbered from 0, or IDR_SIZE if none exists. */
		idx = find_next_bit(&il->bitmap, IDR_SIZE, sidx);
		if (idx == IDR_SIZE && sidx == 0)
			panic("idr_get_new: Invalid leaf state (%p, %p)\n",
			    idr, il);
		/*
		 * We may have walked a path where there was a free bit but
		 * it was lower than what we wanted.  Restart the search with
		 * a larger starting id.  id contains the progress we made so
		 * far.  Search the leaf one above this level.  This may
		 * restart as many as MAX_LEVEL times but that is expected
		 * to be rare.
		 */
		if (idx == IDR_SIZE) {
			starting_id = id + (1 << ((layer + 1) * IDR_BITS));
			goto restart;
		}
		if (idx > sidx)
			starting_id = 0;	/* Search the whole subtree. */
		id |= idx << (layer * IDR_BITS);
		if (layer == 0)
			break;
		if (il->ary[idx] == NULL) {
			il->ary[idx] = idr_get(idr);
			if (il->ary[idx] == NULL)
				goto out;
		}
		il = il->ary[idx];
	}
	/*
	 * Allocate the leaf to the consumer.
	 */
	il->bitmap &= ~(1 << idx);
	il->ary[idx] = ptr;
	*idp = id;
	/*
	 * Clear bitmaps potentially up to the root.
	 */
	while (il->bitmap == 0 && ++layer < idr->layers) {
		il = stack[layer];
		il->bitmap &= ~(1 << idr_pos(id, layer));
	}
	error = 0;
out:
#ifdef INVARIANTS
	if (error == 0 && idr_find_locked(idr, id) != ptr) {
		panic("idr_get_new_above: Failed for idr %p, id %d, ptr %p\n",
		    idr, id, ptr);
	}
#endif
	return (error);
}

int
idr_get_new_above(struct idr *idr, void *ptr, int starting_id, int *idp)
{
	int retval;

	mtx_lock(&idr->lock);
	retval = idr_get_new_above_locked(idr, ptr, starting_id, idp);
	mtx_unlock(&idr->lock);
	return (retval);
}

int
ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
{
	return (idr_get_new_above(&ida->idr, NULL, starting_id, p_id));
}

static int
idr_alloc_locked(struct idr *idr, void *ptr, int start, int end)
{
	int max = end > 0 ? end - 1 : INT_MAX;
	int error;
	int id;

	mtx_assert(&idr->lock, MA_OWNED);

	if (unlikely(start < 0))
		return (-EINVAL);
	if (unlikely(max < start))
		return (-ENOSPC);

	if (start == 0)
		error = idr_get_new_locked(idr, ptr, &id);
	else
		error = idr_get_new_above_locked(idr, ptr, start, &id);

	if (unlikely(error < 0))
		return (error);
	if (unlikely(id > max)) {
		idr_remove_locked(idr, id);
		return (-ENOSPC);
	}
	return (id);
}

int
idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
{
	int retval;

	mtx_lock(&idr->lock);
	retval = idr_alloc_locked(idr, ptr, start, end);
	mtx_unlock(&idr->lock);
	return (retval);
}

int
idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
{
	int retval;

	mtx_lock(&idr->lock);
	retval = idr_alloc_locked(idr, ptr, max(start, idr->next_cyclic_id), end);
	if (unlikely(retval == -ENOSPC))
		retval = idr_alloc_locked(idr, ptr, start, end);
	if (likely(retval >= 0))
		idr->next_cyclic_id = retval + 1;
	mtx_unlock(&idr->lock);
	return (retval);
}

static int
idr_for_each_layer(struct idr_layer *il, int offset, int layer,
    int (*f)(int id, void *p, void *data), void *data)
{
	int i, err;

	if (il == NULL)
		return (0);
	if (layer == 0) {
		for (i = 0; i < IDR_SIZE; i++) {
			if (il->ary[i] == NULL)
				continue;
			err = f(i + offset, il->ary[i], data);
			if (err)
				return (err);
		}
		return (0);
	}
	for (i = 0; i < IDR_SIZE; i++) {
		if (il->ary[i] == NULL)
			continue;
		err = idr_for_each_layer(il->ary[i],
		    (i + offset) * IDR_SIZE, layer - 1, f, data);
		if (err)
			return (err);
	}
	return (0);
}

/* NOTE: It is not allowed to modify the IDR tree while it is being iterated. */
int
idr_for_each(struct idr *idp, int (*f)(int id, void *p, void *data), void *data)
{
	return (idr_for_each_layer(idp->top, 0, idp->layers - 1, f, data));
}

static int
idr_has_entry(int id, void *p, void *data)
{

	return (1);
}

bool
idr_is_empty(struct idr *idp)
{

	return (idr_for_each(idp, idr_has_entry, NULL) == 0);
}

int
ida_pre_get(struct ida *ida, gfp_t flags)
{
	if (idr_pre_get(&ida->idr, flags) == 0)
		return (0);

	if (ida->free_bitmap == NULL) {
		ida->free_bitmap =
		    malloc(sizeof(struct ida_bitmap), M_IDR, flags);
	}
	return (ida->free_bitmap != NULL);
}
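
/*
 * Typical ida usage (an illustrative sketch only; "example_ida" is a
 * placeholder).  ida_simple_get() returns the allocated ID on success and
 * a negative errno on failure:
 *
 *	struct ida example_ida;
 *	int id;
 *
 *	ida_init(&example_ida);
 *	id = ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);
 *	if (id < 0)
 *		return (id);
 *	...
 *	ida_simple_remove(&example_ida, id);
 *	ida_destroy(&example_ida);
 */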

int
ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
    gfp_t flags)
{
	int ret, id;
	unsigned int max;

	MPASS((int)start >= 0);
	MPASS((int)end >= 0);

	if (end == 0)
		max = 0x80000000;
	else {
		MPASS(end > start);
		max = end - 1;
	}
again:
	if (!ida_pre_get(ida, flags))
		return (-ENOMEM);

	if ((ret = ida_get_new_above(ida, start, &id)) == 0) {
		if (id > max) {
			ida_remove(ida, id);
			ret = -ENOSPC;
		} else {
			ret = id;
		}
	}
	if (__predict_false(ret == -EAGAIN))
		goto again;

	return (ret);
}

void
ida_simple_remove(struct ida *ida, unsigned int id)
{
	idr_remove(&ida->idr, id);
}

void
ida_remove(struct ida *ida, int id)
{
	idr_remove(&ida->idr, id);
}

void
ida_init(struct ida *ida)
{
	idr_init(&ida->idr);
}

void
ida_destroy(struct ida *ida)
{
	idr_destroy(&ida->idr);
	free(ida->free_bitmap, M_IDR);
}