1 /* 2 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 * 28 */ 29 30 /* 31 * uma_core.c Implementation of the Universal Memory allocator 32 * 33 * This allocator is intended to replace the multitude of similar object caches 34 * in the standard FreeBSD kernel. The intent is to be flexible as well as 35 * effecient. A primary design goal is to return unused memory to the rest of 36 * the system. This will make the system as a whole more flexible due to the 37 * ability to move memory to subsystems which most need it instead of leaving 38 * pools of reserved memory unused. 39 * 40 * The basic ideas stem from similar slab/zone based allocators whose algorithms 41 * are well known. 42 * 43 */ 44 45 /* 46 * TODO: 47 * - Improve memory usage for large allocations 48 * - Investigate cache size adjustments 49 */ 50 51 /* I should really use ktr.. */ 52 /* 53 #define UMA_DEBUG 1 54 #define UMA_DEBUG_ALLOC 1 55 #define UMA_DEBUG_ALLOC_1 1 56 */ 57 58 59 #include "opt_param.h" 60 #include <sys/param.h> 61 #include <sys/systm.h> 62 #include <sys/kernel.h> 63 #include <sys/types.h> 64 #include <sys/queue.h> 65 #include <sys/malloc.h> 66 #include <sys/lock.h> 67 #include <sys/sysctl.h> 68 #include <sys/mutex.h> 69 #include <sys/proc.h> 70 #include <sys/smp.h> 71 #include <sys/vmmeter.h> 72 73 #include <vm/vm.h> 74 #include <vm/vm_object.h> 75 #include <vm/vm_page.h> 76 #include <vm/vm_param.h> 77 #include <vm/vm_map.h> 78 #include <vm/vm_kern.h> 79 #include <vm/vm_extern.h> 80 #include <vm/uma.h> 81 #include <vm/uma_int.h> 82 #include <vm/uma_dbg.h> 83 84 #include <machine/vmparam.h> 85 86 /* 87 * This is the zone from which all zones are spawned. The idea is that even 88 * the zone heads are allocated from the allocator, so we use the bss section 89 * to bootstrap us. 90 */ 91 static struct uma_zone masterzone; 92 static uma_zone_t zones = &masterzone; 93 94 /* This is the zone from which all of uma_slab_t's are allocated. */ 95 static uma_zone_t slabzone; 96 97 /* 98 * The initial hash tables come out of this zone so they can be allocated 99 * prior to malloc coming up. 100 */ 101 static uma_zone_t hashzone; 102 103 /* 104 * Zone that buckets come from. 
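 * Buckets are the small arrays of item pointers that back the per cpu
 * alloc/free caches and the zone's full/free bucket lists.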
105 */ 106 static uma_zone_t bucketzone; 107 108 /* 109 * Are we allowed to allocate buckets? 110 */ 111 static int bucketdisable = 1; 112 113 /* Linked list of all zones in the system */ 114 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones); 115 116 /* This mutex protects the zone list */ 117 static struct mtx uma_mtx; 118 119 /* Linked list of boot time pages */ 120 static LIST_HEAD(,uma_slab) uma_boot_pages = 121 LIST_HEAD_INITIALIZER(&uma_boot_pages); 122 123 /* Count of free boottime pages */ 124 static int uma_boot_free = 0; 125 126 /* Is the VM done starting up? */ 127 static int booted = 0; 128 129 /* This is the handle used to schedule our working set calculator */ 130 static struct callout uma_callout; 131 132 /* This is mp_maxid + 1, for use while looping over each cpu */ 133 static int maxcpu; 134 135 /* 136 * This structure is passed as the zone ctor arg so that I don't have to create 137 * a special allocation function just for zones. 138 */ 139 struct uma_zctor_args { 140 char *name; 141 size_t size; 142 uma_ctor ctor; 143 uma_dtor dtor; 144 uma_init uminit; 145 uma_fini fini; 146 int align; 147 u_int16_t flags; 148 }; 149 150 /* Prototypes.. */ 151 152 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int); 153 static void *page_alloc(uma_zone_t, int, u_int8_t *, int); 154 static void page_free(void *, int, u_int8_t); 155 static uma_slab_t slab_zalloc(uma_zone_t, int); 156 static void cache_drain(uma_zone_t); 157 static void bucket_drain(uma_zone_t, uma_bucket_t); 158 static void zone_drain(uma_zone_t); 159 static void zone_ctor(void *, int, void *); 160 static void zone_dtor(void *, int, void *); 161 static void zero_init(void *, int); 162 static void zone_small_init(uma_zone_t zone); 163 static void zone_large_init(uma_zone_t zone); 164 static void zone_foreach(void (*zfunc)(uma_zone_t)); 165 static void zone_timeout(uma_zone_t zone); 166 static int hash_alloc(struct uma_hash *); 167 static int hash_expand(struct uma_hash *, struct uma_hash *); 168 static void hash_free(struct uma_hash *hash); 169 static void uma_timeout(void *); 170 static void uma_startup3(void); 171 static void *uma_zalloc_internal(uma_zone_t, void *, int); 172 static void uma_zfree_internal(uma_zone_t, void *, void *, int); 173 static void bucket_enable(void); 174 static int uma_zalloc_bucket(uma_zone_t zone, int flags); 175 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags); 176 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab); 177 178 void uma_print_zone(uma_zone_t); 179 void uma_print_stats(void); 180 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); 181 182 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, 183 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 184 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); 185 186 /* 187 * This routine checks to see whether or not it's safe to enable buckets. 188 */ 189 190 static void 191 bucket_enable(void) 192 { 193 if (cnt.v_free_count < cnt.v_free_min) 194 bucketdisable = 1; 195 else 196 bucketdisable = 0; 197 } 198 199 200 /* 201 * Routine called by timeout which is used to fire off some time interval 202 * based calculations. (working set, stats, etc.) 
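 * It reschedules itself through uma_callout every UMA_WORKING_TIME seconds.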
203 * 204 * Arguments: 205 * arg Unused 206 * 207 * Returns: 208 * Nothing 209 */ 210 static void 211 uma_timeout(void *unused) 212 { 213 bucket_enable(); 214 zone_foreach(zone_timeout); 215 216 /* Reschedule this event */ 217 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); 218 } 219 220 /* 221 * Routine to perform timeout driven calculations. This does the working set 222 * as well as hash expanding, and per cpu statistics aggregation. 223 * 224 * Arguments: 225 * zone The zone to operate on 226 * 227 * Returns: 228 * Nothing 229 */ 230 static void 231 zone_timeout(uma_zone_t zone) 232 { 233 uma_cache_t cache; 234 u_int64_t alloc; 235 int free; 236 int cpu; 237 238 alloc = 0; 239 free = 0; 240 241 /* 242 * Aggregate per cpu cache statistics back to the zone. 243 * 244 * I may rewrite this to set a flag in the per cpu cache instead of 245 * locking. If the flag is not cleared on the next round I will have 246 * to lock and do it here instead so that the statistics don't get too 247 * far out of sync. 248 */ 249 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) { 250 for (cpu = 0; cpu < maxcpu; cpu++) { 251 if (CPU_ABSENT(cpu)) 252 continue; 253 CPU_LOCK(zone, cpu); 254 cache = &zone->uz_cpu[cpu]; 255 /* Add them up, and reset */ 256 alloc += cache->uc_allocs; 257 cache->uc_allocs = 0; 258 if (cache->uc_allocbucket) 259 free += cache->uc_allocbucket->ub_ptr + 1; 260 if (cache->uc_freebucket) 261 free += cache->uc_freebucket->ub_ptr + 1; 262 CPU_UNLOCK(zone, cpu); 263 } 264 } 265 266 /* Now push these stats back into the zone.. */ 267 ZONE_LOCK(zone); 268 zone->uz_allocs += alloc; 269 270 /* 271 * cachefree is an instantanious snapshot of what is in the per cpu 272 * caches, not an accurate counter 273 */ 274 zone->uz_cachefree = free; 275 276 /* 277 * Expand the zone hash table. 278 * 279 * This is done if the number of slabs is larger than the hash size. 280 * What I'm trying to do here is completely reduce collisions. This 281 * may be a little aggressive. Should I allow for two collisions max? 282 */ 283 284 if (zone->uz_flags & UMA_ZFLAG_HASH && 285 zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) { 286 struct uma_hash newhash; 287 struct uma_hash oldhash; 288 int ret; 289 290 /* 291 * This is so involved because allocating and freeing 292 * while the zone lock is held will lead to deadlock. 293 * I have to do everything in stages and check for 294 * races. 295 */ 296 newhash = zone->uz_hash; 297 ZONE_UNLOCK(zone); 298 ret = hash_alloc(&newhash); 299 ZONE_LOCK(zone); 300 if (ret) { 301 if (hash_expand(&zone->uz_hash, &newhash)) { 302 oldhash = zone->uz_hash; 303 zone->uz_hash = newhash; 304 } else 305 oldhash = newhash; 306 307 ZONE_UNLOCK(zone); 308 hash_free(&oldhash); 309 ZONE_LOCK(zone); 310 } 311 } 312 313 /* 314 * Here we compute the working set size as the total number of items 315 * left outstanding since the last time interval. This is slightly 316 * suboptimal. What we really want is the highest number of outstanding 317 * items during the last time quantum. This should be close enough. 318 * 319 * The working set size is used to throttle the zone_drain function. 320 * We don't want to return memory that we may need again immediately. 321 */ 322 alloc = zone->uz_allocs - zone->uz_oallocs; 323 zone->uz_oallocs = zone->uz_allocs; 324 zone->uz_wssize = alloc; 325 326 ZONE_UNLOCK(zone); 327 } 328 329 /* 330 * Allocate and zero fill the next sized hash table from the appropriate 331 * backing store. 
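 * The first table comes from hashzone (so it works before malloc(9) is up)
 * and is UMA_HASH_SIZE_INIT entries long; each later call doubles the size
 * and switches to malloc(9) for the backing memory.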
332 * 333 * Arguments: 334 * hash A new hash structure with the old hash size in uh_hashsize 335 * 336 * Returns: 337 * 1 on sucess and 0 on failure. 338 */ 339 static int 340 hash_alloc(struct uma_hash *hash) 341 { 342 int oldsize; 343 int alloc; 344 345 oldsize = hash->uh_hashsize; 346 347 /* We're just going to go to a power of two greater */ 348 if (oldsize) { 349 hash->uh_hashsize = oldsize * 2; 350 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize; 351 /* XXX Shouldn't be abusing DEVBUF here */ 352 hash->uh_slab_hash = (struct slabhead *)malloc(alloc, 353 M_DEVBUF, M_NOWAIT); 354 } else { 355 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; 356 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL, 357 M_WAITOK); 358 hash->uh_hashsize = UMA_HASH_SIZE_INIT; 359 } 360 if (hash->uh_slab_hash) { 361 bzero(hash->uh_slab_hash, alloc); 362 hash->uh_hashmask = hash->uh_hashsize - 1; 363 return (1); 364 } 365 366 return (0); 367 } 368 369 /* 370 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout 371 * to reduce collisions. This must not be done in the regular allocation path, 372 * otherwise, we can recurse on the vm while allocating pages. 373 * 374 * Arguments: 375 * oldhash The hash you want to expand 376 * newhash The hash structure for the new table 377 * 378 * Returns: 379 * Nothing 380 * 381 * Discussion: 382 */ 383 static int 384 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash) 385 { 386 uma_slab_t slab; 387 int hval; 388 int i; 389 390 if (!newhash->uh_slab_hash) 391 return (0); 392 393 if (oldhash->uh_hashsize >= newhash->uh_hashsize) 394 return (0); 395 396 /* 397 * I need to investigate hash algorithms for resizing without a 398 * full rehash. 399 */ 400 401 for (i = 0; i < oldhash->uh_hashsize; i++) 402 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) { 403 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]); 404 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink); 405 hval = UMA_HASH(newhash, slab->us_data); 406 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval], 407 slab, us_hlink); 408 } 409 410 return (1); 411 } 412 413 /* 414 * Free the hash bucket to the appropriate backing store. 415 * 416 * Arguments: 417 * slab_hash The hash bucket we're freeing 418 * hashsize The number of entries in that hash bucket 419 * 420 * Returns: 421 * Nothing 422 */ 423 static void 424 hash_free(struct uma_hash *hash) 425 { 426 if (hash->uh_slab_hash == NULL) 427 return; 428 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT) 429 uma_zfree_internal(hashzone, 430 hash->uh_slab_hash, NULL, 0); 431 else 432 free(hash->uh_slab_hash, M_DEVBUF); 433 } 434 435 /* 436 * Frees all outstanding items in a bucket 437 * 438 * Arguments: 439 * zone The zone to free to, must be unlocked. 440 * bucket The free/alloc bucket with items, cpu queue must be locked. 441 * 442 * Returns: 443 * Nothing 444 */ 445 446 static void 447 bucket_drain(uma_zone_t zone, uma_bucket_t bucket) 448 { 449 uma_slab_t slab; 450 int mzone; 451 void *item; 452 453 if (bucket == NULL) 454 return; 455 456 slab = NULL; 457 mzone = 0; 458 459 /* We have to lookup the slab again for malloc.. */ 460 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 461 mzone = 1; 462 463 while (bucket->ub_ptr > -1) { 464 item = bucket->ub_bucket[bucket->ub_ptr]; 465 #ifdef INVARIANTS 466 bucket->ub_bucket[bucket->ub_ptr] = NULL; 467 KASSERT(item != NULL, 468 ("bucket_drain: botched ptr, item is NULL")); 469 #endif 470 bucket->ub_ptr--; 471 /* 472 * This is extremely inefficient. 
The slab pointer was passed 473 * to uma_zfree_arg, but we lost it because the buckets don't 474 * hold them. This will go away when free() gets a size passed 475 * to it. 476 */ 477 if (mzone) 478 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK)); 479 uma_zfree_internal(zone, item, slab, 1); 480 } 481 } 482 483 /* 484 * Drains the per cpu caches for a zone. 485 * 486 * Arguments: 487 * zone The zone to drain, must be unlocked. 488 * 489 * Returns: 490 * Nothing 491 * 492 * This function returns with the zone locked so that the per cpu queues can 493 * not be filled until zone_drain is finished. 494 * 495 */ 496 static void 497 cache_drain(uma_zone_t zone) 498 { 499 uma_bucket_t bucket; 500 uma_cache_t cache; 501 int cpu; 502 503 /* 504 * Flush out the per cpu queues. 505 * 506 * XXX This causes unnecessary thrashing due to immediately having 507 * empty per cpu queues. I need to improve this. 508 */ 509 510 /* 511 * We have to lock each cpu cache before locking the zone 512 */ 513 ZONE_UNLOCK(zone); 514 515 for (cpu = 0; cpu < maxcpu; cpu++) { 516 if (CPU_ABSENT(cpu)) 517 continue; 518 CPU_LOCK(zone, cpu); 519 cache = &zone->uz_cpu[cpu]; 520 bucket_drain(zone, cache->uc_allocbucket); 521 bucket_drain(zone, cache->uc_freebucket); 522 } 523 524 /* 525 * Drain the bucket queues and free the buckets, we just keep two per 526 * cpu (alloc/free). 527 */ 528 ZONE_LOCK(zone); 529 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 530 LIST_REMOVE(bucket, ub_link); 531 ZONE_UNLOCK(zone); 532 bucket_drain(zone, bucket); 533 uma_zfree_internal(bucketzone, bucket, NULL, 0); 534 ZONE_LOCK(zone); 535 } 536 537 /* Now we do the free queue.. */ 538 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 539 LIST_REMOVE(bucket, ub_link); 540 uma_zfree_internal(bucketzone, bucket, NULL, 0); 541 } 542 543 /* We unlock here, but they will all block until the zone is unlocked */ 544 for (cpu = 0; cpu < maxcpu; cpu++) { 545 if (CPU_ABSENT(cpu)) 546 continue; 547 CPU_UNLOCK(zone, cpu); 548 } 549 550 zone->uz_cachefree = 0; 551 } 552 553 /* 554 * Frees pages from a zone back to the system. This is done on demand from 555 * the pageout daemon. 556 * 557 * Arguments: 558 * zone The zone to free pages from 559 * all Should we drain all items? 560 * 561 * Returns: 562 * Nothing. 
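 *
 * Only completely free slabs are released, and only enough of them to bring
 * the zone back down toward its working set (uz_wssize); boot time
 * (UMA_SLAB_BOOT) slabs are skipped since there is nowhere to return them to.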
563 */ 564 static void 565 zone_drain(uma_zone_t zone) 566 { 567 struct slabhead freeslabs = {}; 568 uma_slab_t slab; 569 uma_slab_t n; 570 u_int64_t extra; 571 u_int8_t flags; 572 u_int8_t *mem; 573 int i; 574 575 /* 576 * We don't want to take pages from staticly allocated zones at this 577 * time 578 */ 579 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL) 580 return; 581 582 ZONE_LOCK(zone); 583 584 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) 585 cache_drain(zone); 586 587 if (zone->uz_free < zone->uz_wssize) 588 goto finished; 589 #ifdef UMA_DEBUG 590 printf("%s working set size: %llu free items: %u\n", 591 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free); 592 #endif 593 extra = zone->uz_free - zone->uz_wssize; 594 extra /= zone->uz_ipers; 595 596 /* extra is now the number of extra slabs that we can free */ 597 598 if (extra == 0) 599 goto finished; 600 601 slab = LIST_FIRST(&zone->uz_free_slab); 602 while (slab && extra) { 603 n = LIST_NEXT(slab, us_link); 604 605 /* We have no where to free these to */ 606 if (slab->us_flags & UMA_SLAB_BOOT) { 607 slab = n; 608 continue; 609 } 610 611 LIST_REMOVE(slab, us_link); 612 zone->uz_pages -= zone->uz_ppera; 613 zone->uz_free -= zone->uz_ipers; 614 615 if (zone->uz_flags & UMA_ZFLAG_HASH) 616 UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data); 617 618 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink); 619 620 slab = n; 621 extra--; 622 } 623 finished: 624 ZONE_UNLOCK(zone); 625 626 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) { 627 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink); 628 if (zone->uz_fini) 629 for (i = 0; i < zone->uz_ipers; i++) 630 zone->uz_fini( 631 slab->us_data + (zone->uz_rsize * i), 632 zone->uz_size); 633 flags = slab->us_flags; 634 mem = slab->us_data; 635 636 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) 637 uma_zfree_internal(slabzone, slab, NULL, 0); 638 if (zone->uz_flags & UMA_ZFLAG_MALLOC) { 639 vm_object_t obj; 640 641 if (flags & UMA_SLAB_KMEM) 642 obj = kmem_object; 643 else 644 obj = NULL; 645 for (i = 0; i < zone->uz_ppera; i++) 646 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE), 647 obj); 648 } 649 #ifdef UMA_DEBUG 650 printf("%s: Returning %d bytes.\n", 651 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera); 652 #endif 653 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags); 654 } 655 656 } 657 658 /* 659 * Allocate a new slab for a zone. This does not insert the slab onto a list. 660 * 661 * Arguments: 662 * zone The zone to allocate slabs for 663 * wait Shall we wait? 664 * 665 * Returns: 666 * The slab that was allocated or NULL if there is no memory and the 667 * caller specified M_NOWAIT. 668 * 669 */ 670 static uma_slab_t 671 slab_zalloc(uma_zone_t zone, int wait) 672 { 673 uma_slab_t slab; /* Starting slab */ 674 u_int8_t *mem; 675 u_int8_t flags; 676 int i; 677 678 slab = NULL; 679 680 #ifdef UMA_DEBUG 681 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); 682 #endif 683 ZONE_UNLOCK(zone); 684 685 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) { 686 slab = uma_zalloc_internal(slabzone, NULL, wait); 687 if (slab == NULL) { 688 ZONE_LOCK(zone); 689 return NULL; 690 } 691 } 692 693 /* 694 * This reproduces the old vm_zone behavior of zero filling pages the 695 * first time they are added to a zone. 696 * 697 * Malloced items are zeroed in uma_zalloc. 
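	 * M_ZERO is therefore stripped here for UMA_ZFLAG_MALLOC zones and
	 * forced on for everything else.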
698 */ 699 700 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0) 701 wait |= M_ZERO; 702 else 703 wait &= ~M_ZERO; 704 705 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) { 706 mtx_lock(&Giant); 707 mem = zone->uz_allocf(zone, 708 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait); 709 mtx_unlock(&Giant); 710 if (mem == NULL) { 711 ZONE_LOCK(zone); 712 return (NULL); 713 } 714 } else { 715 uma_slab_t tmps; 716 717 if (zone->uz_ppera > 1) 718 panic("UMA: Attemping to allocate multiple pages before vm has started.\n"); 719 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 720 panic("Mallocing before uma_startup2 has been called.\n"); 721 if (uma_boot_free == 0) 722 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n"); 723 tmps = LIST_FIRST(&uma_boot_pages); 724 LIST_REMOVE(tmps, us_link); 725 uma_boot_free--; 726 mem = tmps->us_data; 727 flags = tmps->us_flags; 728 } 729 730 /* Point the slab into the allocated memory */ 731 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) 732 slab = (uma_slab_t )(mem + zone->uz_pgoff); 733 734 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 735 for (i = 0; i < zone->uz_ppera; i++) 736 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab); 737 738 slab->us_zone = zone; 739 slab->us_data = mem; 740 741 /* 742 * This is intended to spread data out across cache lines. 743 * 744 * This code doesn't seem to work properly on x86, and on alpha 745 * it makes absolutely no performance difference. I'm sure it could 746 * use some tuning, but sun makes outrageous claims about it's 747 * performance. 748 */ 749 #if 0 750 if (zone->uz_cachemax) { 751 slab->us_data += zone->uz_cacheoff; 752 zone->uz_cacheoff += UMA_CACHE_INC; 753 if (zone->uz_cacheoff > zone->uz_cachemax) 754 zone->uz_cacheoff = 0; 755 } 756 #endif 757 758 slab->us_freecount = zone->uz_ipers; 759 slab->us_firstfree = 0; 760 slab->us_flags = flags; 761 for (i = 0; i < zone->uz_ipers; i++) 762 slab->us_freelist[i] = i+1; 763 764 if (zone->uz_init) 765 for (i = 0; i < zone->uz_ipers; i++) 766 zone->uz_init(slab->us_data + (zone->uz_rsize * i), 767 zone->uz_size); 768 ZONE_LOCK(zone); 769 770 if (zone->uz_flags & UMA_ZFLAG_HASH) 771 UMA_HASH_INSERT(&zone->uz_hash, slab, mem); 772 773 zone->uz_pages += zone->uz_ppera; 774 zone->uz_free += zone->uz_ipers; 775 776 777 return (slab); 778 } 779 780 /* 781 * Allocates a number of pages from the system 782 * 783 * Arguments: 784 * zone Unused 785 * bytes The number of bytes requested 786 * wait Shall we wait? 787 * 788 * Returns: 789 * A pointer to the alloced memory or possibly 790 * NULL if M_NOWAIT is set. 791 */ 792 static void * 793 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 794 { 795 void *p; /* Returned page */ 796 797 *pflag = UMA_SLAB_KMEM; 798 p = (void *) kmem_malloc(kmem_map, bytes, wait); 799 800 return (p); 801 } 802 803 /* 804 * Allocates a number of pages from within an object 805 * 806 * Arguments: 807 * zone Unused 808 * bytes The number of bytes requested 809 * wait Shall we wait? 810 * 811 * Returns: 812 * A pointer to the alloced memory or possibly 813 * NULL if M_NOWAIT is set. 814 * 815 * TODO: If we fail during a multi-page allocation release the pages that have 816 * already been allocated. 
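 *
 * This allocator is installed by uma_zone_set_obj(), which reserves a
 * pageable KVA range in uz_kva and a VM object in uz_obj; pages are grabbed
 * one at a time with VM_ALLOC_INTERRUPT and mapped with pmap_qenter().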
817 */ 818 static void * 819 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 820 { 821 vm_offset_t zkva; 822 vm_offset_t retkva; 823 vm_page_t p; 824 int pages; 825 826 retkva = 0; 827 pages = zone->uz_pages; 828 829 /* 830 * This looks a little weird since we're getting one page at a time 831 */ 832 while (bytes > 0) { 833 p = vm_page_alloc(zone->uz_obj, pages, 834 VM_ALLOC_INTERRUPT); 835 if (p == NULL) 836 return (NULL); 837 838 zkva = zone->uz_kva + pages * PAGE_SIZE; 839 if (retkva == 0) 840 retkva = zkva; 841 pmap_qenter(zkva, &p, 1); 842 bytes -= PAGE_SIZE; 843 pages += 1; 844 } 845 846 *flags = UMA_SLAB_PRIV; 847 848 return ((void *)retkva); 849 } 850 851 /* 852 * Frees a number of pages to the system 853 * 854 * Arguments: 855 * mem A pointer to the memory to be freed 856 * size The size of the memory being freed 857 * flags The original p->us_flags field 858 * 859 * Returns: 860 * Nothing 861 * 862 */ 863 static void 864 page_free(void *mem, int size, u_int8_t flags) 865 { 866 vm_map_t map; 867 868 if (flags & UMA_SLAB_KMEM) 869 map = kmem_map; 870 else 871 panic("UMA: page_free used with invalid flags %d\n", flags); 872 873 kmem_free(map, (vm_offset_t)mem, size); 874 } 875 876 /* 877 * Zero fill initializer 878 * 879 * Arguments/Returns follow uma_init specifications 880 * 881 */ 882 static void 883 zero_init(void *mem, int size) 884 { 885 bzero(mem, size); 886 } 887 888 /* 889 * Finish creating a small uma zone. This calculates ipers, and the zone size. 890 * 891 * Arguments 892 * zone The zone we should initialize 893 * 894 * Returns 895 * Nothing 896 */ 897 static void 898 zone_small_init(uma_zone_t zone) 899 { 900 int rsize; 901 int memused; 902 int ipers; 903 904 rsize = zone->uz_size; 905 906 if (rsize < UMA_SMALLEST_UNIT) 907 rsize = UMA_SMALLEST_UNIT; 908 909 if (rsize & zone->uz_align) 910 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1); 911 912 zone->uz_rsize = rsize; 913 914 rsize += 1; /* Account for the byte of linkage */ 915 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize; 916 zone->uz_ppera = 1; 917 918 memused = zone->uz_ipers * zone->uz_rsize; 919 920 /* Can we do any better? */ 921 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) { 922 if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 923 return; 924 ipers = UMA_SLAB_SIZE / zone->uz_rsize; 925 if (ipers > zone->uz_ipers) { 926 zone->uz_flags |= UMA_ZFLAG_OFFPAGE; 927 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0) 928 zone->uz_flags |= UMA_ZFLAG_HASH; 929 zone->uz_ipers = ipers; 930 } 931 } 932 933 } 934 935 /* 936 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do 937 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be 938 * more complicated. 939 * 940 * Arguments 941 * zone The zone we should initialize 942 * 943 * Returns 944 * Nothing 945 */ 946 static void 947 zone_large_init(uma_zone_t zone) 948 { 949 int pages; 950 951 pages = zone->uz_size / UMA_SLAB_SIZE; 952 953 /* Account for remainder */ 954 if ((pages * UMA_SLAB_SIZE) < zone->uz_size) 955 pages++; 956 957 zone->uz_ppera = pages; 958 zone->uz_ipers = 1; 959 960 zone->uz_flags |= UMA_ZFLAG_OFFPAGE; 961 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0) 962 zone->uz_flags |= UMA_ZFLAG_HASH; 963 964 zone->uz_rsize = zone->uz_size; 965 } 966 967 /* 968 * Zone header ctor. This initializes all fields, locks, etc. And inserts 969 * the zone onto the global zone list. 
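 * zone_ctor is reached through uma_zcreate(), which packs its arguments into
 * a struct uma_zctor_args and passes that as udata to
 * uma_zalloc_internal(zones, &args, M_WAITOK).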
970 * 971 * Arguments/Returns follow uma_ctor specifications 972 * udata Actually uma_zcreat_args 973 * 974 */ 975 976 static void 977 zone_ctor(void *mem, int size, void *udata) 978 { 979 struct uma_zctor_args *arg = udata; 980 uma_zone_t zone = mem; 981 int privlc; 982 int cplen; 983 int cpu; 984 985 bzero(zone, size); 986 zone->uz_name = arg->name; 987 zone->uz_size = arg->size; 988 zone->uz_ctor = arg->ctor; 989 zone->uz_dtor = arg->dtor; 990 zone->uz_init = arg->uminit; 991 zone->uz_fini = arg->fini; 992 zone->uz_align = arg->align; 993 zone->uz_free = 0; 994 zone->uz_pages = 0; 995 zone->uz_flags = 0; 996 zone->uz_allocf = page_alloc; 997 zone->uz_freef = page_free; 998 999 if (arg->flags & UMA_ZONE_ZINIT) 1000 zone->uz_init = zero_init; 1001 1002 if (arg->flags & UMA_ZONE_INTERNAL) 1003 zone->uz_flags |= UMA_ZFLAG_INTERNAL; 1004 1005 if (arg->flags & UMA_ZONE_MALLOC) 1006 zone->uz_flags |= UMA_ZFLAG_MALLOC; 1007 1008 if (arg->flags & UMA_ZONE_NOFREE) 1009 zone->uz_flags |= UMA_ZFLAG_NOFREE; 1010 1011 if (arg->flags & UMA_ZONE_VM) 1012 zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE; 1013 1014 if (zone->uz_size > UMA_SLAB_SIZE) 1015 zone_large_init(zone); 1016 else 1017 zone_small_init(zone); 1018 #ifdef UMA_MD_SMALL_ALLOC 1019 if (zone->uz_ppera == 1) { 1020 zone->uz_allocf = uma_small_alloc; 1021 zone->uz_freef = uma_small_free; 1022 } 1023 #endif /* UMA_MD_SMALL_ALLOC */ 1024 1025 if (arg->flags & UMA_ZONE_MTXCLASS) 1026 privlc = 1; 1027 else 1028 privlc = 0; 1029 1030 /* We do this so that the per cpu lock name is unique for each zone */ 1031 memcpy(zone->uz_lname, "PCPU ", 5); 1032 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6); 1033 memcpy(zone->uz_lname+5, zone->uz_name, cplen); 1034 zone->uz_lname[LOCKNAME_LEN - 1] = '\0'; 1035 1036 /* 1037 * If we're putting the slab header in the actual page we need to 1038 * figure out where in each page it goes. This calculates a right 1039 * justified offset into the memory on an ALIGN_PTR boundary. 1040 */ 1041 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) { 1042 int totsize; 1043 int waste; 1044 1045 /* Size of the slab struct and free list */ 1046 totsize = sizeof(struct uma_slab) + zone->uz_ipers; 1047 if (totsize & UMA_ALIGN_PTR) 1048 totsize = (totsize & ~UMA_ALIGN_PTR) + 1049 (UMA_ALIGN_PTR + 1); 1050 zone->uz_pgoff = UMA_SLAB_SIZE - totsize; 1051 1052 waste = zone->uz_pgoff; 1053 waste -= (zone->uz_ipers * zone->uz_rsize); 1054 1055 /* 1056 * This calculates how much space we have for cache line size 1057 * optimizations. It works by offseting each slab slightly. 1058 * Currently it breaks on x86, and so it is disabled. 
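		 * The code that would actually apply the offset lives in
		 * slab_zalloc() and is compiled out with #if 0.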
1059 */ 1060 1061 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) { 1062 zone->uz_cachemax = waste - UMA_CACHE_INC; 1063 zone->uz_cacheoff = 0; 1064 } 1065 1066 totsize = zone->uz_pgoff + sizeof(struct uma_slab) 1067 + zone->uz_ipers; 1068 /* I don't think it's possible, but I'll make sure anyway */ 1069 if (totsize > UMA_SLAB_SIZE) { 1070 printf("zone %s ipers %d rsize %d size %d\n", 1071 zone->uz_name, zone->uz_ipers, zone->uz_rsize, 1072 zone->uz_size); 1073 panic("UMA slab won't fit.\n"); 1074 } 1075 } 1076 1077 if (zone->uz_flags & UMA_ZFLAG_HASH) 1078 hash_alloc(&zone->uz_hash); 1079 1080 #ifdef UMA_DEBUG 1081 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n", 1082 zone->uz_name, zone, 1083 zone->uz_size, zone->uz_ipers, 1084 zone->uz_ppera, zone->uz_pgoff); 1085 #endif 1086 ZONE_LOCK_INIT(zone, privlc); 1087 1088 mtx_lock(&uma_mtx); 1089 LIST_INSERT_HEAD(&uma_zones, zone, uz_link); 1090 mtx_unlock(&uma_mtx); 1091 1092 /* 1093 * Some internal zones don't have room allocated for the per cpu 1094 * caches. If we're internal, bail out here. 1095 */ 1096 1097 if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 1098 return; 1099 1100 if (zone->uz_ipers < UMA_BUCKET_SIZE) 1101 zone->uz_count = zone->uz_ipers - 1; 1102 else 1103 zone->uz_count = UMA_BUCKET_SIZE - 1; 1104 1105 for (cpu = 0; cpu < maxcpu; cpu++) 1106 CPU_LOCK_INIT(zone, cpu, privlc); 1107 } 1108 1109 /* 1110 * Zone header dtor. This frees all data, destroys locks, frees the hash table 1111 * and removes the zone from the global list. 1112 * 1113 * Arguments/Returns follow uma_dtor specifications 1114 * udata unused 1115 */ 1116 1117 static void 1118 zone_dtor(void *arg, int size, void *udata) 1119 { 1120 uma_zone_t zone; 1121 int cpu; 1122 1123 zone = (uma_zone_t)arg; 1124 1125 ZONE_LOCK(zone); 1126 zone->uz_wssize = 0; 1127 ZONE_UNLOCK(zone); 1128 1129 mtx_lock(&uma_mtx); 1130 LIST_REMOVE(zone, uz_link); 1131 zone_drain(zone); 1132 mtx_unlock(&uma_mtx); 1133 1134 ZONE_LOCK(zone); 1135 if (zone->uz_free != 0) 1136 printf("Zone %s was not empty (%d items). Lost %d pages of memory.\n", 1137 zone->uz_name, zone->uz_free, zone->uz_pages); 1138 1139 if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0) 1140 for (cpu = 0; cpu < maxcpu; cpu++) 1141 CPU_LOCK_FINI(zone, cpu); 1142 1143 ZONE_UNLOCK(zone); 1144 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0) 1145 hash_free(&zone->uz_hash); 1146 1147 ZONE_LOCK_FINI(zone); 1148 } 1149 /* 1150 * Traverses every zone in the system and calls a callback 1151 * 1152 * Arguments: 1153 * zfunc A pointer to a function which accepts a zone 1154 * as an argument. 
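 *			Note that uma_mtx is held across each call of zfunc.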
1155 * 1156 * Returns: 1157 * Nothing 1158 */ 1159 static void 1160 zone_foreach(void (*zfunc)(uma_zone_t)) 1161 { 1162 uma_zone_t zone; 1163 1164 mtx_lock(&uma_mtx); 1165 LIST_FOREACH(zone, &uma_zones, uz_link) { 1166 zfunc(zone); 1167 } 1168 mtx_unlock(&uma_mtx); 1169 } 1170 1171 /* Public functions */ 1172 /* See uma.h */ 1173 void 1174 uma_startup(void *bootmem) 1175 { 1176 struct uma_zctor_args args; 1177 uma_slab_t slab; 1178 int slabsize; 1179 int i; 1180 1181 #ifdef UMA_DEBUG 1182 printf("Creating uma zone headers zone.\n"); 1183 #endif 1184 #ifdef SMP 1185 maxcpu = mp_maxid + 1; 1186 #else 1187 maxcpu = 1; 1188 #endif 1189 #ifdef UMA_DEBUG 1190 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid); 1191 Debugger("stop"); 1192 #endif 1193 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF); 1194 /* "manually" Create the initial zone */ 1195 args.name = "UMA Zones"; 1196 args.size = sizeof(struct uma_zone) + 1197 (sizeof(struct uma_cache) * (maxcpu - 1)); 1198 args.ctor = zone_ctor; 1199 args.dtor = zone_dtor; 1200 args.uminit = zero_init; 1201 args.fini = NULL; 1202 args.align = 32 - 1; 1203 args.flags = UMA_ZONE_INTERNAL; 1204 /* The initial zone has no Per cpu queues so it's smaller */ 1205 zone_ctor(zones, sizeof(struct uma_zone), &args); 1206 1207 #ifdef UMA_DEBUG 1208 printf("Filling boot free list.\n"); 1209 #endif 1210 for (i = 0; i < UMA_BOOT_PAGES; i++) { 1211 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE)); 1212 slab->us_data = (u_int8_t *)slab; 1213 slab->us_flags = UMA_SLAB_BOOT; 1214 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link); 1215 uma_boot_free++; 1216 } 1217 1218 #ifdef UMA_DEBUG 1219 printf("Creating slab zone.\n"); 1220 #endif 1221 1222 /* 1223 * This is the max number of free list items we'll have with 1224 * offpage slabs. 
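	 *
	 * For example, assuming a 4096 byte UMA_SLAB_SIZE, a UMA_MAX_WASTE
	 * of 256 and a 64 byte struct uma_slab (illustrative values only),
	 * this works out to (4096 - 64) / 256 + 1 = 16 free list entries,
	 * plus the 64 byte header, for an 80 byte slab zone item.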
1225 */ 1226 1227 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab); 1228 slabsize /= UMA_MAX_WASTE; 1229 slabsize++; /* In case there it's rounded */ 1230 slabsize += sizeof(struct uma_slab); 1231 1232 /* Now make a zone for slab headers */ 1233 slabzone = uma_zcreate("UMA Slabs", 1234 slabsize, 1235 NULL, NULL, NULL, NULL, 1236 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); 1237 1238 hashzone = uma_zcreate("UMA Hash", 1239 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, 1240 NULL, NULL, NULL, NULL, 1241 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); 1242 1243 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket), 1244 NULL, NULL, NULL, NULL, 1245 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); 1246 1247 #ifdef UMA_MD_SMALL_ALLOC 1248 booted = 1; 1249 #endif 1250 1251 #ifdef UMA_DEBUG 1252 printf("UMA startup complete.\n"); 1253 #endif 1254 } 1255 1256 /* see uma.h */ 1257 void 1258 uma_startup2(void) 1259 { 1260 booted = 1; 1261 bucket_enable(); 1262 #ifdef UMA_DEBUG 1263 printf("UMA startup2 complete.\n"); 1264 #endif 1265 } 1266 1267 /* 1268 * Initialize our callout handle 1269 * 1270 */ 1271 1272 static void 1273 uma_startup3(void) 1274 { 1275 #ifdef UMA_DEBUG 1276 printf("Starting callout.\n"); 1277 #endif 1278 callout_init(&uma_callout, 0); 1279 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); 1280 #ifdef UMA_DEBUG 1281 printf("UMA startup3 complete.\n"); 1282 #endif 1283 } 1284 1285 /* See uma.h */ 1286 uma_zone_t 1287 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor, 1288 uma_init uminit, uma_fini fini, int align, u_int16_t flags) 1289 1290 { 1291 struct uma_zctor_args args; 1292 1293 /* This stuff is essential for the zone ctor */ 1294 args.name = name; 1295 args.size = size; 1296 args.ctor = ctor; 1297 args.dtor = dtor; 1298 args.uminit = uminit; 1299 args.fini = fini; 1300 args.align = align; 1301 args.flags = flags; 1302 1303 return (uma_zalloc_internal(zones, &args, M_WAITOK)); 1304 } 1305 1306 /* See uma.h */ 1307 void 1308 uma_zdestroy(uma_zone_t zone) 1309 { 1310 uma_zfree_internal(zones, zone, NULL, 0); 1311 } 1312 1313 /* See uma.h */ 1314 void * 1315 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) 1316 { 1317 void *item; 1318 uma_cache_t cache; 1319 uma_bucket_t bucket; 1320 int cpu; 1321 1322 /* This is the fast path allocation */ 1323 #ifdef UMA_DEBUG_ALLOC_1 1324 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); 1325 #endif 1326 1327 if (!(flags & M_NOWAIT)) { 1328 KASSERT(curthread->td_intr_nesting_level == 0, 1329 ("malloc(M_WAITOK) in interrupt context")); 1330 WITNESS_SLEEP(1, NULL); 1331 } 1332 1333 zalloc_restart: 1334 cpu = PCPU_GET(cpuid); 1335 CPU_LOCK(zone, cpu); 1336 cache = &zone->uz_cpu[cpu]; 1337 1338 zalloc_start: 1339 bucket = cache->uc_allocbucket; 1340 1341 if (bucket) { 1342 if (bucket->ub_ptr > -1) { 1343 item = bucket->ub_bucket[bucket->ub_ptr]; 1344 #ifdef INVARIANTS 1345 bucket->ub_bucket[bucket->ub_ptr] = NULL; 1346 #endif 1347 bucket->ub_ptr--; 1348 KASSERT(item != NULL, 1349 ("uma_zalloc: Bucket pointer mangled.")); 1350 cache->uc_allocs++; 1351 #ifdef INVARIANTS 1352 ZONE_LOCK(zone); 1353 uma_dbg_alloc(zone, NULL, item); 1354 ZONE_UNLOCK(zone); 1355 #endif 1356 CPU_UNLOCK(zone, cpu); 1357 if (zone->uz_ctor) 1358 zone->uz_ctor(item, zone->uz_size, udata); 1359 if (flags & M_ZERO) 1360 bzero(item, zone->uz_size); 1361 return (item); 1362 } else if (cache->uc_freebucket) { 1363 /* 1364 * We have run out of items in our allocbucket. 1365 * See if we can switch with our free bucket. 
1366 */ 1367 if (cache->uc_freebucket->ub_ptr > -1) { 1368 uma_bucket_t swap; 1369 1370 #ifdef UMA_DEBUG_ALLOC 1371 printf("uma_zalloc: Swapping empty with alloc.\n"); 1372 #endif 1373 swap = cache->uc_freebucket; 1374 cache->uc_freebucket = cache->uc_allocbucket; 1375 cache->uc_allocbucket = swap; 1376 1377 goto zalloc_start; 1378 } 1379 } 1380 } 1381 ZONE_LOCK(zone); 1382 /* Since we have locked the zone we may as well send back our stats */ 1383 zone->uz_allocs += cache->uc_allocs; 1384 cache->uc_allocs = 0; 1385 1386 /* Our old one is now a free bucket */ 1387 if (cache->uc_allocbucket) { 1388 KASSERT(cache->uc_allocbucket->ub_ptr == -1, 1389 ("uma_zalloc_arg: Freeing a non free bucket.")); 1390 LIST_INSERT_HEAD(&zone->uz_free_bucket, 1391 cache->uc_allocbucket, ub_link); 1392 cache->uc_allocbucket = NULL; 1393 } 1394 1395 /* Check the free list for a new alloc bucket */ 1396 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 1397 KASSERT(bucket->ub_ptr != -1, 1398 ("uma_zalloc_arg: Returning an empty bucket.")); 1399 1400 LIST_REMOVE(bucket, ub_link); 1401 cache->uc_allocbucket = bucket; 1402 ZONE_UNLOCK(zone); 1403 goto zalloc_start; 1404 } 1405 /* We are no longer associated with this cpu!!! */ 1406 CPU_UNLOCK(zone, cpu); 1407 1408 /* Bump up our uz_count so we get here less */ 1409 if (zone->uz_count < UMA_BUCKET_SIZE - 1) 1410 zone->uz_count++; 1411 1412 /* 1413 * Now lets just fill a bucket and put it on the free list. If that 1414 * works we'll restart the allocation from the begining. 1415 */ 1416 1417 if (uma_zalloc_bucket(zone, flags)) { 1418 ZONE_UNLOCK(zone); 1419 goto zalloc_restart; 1420 } 1421 ZONE_UNLOCK(zone); 1422 /* 1423 * We may not be able to get a bucket so return an actual item. 1424 */ 1425 #ifdef UMA_DEBUG 1426 printf("uma_zalloc_arg: Bucketzone returned NULL\n"); 1427 #endif 1428 1429 return (uma_zalloc_internal(zone, udata, flags)); 1430 } 1431 1432 static uma_slab_t 1433 uma_zone_slab(uma_zone_t zone, int flags) 1434 { 1435 uma_slab_t slab; 1436 1437 /* 1438 * This is to prevent us from recursively trying to allocate 1439 * buckets. The problem is that if an allocation forces us to 1440 * grab a new bucket we will call page_alloc, which will go off 1441 * and cause the vm to allocate vm_map_entries. If we need new 1442 * buckets there too we will recurse in kmem_alloc and bad 1443 * things happen. So instead we return a NULL bucket, and make 1444 * the code that allocates buckets smart enough to deal with it 1445 */ 1446 if (zone == bucketzone && zone->uz_recurse != 0) 1447 return (NULL); 1448 1449 slab = NULL; 1450 1451 for (;;) { 1452 /* 1453 * Find a slab with some space. Prefer slabs that are partially 1454 * used over those that are totally full. This helps to reduce 1455 * fragmentation. 1456 */ 1457 if (zone->uz_free != 0) { 1458 if (!LIST_EMPTY(&zone->uz_part_slab)) { 1459 slab = LIST_FIRST(&zone->uz_part_slab); 1460 } else { 1461 slab = LIST_FIRST(&zone->uz_free_slab); 1462 LIST_REMOVE(slab, us_link); 1463 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, 1464 us_link); 1465 } 1466 return (slab); 1467 } 1468 1469 /* 1470 * M_NOVM means don't ask at all! 
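		 * Callers such as uma_zalloc_bucket() and uma_zfree_arg()
		 * pass it when allocating bucket structures for
		 * UMA_ZFLAG_BUCKETCACHE zones, so those zones never pull in
		 * fresh pages just to grow their bucket cache.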
1471 */ 1472 if (flags & M_NOVM) 1473 break; 1474 1475 if (zone->uz_maxpages && 1476 zone->uz_pages >= zone->uz_maxpages) { 1477 zone->uz_flags |= UMA_ZFLAG_FULL; 1478 1479 if (flags & M_NOWAIT) 1480 break; 1481 else 1482 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0); 1483 continue; 1484 } 1485 zone->uz_recurse++; 1486 slab = slab_zalloc(zone, flags); 1487 zone->uz_recurse--; 1488 /* 1489 * If we got a slab here it's safe to mark it partially used 1490 * and return. We assume that the caller is going to remove 1491 * at least one item. 1492 */ 1493 if (slab) { 1494 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); 1495 return (slab); 1496 } 1497 /* 1498 * We might not have been able to get a slab but another cpu 1499 * could have while we were unlocked. Check again before we 1500 * fail. 1501 */ 1502 if (flags & M_NOWAIT) 1503 flags |= M_NOVM; 1504 } 1505 return (slab); 1506 } 1507 1508 static __inline void * 1509 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab) 1510 { 1511 void *item; 1512 u_int8_t freei; 1513 1514 freei = slab->us_firstfree; 1515 slab->us_firstfree = slab->us_freelist[freei]; 1516 item = slab->us_data + (zone->uz_rsize * freei); 1517 1518 slab->us_freecount--; 1519 zone->uz_free--; 1520 #ifdef INVARIANTS 1521 uma_dbg_alloc(zone, slab, item); 1522 #endif 1523 /* Move this slab to the full list */ 1524 if (slab->us_freecount == 0) { 1525 LIST_REMOVE(slab, us_link); 1526 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link); 1527 } 1528 1529 return (item); 1530 } 1531 1532 static int 1533 uma_zalloc_bucket(uma_zone_t zone, int flags) 1534 { 1535 uma_bucket_t bucket; 1536 uma_slab_t slab; 1537 1538 /* 1539 * Try this zone's free list first so we don't allocate extra buckets. 1540 */ 1541 1542 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 1543 KASSERT(bucket->ub_ptr == -1, 1544 ("uma_zalloc_bucket: Bucket on free list is not empty.")); 1545 LIST_REMOVE(bucket, ub_link); 1546 } else { 1547 int bflags; 1548 1549 bflags = flags; 1550 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE) 1551 bflags |= M_NOVM; 1552 1553 ZONE_UNLOCK(zone); 1554 bucket = uma_zalloc_internal(bucketzone, 1555 NULL, bflags); 1556 ZONE_LOCK(zone); 1557 if (bucket != NULL) { 1558 #ifdef INVARIANTS 1559 bzero(bucket, bucketzone->uz_size); 1560 #endif 1561 bucket->ub_ptr = -1; 1562 } 1563 } 1564 1565 if (bucket == NULL) 1566 return (0); 1567 1568 #ifdef SMP 1569 /* 1570 * This code is here to limit the number of simultaneous bucket fills 1571 * for any given zone to the number of per cpu caches in this zone. This 1572 * is done so that we don't allocate more memory than we really need. 1573 */ 1574 if (zone->uz_fills >= mp_ncpus) 1575 goto done; 1576 1577 #endif 1578 zone->uz_fills++; 1579 1580 /* Try to keep the buckets totally full */ 1581 while ((slab = uma_zone_slab(zone, flags)) != NULL && 1582 bucket->ub_ptr < zone->uz_count) { 1583 while (slab->us_freecount && 1584 bucket->ub_ptr < zone->uz_count) { 1585 bucket->ub_bucket[++bucket->ub_ptr] = 1586 uma_slab_alloc(zone, slab); 1587 } 1588 /* Don't block on the next fill */ 1589 flags |= M_NOWAIT; 1590 } 1591 1592 zone->uz_fills--; 1593 1594 if (bucket->ub_ptr != -1) { 1595 LIST_INSERT_HEAD(&zone->uz_full_bucket, 1596 bucket, ub_link); 1597 return (1); 1598 } 1599 #ifdef SMP 1600 done: 1601 #endif 1602 uma_zfree_internal(bucketzone, bucket, NULL, 0); 1603 1604 return (0); 1605 } 1606 /* 1607 * Allocates an item for an internal zone 1608 * 1609 * Arguments 1610 * zone The zone to alloc for. 
1611 * udata The data to be passed to the constructor. 1612 * flags M_WAITOK, M_NOWAIT, M_ZERO. 1613 * 1614 * Returns 1615 * NULL if there is no memory and M_NOWAIT is set 1616 * An item if successful 1617 */ 1618 1619 static void * 1620 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags) 1621 { 1622 uma_slab_t slab; 1623 void *item; 1624 1625 item = NULL; 1626 1627 /* 1628 * This is to stop us from allocating per cpu buckets while we're 1629 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the 1630 * boot pages. 1631 */ 1632 1633 if (bucketdisable && zone == bucketzone) 1634 return (NULL); 1635 1636 #ifdef UMA_DEBUG_ALLOC 1637 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); 1638 #endif 1639 ZONE_LOCK(zone); 1640 1641 slab = uma_zone_slab(zone, flags); 1642 if (slab == NULL) { 1643 ZONE_UNLOCK(zone); 1644 return (NULL); 1645 } 1646 1647 item = uma_slab_alloc(zone, slab); 1648 1649 ZONE_UNLOCK(zone); 1650 1651 if (zone->uz_ctor != NULL) 1652 zone->uz_ctor(item, zone->uz_size, udata); 1653 if (flags & M_ZERO) 1654 bzero(item, zone->uz_size); 1655 1656 return (item); 1657 } 1658 1659 /* See uma.h */ 1660 void 1661 uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 1662 { 1663 uma_cache_t cache; 1664 uma_bucket_t bucket; 1665 int bflags; 1666 int cpu; 1667 1668 /* This is the fast path free */ 1669 #ifdef UMA_DEBUG_ALLOC_1 1670 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); 1671 #endif 1672 /* 1673 * The race here is acceptable. If we miss it we'll just have to wait 1674 * a little longer for the limits to be reset. 1675 */ 1676 1677 if (zone->uz_flags & UMA_ZFLAG_FULL) 1678 goto zfree_internal; 1679 1680 if (zone->uz_dtor) 1681 zone->uz_dtor(item, zone->uz_size, udata); 1682 1683 zfree_restart: 1684 cpu = PCPU_GET(cpuid); 1685 CPU_LOCK(zone, cpu); 1686 cache = &zone->uz_cpu[cpu]; 1687 1688 zfree_start: 1689 bucket = cache->uc_freebucket; 1690 1691 if (bucket) { 1692 /* 1693 * Do we have room in our bucket? It is OK for this uz count 1694 * check to be slightly out of sync. 1695 */ 1696 1697 if (bucket->ub_ptr < zone->uz_count) { 1698 bucket->ub_ptr++; 1699 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL, 1700 ("uma_zfree: Freeing to non free bucket index.")); 1701 bucket->ub_bucket[bucket->ub_ptr] = item; 1702 #ifdef INVARIANTS 1703 ZONE_LOCK(zone); 1704 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 1705 uma_dbg_free(zone, udata, item); 1706 else 1707 uma_dbg_free(zone, NULL, item); 1708 ZONE_UNLOCK(zone); 1709 #endif 1710 CPU_UNLOCK(zone, cpu); 1711 return; 1712 } else if (cache->uc_allocbucket) { 1713 #ifdef UMA_DEBUG_ALLOC 1714 printf("uma_zfree: Swapping buckets.\n"); 1715 #endif 1716 /* 1717 * We have run out of space in our freebucket. 1718 * See if we can switch with our alloc bucket. 1719 */ 1720 if (cache->uc_allocbucket->ub_ptr < 1721 cache->uc_freebucket->ub_ptr) { 1722 uma_bucket_t swap; 1723 1724 swap = cache->uc_freebucket; 1725 cache->uc_freebucket = cache->uc_allocbucket; 1726 cache->uc_allocbucket = swap; 1727 1728 goto zfree_start; 1729 } 1730 } 1731 } 1732 1733 /* 1734 * We can get here for two reasons: 1735 * 1736 * 1) The buckets are NULL 1737 * 2) The alloc and free buckets are both somewhat full. 1738 * 1739 */ 1740 1741 ZONE_LOCK(zone); 1742 1743 bucket = cache->uc_freebucket; 1744 cache->uc_freebucket = NULL; 1745 1746 /* Can we throw this on the zone full list? 
*/ 1747 if (bucket != NULL) { 1748 #ifdef UMA_DEBUG_ALLOC 1749 printf("uma_zfree: Putting old bucket on the free list.\n"); 1750 #endif 1751 /* ub_ptr is pointing to the last free item */ 1752 KASSERT(bucket->ub_ptr != -1, 1753 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); 1754 LIST_INSERT_HEAD(&zone->uz_full_bucket, 1755 bucket, ub_link); 1756 } 1757 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 1758 LIST_REMOVE(bucket, ub_link); 1759 ZONE_UNLOCK(zone); 1760 cache->uc_freebucket = bucket; 1761 goto zfree_start; 1762 } 1763 /* We're done with this CPU now */ 1764 CPU_UNLOCK(zone, cpu); 1765 1766 /* And the zone.. */ 1767 ZONE_UNLOCK(zone); 1768 1769 #ifdef UMA_DEBUG_ALLOC 1770 printf("uma_zfree: Allocating new free bucket.\n"); 1771 #endif 1772 bflags = M_NOWAIT; 1773 1774 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE) 1775 bflags |= M_NOVM; 1776 #ifdef INVARIANTS 1777 bflags |= M_ZERO; 1778 #endif 1779 bucket = uma_zalloc_internal(bucketzone, 1780 NULL, bflags); 1781 if (bucket) { 1782 bucket->ub_ptr = -1; 1783 ZONE_LOCK(zone); 1784 LIST_INSERT_HEAD(&zone->uz_free_bucket, 1785 bucket, ub_link); 1786 ZONE_UNLOCK(zone); 1787 goto zfree_restart; 1788 } 1789 1790 /* 1791 * If nothing else caught this, we'll just do an internal free. 1792 */ 1793 1794 zfree_internal: 1795 1796 uma_zfree_internal(zone, item, udata, 0); 1797 1798 return; 1799 1800 } 1801 1802 /* 1803 * Frees an item to an INTERNAL zone or allocates a free bucket 1804 * 1805 * Arguments: 1806 * zone The zone to free to 1807 * item The item we're freeing 1808 * udata User supplied data for the dtor 1809 * skip Skip the dtor, it was done in uma_zfree_arg 1810 */ 1811 1812 static void 1813 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip) 1814 { 1815 uma_slab_t slab; 1816 u_int8_t *mem; 1817 u_int8_t freei; 1818 1819 if (!skip && zone->uz_dtor) 1820 zone->uz_dtor(item, zone->uz_size, udata); 1821 1822 ZONE_LOCK(zone); 1823 1824 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) { 1825 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); 1826 if (zone->uz_flags & UMA_ZFLAG_HASH) 1827 slab = hash_sfind(&zone->uz_hash, mem); 1828 else { 1829 mem += zone->uz_pgoff; 1830 slab = (uma_slab_t)mem; 1831 } 1832 } else { 1833 slab = (uma_slab_t)udata; 1834 } 1835 1836 /* Do we need to remove from any lists? 
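	 * A slab that was completely full goes back on the partial slab
	 * list, and a slab whose last allocated item is being freed moves
	 * to the free slab list.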
*/ 1837 if (slab->us_freecount+1 == zone->uz_ipers) { 1838 LIST_REMOVE(slab, us_link); 1839 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); 1840 } else if (slab->us_freecount == 0) { 1841 LIST_REMOVE(slab, us_link); 1842 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); 1843 } 1844 1845 /* Slab management stuff */ 1846 freei = ((unsigned long)item - (unsigned long)slab->us_data) 1847 / zone->uz_rsize; 1848 1849 #ifdef INVARIANTS 1850 if (!skip) 1851 uma_dbg_free(zone, slab, item); 1852 #endif 1853 1854 slab->us_freelist[freei] = slab->us_firstfree; 1855 slab->us_firstfree = freei; 1856 slab->us_freecount++; 1857 1858 /* Zone statistics */ 1859 zone->uz_free++; 1860 1861 if (zone->uz_flags & UMA_ZFLAG_FULL) { 1862 if (zone->uz_pages < zone->uz_maxpages) 1863 zone->uz_flags &= ~UMA_ZFLAG_FULL; 1864 1865 /* We can handle one more allocation */ 1866 wakeup_one(zone); 1867 } 1868 1869 ZONE_UNLOCK(zone); 1870 } 1871 1872 /* See uma.h */ 1873 void 1874 uma_zone_set_max(uma_zone_t zone, int nitems) 1875 { 1876 ZONE_LOCK(zone); 1877 if (zone->uz_ppera > 1) 1878 zone->uz_maxpages = nitems * zone->uz_ppera; 1879 else 1880 zone->uz_maxpages = nitems / zone->uz_ipers; 1881 1882 if (zone->uz_maxpages * zone->uz_ipers < nitems) 1883 zone->uz_maxpages++; 1884 1885 ZONE_UNLOCK(zone); 1886 } 1887 1888 /* See uma.h */ 1889 void 1890 uma_zone_set_freef(uma_zone_t zone, uma_free freef) 1891 { 1892 ZONE_LOCK(zone); 1893 1894 zone->uz_freef = freef; 1895 1896 ZONE_UNLOCK(zone); 1897 } 1898 1899 /* See uma.h */ 1900 void 1901 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) 1902 { 1903 ZONE_LOCK(zone); 1904 1905 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC; 1906 zone->uz_allocf = allocf; 1907 1908 ZONE_UNLOCK(zone); 1909 } 1910 1911 /* See uma.h */ 1912 int 1913 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) 1914 { 1915 int pages; 1916 vm_offset_t kva; 1917 1918 mtx_lock(&Giant); 1919 1920 pages = count / zone->uz_ipers; 1921 1922 if (pages * zone->uz_ipers < count) 1923 pages++; 1924 1925 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE); 1926 1927 if (kva == 0) { 1928 mtx_unlock(&Giant); 1929 return (0); 1930 } 1931 1932 1933 if (obj == NULL) 1934 obj = vm_object_allocate(OBJT_DEFAULT, 1935 pages); 1936 else 1937 _vm_object_allocate(OBJT_DEFAULT, 1938 pages, obj); 1939 1940 ZONE_LOCK(zone); 1941 zone->uz_kva = kva; 1942 zone->uz_obj = obj; 1943 zone->uz_maxpages = pages; 1944 1945 zone->uz_allocf = obj_alloc; 1946 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC; 1947 1948 ZONE_UNLOCK(zone); 1949 mtx_unlock(&Giant); 1950 1951 return (1); 1952 } 1953 1954 /* See uma.h */ 1955 void 1956 uma_prealloc(uma_zone_t zone, int items) 1957 { 1958 int slabs; 1959 uma_slab_t slab; 1960 1961 ZONE_LOCK(zone); 1962 slabs = items / zone->uz_ipers; 1963 if (slabs * zone->uz_ipers < items) 1964 slabs++; 1965 1966 while (slabs > 0) { 1967 slab = slab_zalloc(zone, M_WAITOK); 1968 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); 1969 slabs--; 1970 } 1971 ZONE_UNLOCK(zone); 1972 } 1973 1974 /* See uma.h */ 1975 void 1976 uma_reclaim(void) 1977 { 1978 /* 1979 * You might think that the delay below would improve performance since 1980 * the allocator will give away memory that it may ask for immediately. 1981 * Really, it makes things worse, since cpu cycles are so much cheaper 1982 * than disk activity. 
1983 */ 1984 #if 0 1985 static struct timeval tv = {0}; 1986 struct timeval now; 1987 getmicrouptime(&now); 1988 if (now.tv_sec > tv.tv_sec + 30) 1989 tv = now; 1990 else 1991 return; 1992 #endif 1993 #ifdef UMA_DEBUG 1994 printf("UMA: vm asked us to release pages!\n"); 1995 #endif 1996 bucket_enable(); 1997 zone_foreach(zone_drain); 1998 1999 /* 2000 * Some slabs may have been freed but this zone will be visited early 2001 * we visit again so that we can free pages that are empty once other 2002 * zones are drained. We have to do the same for buckets. 2003 */ 2004 zone_drain(slabzone); 2005 zone_drain(bucketzone); 2006 } 2007 2008 void * 2009 uma_large_malloc(int size, int wait) 2010 { 2011 void *mem; 2012 uma_slab_t slab; 2013 u_int8_t flags; 2014 2015 slab = uma_zalloc_internal(slabzone, NULL, wait); 2016 if (slab == NULL) 2017 return (NULL); 2018 2019 mem = page_alloc(NULL, size, &flags, wait); 2020 if (mem) { 2021 vsetslab((vm_offset_t)mem, slab); 2022 slab->us_data = mem; 2023 slab->us_flags = flags | UMA_SLAB_MALLOC; 2024 slab->us_size = size; 2025 } else { 2026 uma_zfree_internal(slabzone, slab, NULL, 0); 2027 } 2028 2029 2030 return (mem); 2031 } 2032 2033 void 2034 uma_large_free(uma_slab_t slab) 2035 { 2036 vsetobj((vm_offset_t)slab->us_data, kmem_object); 2037 page_free(slab->us_data, slab->us_size, slab->us_flags); 2038 uma_zfree_internal(slabzone, slab, NULL, 0); 2039 } 2040 2041 void 2042 uma_print_stats(void) 2043 { 2044 zone_foreach(uma_print_zone); 2045 } 2046 2047 void 2048 uma_print_zone(uma_zone_t zone) 2049 { 2050 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", 2051 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags, 2052 zone->uz_ipers, zone->uz_ppera, 2053 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free); 2054 } 2055 2056 /* 2057 * Sysctl handler for vm.zone 2058 * 2059 * stolen from vm_zone.c 2060 */ 2061 static int 2062 sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 2063 { 2064 int error, len, cnt; 2065 const int linesize = 128; /* conservative */ 2066 int totalfree; 2067 char *tmpbuf, *offset; 2068 uma_zone_t z; 2069 char *p; 2070 2071 cnt = 0; 2072 mtx_lock(&uma_mtx); 2073 LIST_FOREACH(z, &uma_zones, uz_link) 2074 cnt++; 2075 mtx_unlock(&uma_mtx); 2076 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize, 2077 M_TEMP, M_WAITOK); 2078 len = snprintf(tmpbuf, linesize, 2079 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n"); 2080 if (cnt == 0) 2081 tmpbuf[len - 1] = '\0'; 2082 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len); 2083 if (error || cnt == 0) 2084 goto out; 2085 offset = tmpbuf; 2086 mtx_lock(&uma_mtx); 2087 LIST_FOREACH(z, &uma_zones, uz_link) { 2088 if (cnt == 0) /* list may have changed size */ 2089 break; 2090 ZONE_LOCK(z); 2091 totalfree = z->uz_free + z->uz_cachefree; 2092 len = snprintf(offset, linesize, 2093 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", 2094 z->uz_name, z->uz_size, 2095 z->uz_maxpages * z->uz_ipers, 2096 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree, 2097 totalfree, 2098 (unsigned long long)z->uz_allocs); 2099 ZONE_UNLOCK(z); 2100 for (p = offset + 12; p > offset && *p == ' '; --p) 2101 /* nothing */ ; 2102 p[1] = ':'; 2103 cnt--; 2104 offset += len; 2105 } 2106 mtx_unlock(&uma_mtx); 2107 *offset++ = '\0'; 2108 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); 2109 out: 2110 FREE(tmpbuf, M_TEMP); 2111 return (error); 2112 } 2113
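
/*
 * Example usage of the public interface above (an illustrative sketch only;
 * "struct foo" and foo_zone are hypothetical caller supplied names):
 *
 *	static uma_zone_t foo_zone;
 *	struct foo *p;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	uma_zone_set_max(foo_zone, 1024);
 *
 *	p = uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree_arg(foo_zone, p, NULL);
 *
 *	uma_zdestroy(foo_zone);
 */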