1 /* 2 * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 * 28 */ 29 30 /* 31 * uma_core.c Implementation of the Universal Memory allocator 32 * 33 * This allocator is intended to replace the multitude of similar object caches 34 * in the standard FreeBSD kernel. The intent is to be flexible as well as 35 * effecient. A primary design goal is to return unused memory to the rest of 36 * the system. This will make the system as a whole more flexible due to the 37 * ability to move memory to subsystems which most need it instead of leaving 38 * pools of reserved memory unused. 39 * 40 * The basic ideas stem from similar slab/zone based allocators whose algorithms 41 * are well known. 42 * 43 */ 44 45 /* 46 * TODO: 47 * - Improve memory usage for large allocations 48 * - Investigate cache size adjustments 49 */ 50 51 /* I should really use ktr.. */ 52 /* 53 #define UMA_DEBUG 1 54 #define UMA_DEBUG_ALLOC 1 55 #define UMA_DEBUG_ALLOC_1 1 56 */ 57 58 59 #include "opt_param.h" 60 #include <sys/param.h> 61 #include <sys/systm.h> 62 #include <sys/kernel.h> 63 #include <sys/types.h> 64 #include <sys/queue.h> 65 #include <sys/malloc.h> 66 #include <sys/lock.h> 67 #include <sys/sysctl.h> 68 #include <sys/mutex.h> 69 #include <sys/proc.h> 70 #include <sys/smp.h> 71 #include <sys/vmmeter.h> 72 73 #include <vm/vm.h> 74 #include <vm/vm_object.h> 75 #include <vm/vm_page.h> 76 #include <vm/vm_param.h> 77 #include <vm/vm_map.h> 78 #include <vm/vm_kern.h> 79 #include <vm/vm_extern.h> 80 #include <vm/uma.h> 81 #include <vm/uma_int.h> 82 #include <vm/uma_dbg.h> 83 84 /* 85 * This is the zone from which all zones are spawned. The idea is that even 86 * the zone heads are allocated from the allocator, so we use the bss section 87 * to bootstrap us. 88 */ 89 static struct uma_zone masterzone; 90 static uma_zone_t zones = &masterzone; 91 92 /* This is the zone from which all of uma_slab_t's are allocated. */ 93 static uma_zone_t slabzone; 94 95 /* 96 * The initial hash tables come out of this zone so they can be allocated 97 * prior to malloc coming up. 98 */ 99 static uma_zone_t hashzone; 100 101 /* 102 * Zone that buckets come from. 
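 * (Buckets are the small arrays of item pointers that back the per cpu
 * caches; see struct uma_bucket.)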
103 */ 104 static uma_zone_t bucketzone; 105 106 /* 107 * Are we allowed to allocate buckets? 108 */ 109 static int bucketdisable = 1; 110 111 /* Linked list of all zones in the system */ 112 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones); 113 114 /* This mutex protects the zone list */ 115 static struct mtx uma_mtx; 116 117 /* Linked list of boot time pages */ 118 static LIST_HEAD(,uma_slab) uma_boot_pages = 119 LIST_HEAD_INITIALIZER(&uma_boot_pages); 120 121 /* Count of free boottime pages */ 122 static int uma_boot_free = 0; 123 124 /* Is the VM done starting up? */ 125 static int booted = 0; 126 127 /* This is the handle used to schedule our working set calculator */ 128 static struct callout uma_callout; 129 130 /* This is mp_maxid + 1, for use while looping over each cpu */ 131 static int maxcpu; 132 133 /* 134 * This structure is passed as the zone ctor arg so that I don't have to create 135 * a special allocation function just for zones. 136 */ 137 struct uma_zctor_args { 138 char *name; 139 size_t size; 140 uma_ctor ctor; 141 uma_dtor dtor; 142 uma_init uminit; 143 uma_fini fini; 144 int align; 145 u_int16_t flags; 146 }; 147 148 /* 149 * This is the malloc hash table which is used to find the zone that a 150 * malloc allocation came from. It is not currently resizeable. The 151 * memory for the actual hash bucket is allocated in kmeminit. 152 */ 153 struct uma_hash mhash; 154 struct uma_hash *mallochash = &mhash; 155 156 /* Prototypes.. */ 157 158 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int); 159 static void *page_alloc(uma_zone_t, int, u_int8_t *, int); 160 static void page_free(void *, int, u_int8_t); 161 static uma_slab_t slab_zalloc(uma_zone_t, int); 162 static void cache_drain(uma_zone_t); 163 static void bucket_drain(uma_zone_t, uma_bucket_t); 164 static void zone_drain(uma_zone_t); 165 static void zone_ctor(void *, int, void *); 166 static void zone_dtor(void *, int, void *); 167 static void zero_init(void *, int); 168 static void zone_small_init(uma_zone_t zone); 169 static void zone_large_init(uma_zone_t zone); 170 static void zone_foreach(void (*zfunc)(uma_zone_t)); 171 static void zone_timeout(uma_zone_t zone); 172 static int hash_alloc(struct uma_hash *); 173 static int hash_expand(struct uma_hash *, struct uma_hash *); 174 static void hash_free(struct uma_hash *hash); 175 static void uma_timeout(void *); 176 static void uma_startup3(void); 177 static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t); 178 static void uma_zfree_internal(uma_zone_t, void *, void *, int); 179 static void bucket_enable(void); 180 void uma_print_zone(uma_zone_t); 181 void uma_print_stats(void); 182 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); 183 184 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, 185 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 186 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); 187 188 /* 189 * This routine checks to see whether or not it's safe to enable buckets. 190 */ 191 192 static void 193 bucket_enable(void) 194 { 195 if (cnt.v_free_count < cnt.v_free_min) 196 bucketdisable = 1; 197 else 198 bucketdisable = 0; 199 } 200 201 202 /* 203 * Routine called by timeout which is used to fire off some time interval 204 * based calculations. (working set, stats, etc.) 
205 * 206 * Arguments: 207 * arg Unused 208 * 209 * Returns: 210 * Nothing 211 */ 212 static void 213 uma_timeout(void *unused) 214 { 215 bucket_enable(); 216 zone_foreach(zone_timeout); 217 218 /* Reschedule this event */ 219 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); 220 } 221 222 /* 223 * Routine to perform timeout driven calculations. This does the working set 224 * as well as hash expanding, and per cpu statistics aggregation. 225 * 226 * Arguments: 227 * zone The zone to operate on 228 * 229 * Returns: 230 * Nothing 231 */ 232 static void 233 zone_timeout(uma_zone_t zone) 234 { 235 uma_cache_t cache; 236 u_int64_t alloc; 237 int free; 238 int cpu; 239 240 alloc = 0; 241 free = 0; 242 243 /* 244 * Aggregate per cpu cache statistics back to the zone. 245 * 246 * I may rewrite this to set a flag in the per cpu cache instead of 247 * locking. If the flag is not cleared on the next round I will have 248 * to lock and do it here instead so that the statistics don't get too 249 * far out of sync. 250 */ 251 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) { 252 for (cpu = 0; cpu < maxcpu; cpu++) { 253 if (CPU_ABSENT(cpu)) 254 continue; 255 CPU_LOCK(zone, cpu); 256 cache = &zone->uz_cpu[cpu]; 257 /* Add them up, and reset */ 258 alloc += cache->uc_allocs; 259 cache->uc_allocs = 0; 260 if (cache->uc_allocbucket) 261 free += cache->uc_allocbucket->ub_ptr + 1; 262 if (cache->uc_freebucket) 263 free += cache->uc_freebucket->ub_ptr + 1; 264 CPU_UNLOCK(zone, cpu); 265 } 266 } 267 268 /* Now push these stats back into the zone.. */ 269 ZONE_LOCK(zone); 270 zone->uz_allocs += alloc; 271 272 /* 273 * cachefree is an instantanious snapshot of what is in the per cpu 274 * caches, not an accurate counter 275 */ 276 zone->uz_cachefree = free; 277 278 /* 279 * Expand the zone hash table. 280 * 281 * This is done if the number of slabs is larger than the hash size. 282 * What I'm trying to do here is completely reduce collisions. This 283 * may be a little aggressive. Should I allow for two collisions max? 284 */ 285 286 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) && 287 !(zone->uz_flags & UMA_ZFLAG_MALLOC)) { 288 if (zone->uz_pages / zone->uz_ppera 289 >= zone->uz_hash.uh_hashsize) { 290 struct uma_hash newhash; 291 struct uma_hash oldhash; 292 int ret; 293 294 /* 295 * This is so involved because allocating and freeing 296 * while the zone lock is held will lead to deadlock. 297 * I have to do everything in stages and check for 298 * races. 299 */ 300 newhash = zone->uz_hash; 301 ZONE_UNLOCK(zone); 302 ret = hash_alloc(&newhash); 303 ZONE_LOCK(zone); 304 if (ret) { 305 if (hash_expand(&zone->uz_hash, &newhash)) { 306 oldhash = zone->uz_hash; 307 zone->uz_hash = newhash; 308 } else 309 oldhash = newhash; 310 311 ZONE_UNLOCK(zone); 312 hash_free(&oldhash); 313 ZONE_LOCK(zone); 314 } 315 } 316 } 317 318 /* 319 * Here we compute the working set size as the total number of items 320 * left outstanding since the last time interval. This is slightly 321 * suboptimal. What we really want is the highest number of outstanding 322 * items during the last time quantum. This should be close enough. 323 * 324 * The working set size is used to throttle the zone_drain function. 325 * We don't want to return memory that we may need again immediately. 
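 *
 * As a rough example: if uz_allocs has advanced from 5000 to 6200 since
 * the previous tick, the working set estimate becomes 1200 items, and
 * zone_drain will only release slabs once the zone's free item count
 * meets or exceeds that estimate.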
326 */ 327 alloc = zone->uz_allocs - zone->uz_oallocs; 328 zone->uz_oallocs = zone->uz_allocs; 329 zone->uz_wssize = alloc; 330 331 ZONE_UNLOCK(zone); 332 } 333 334 /* 335 * Allocate and zero fill the next sized hash table from the appropriate 336 * backing store. 337 * 338 * Arguments: 339 * hash A new hash structure with the old hash size in uh_hashsize 340 * 341 * Returns: 342 * 1 on sucess and 0 on failure. 343 */ 344 int 345 hash_alloc(struct uma_hash *hash) 346 { 347 int oldsize; 348 int alloc; 349 350 oldsize = hash->uh_hashsize; 351 352 /* We're just going to go to a power of two greater */ 353 if (oldsize) { 354 hash->uh_hashsize = oldsize * 2; 355 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize; 356 /* XXX Shouldn't be abusing DEVBUF here */ 357 hash->uh_slab_hash = (struct slabhead *)malloc(alloc, 358 M_DEVBUF, M_NOWAIT); 359 } else { 360 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; 361 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL, 362 M_WAITOK, NULL); 363 hash->uh_hashsize = UMA_HASH_SIZE_INIT; 364 } 365 if (hash->uh_slab_hash) { 366 bzero(hash->uh_slab_hash, alloc); 367 hash->uh_hashmask = hash->uh_hashsize - 1; 368 return (1); 369 } 370 371 return (0); 372 } 373 374 /* 375 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout 376 * to reduce collisions. This must not be done in the regular allocation path, 377 * otherwise, we can recurse on the vm while allocating pages. 378 * 379 * Arguments: 380 * oldhash The hash you want to expand 381 * newhash The hash structure for the new table 382 * 383 * Returns: 384 * Nothing 385 * 386 * Discussion: 387 */ 388 static int 389 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash) 390 { 391 uma_slab_t slab; 392 int hval; 393 int i; 394 395 if (!newhash->uh_slab_hash) 396 return (0); 397 398 if (oldhash->uh_hashsize >= newhash->uh_hashsize) 399 return (0); 400 401 /* 402 * I need to investigate hash algorithms for resizing without a 403 * full rehash. 404 */ 405 406 for (i = 0; i < oldhash->uh_hashsize; i++) 407 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) { 408 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]); 409 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink); 410 hval = UMA_HASH(newhash, slab->us_data); 411 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval], 412 slab, us_hlink); 413 } 414 415 return (1); 416 } 417 418 /* 419 * Free the hash bucket to the appropriate backing store. 420 * 421 * Arguments: 422 * slab_hash The hash bucket we're freeing 423 * hashsize The number of entries in that hash bucket 424 * 425 * Returns: 426 * Nothing 427 */ 428 static void 429 hash_free(struct uma_hash *hash) 430 { 431 if (hash->uh_slab_hash == NULL) 432 return; 433 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT) 434 uma_zfree_internal(hashzone, 435 hash->uh_slab_hash, NULL, 0); 436 else 437 free(hash->uh_slab_hash, M_DEVBUF); 438 } 439 440 /* 441 * Frees all outstanding items in a bucket 442 * 443 * Arguments: 444 * zone The zone to free to, must be unlocked. 445 * bucket The free/alloc bucket with items, cpu queue must be locked. 446 * 447 * Returns: 448 * Nothing 449 */ 450 451 static void 452 bucket_drain(uma_zone_t zone, uma_bucket_t bucket) 453 { 454 uma_slab_t slab; 455 int mzone; 456 void *item; 457 458 if (bucket == NULL) 459 return; 460 461 slab = NULL; 462 mzone = 0; 463 464 /* We have to lookup the slab again for malloc.. 
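 * The item address is masked down to its slab boundary and looked up in
 * mallochash below.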
*/ 465 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 466 mzone = 1; 467 468 while (bucket->ub_ptr > -1) { 469 item = bucket->ub_bucket[bucket->ub_ptr]; 470 #ifdef INVARIANTS 471 bucket->ub_bucket[bucket->ub_ptr] = NULL; 472 KASSERT(item != NULL, 473 ("bucket_drain: botched ptr, item is NULL")); 474 #endif 475 bucket->ub_ptr--; 476 /* 477 * This is extremely inefficient. The slab pointer was passed 478 * to uma_zfree_arg, but we lost it because the buckets don't 479 * hold them. This will go away when free() gets a size passed 480 * to it. 481 */ 482 if (mzone) { 483 mtx_lock(&malloc_mtx); 484 slab = hash_sfind(mallochash, 485 (u_int8_t *)((unsigned long)item & 486 (~UMA_SLAB_MASK))); 487 mtx_unlock(&malloc_mtx); 488 } 489 uma_zfree_internal(zone, item, slab, 1); 490 } 491 } 492 493 /* 494 * Drains the per cpu caches for a zone. 495 * 496 * Arguments: 497 * zone The zone to drain, must be unlocked. 498 * 499 * Returns: 500 * Nothing 501 * 502 * This function returns with the zone locked so that the per cpu queues can 503 * not be filled until zone_drain is finished. 504 * 505 */ 506 static void 507 cache_drain(uma_zone_t zone) 508 { 509 uma_bucket_t bucket; 510 uma_cache_t cache; 511 int cpu; 512 513 /* 514 * Flush out the per cpu queues. 515 * 516 * XXX This causes unnecessary thrashing due to immediately having 517 * empty per cpu queues. I need to improve this. 518 */ 519 520 /* 521 * We have to lock each cpu cache before locking the zone 522 */ 523 ZONE_UNLOCK(zone); 524 525 for (cpu = 0; cpu < maxcpu; cpu++) { 526 if (CPU_ABSENT(cpu)) 527 continue; 528 CPU_LOCK(zone, cpu); 529 cache = &zone->uz_cpu[cpu]; 530 bucket_drain(zone, cache->uc_allocbucket); 531 bucket_drain(zone, cache->uc_freebucket); 532 } 533 534 /* 535 * Drain the bucket queues and free the buckets, we just keep two per 536 * cpu (alloc/free). 537 */ 538 ZONE_LOCK(zone); 539 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 540 LIST_REMOVE(bucket, ub_link); 541 ZONE_UNLOCK(zone); 542 bucket_drain(zone, bucket); 543 uma_zfree_internal(bucketzone, bucket, NULL, 0); 544 ZONE_LOCK(zone); 545 } 546 547 /* Now we do the free queue.. */ 548 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 549 LIST_REMOVE(bucket, ub_link); 550 uma_zfree_internal(bucketzone, bucket, NULL, 0); 551 } 552 553 /* We unlock here, but they will all block until the zone is unlocked */ 554 for (cpu = 0; cpu < maxcpu; cpu++) { 555 if (CPU_ABSENT(cpu)) 556 continue; 557 CPU_UNLOCK(zone, cpu); 558 } 559 560 zone->uz_cachefree = 0; 561 } 562 563 /* 564 * Frees pages from a zone back to the system. This is done on demand from 565 * the pageout daemon. 566 * 567 * Arguments: 568 * zone The zone to free pages from 569 * all Should we drain all items? 570 * 571 * Returns: 572 * Nothing. 
573 */ 574 static void 575 zone_drain(uma_zone_t zone) 576 { 577 struct slabhead freeslabs = {}; 578 uma_slab_t slab; 579 uma_slab_t n; 580 u_int64_t extra; 581 u_int8_t flags; 582 u_int8_t *mem; 583 int i; 584 585 /* 586 * We don't want to take pages from staticly allocated zones at this 587 * time 588 */ 589 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL) 590 return; 591 592 ZONE_LOCK(zone); 593 594 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) 595 cache_drain(zone); 596 597 if (zone->uz_free < zone->uz_wssize) 598 goto finished; 599 #ifdef UMA_DEBUG 600 printf("%s working set size: %llu free items: %u\n", 601 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free); 602 #endif 603 extra = zone->uz_free - zone->uz_wssize; 604 extra /= zone->uz_ipers; 605 606 /* extra is now the number of extra slabs that we can free */ 607 608 if (extra == 0) 609 goto finished; 610 611 slab = LIST_FIRST(&zone->uz_free_slab); 612 while (slab && extra) { 613 n = LIST_NEXT(slab, us_link); 614 615 /* We have no where to free these to */ 616 if (slab->us_flags & UMA_SLAB_BOOT) { 617 slab = n; 618 continue; 619 } 620 621 LIST_REMOVE(slab, us_link); 622 zone->uz_pages -= zone->uz_ppera; 623 zone->uz_free -= zone->uz_ipers; 624 625 if (zone->uz_flags & UMA_ZFLAG_MALLOC) { 626 mtx_lock(&malloc_mtx); 627 UMA_HASH_REMOVE(mallochash, slab, slab->us_data); 628 mtx_unlock(&malloc_mtx); 629 } 630 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE && 631 !(zone->uz_flags & UMA_ZFLAG_MALLOC)) 632 UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data); 633 634 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink); 635 636 slab = n; 637 extra--; 638 } 639 finished: 640 ZONE_UNLOCK(zone); 641 642 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) { 643 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink); 644 if (zone->uz_fini) 645 for (i = 0; i < zone->uz_ipers; i++) 646 zone->uz_fini( 647 slab->us_data + (zone->uz_rsize * i), 648 zone->uz_size); 649 flags = slab->us_flags; 650 mem = slab->us_data; 651 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) { 652 uma_zfree_internal(slabzone, slab, NULL, 0); 653 } 654 #ifdef UMA_DEBUG 655 printf("%s: Returning %d bytes.\n", 656 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera); 657 #endif 658 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags); 659 } 660 661 } 662 663 /* 664 * Allocate a new slab for a zone. This does not insert the slab onto a list. 665 * 666 * Arguments: 667 * zone The zone to allocate slabs for 668 * wait Shall we wait? 669 * 670 * Returns: 671 * The slab that was allocated or NULL if there is no memory and the 672 * caller specified M_NOWAIT. 673 * 674 */ 675 static uma_slab_t 676 slab_zalloc(uma_zone_t zone, int wait) 677 { 678 uma_slab_t slab; /* Starting slab */ 679 u_int8_t *mem; 680 u_int8_t flags; 681 int i; 682 683 slab = NULL; 684 685 #ifdef UMA_DEBUG 686 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); 687 #endif 688 ZONE_UNLOCK(zone); 689 690 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) { 691 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL); 692 if (slab == NULL) { 693 ZONE_LOCK(zone); 694 return NULL; 695 } 696 } 697 698 /* 699 * This reproduces the old vm_zone behavior of zero filling pages the 700 * first time they are added to a zone. 701 * 702 * Malloced items are zeroed in uma_zalloc. 
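 * So M_ZERO is forced on for ordinary zones here and stripped for
 * malloc zones.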
703 */ 704 705 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0) 706 wait |= M_ZERO; 707 else 708 wait &= ~M_ZERO; 709 710 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) { 711 mtx_lock(&Giant); 712 mem = zone->uz_allocf(zone, 713 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait); 714 mtx_unlock(&Giant); 715 if (mem == NULL) { 716 ZONE_LOCK(zone); 717 return (NULL); 718 } 719 } else { 720 uma_slab_t tmps; 721 722 if (zone->uz_ppera > 1) 723 panic("UMA: Attemping to allocate multiple pages before vm has started.\n"); 724 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 725 panic("Mallocing before uma_startup2 has been called.\n"); 726 if (uma_boot_free == 0) 727 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n"); 728 tmps = LIST_FIRST(&uma_boot_pages); 729 LIST_REMOVE(tmps, us_link); 730 uma_boot_free--; 731 mem = tmps->us_data; 732 } 733 734 /* Point the slab into the allocated memory */ 735 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) { 736 slab = (uma_slab_t )(mem + zone->uz_pgoff); 737 } 738 739 if (zone->uz_flags & UMA_ZFLAG_MALLOC) { 740 #ifdef UMA_DEBUG 741 printf("Inserting %p into malloc hash from slab %p\n", 742 mem, slab); 743 #endif 744 mtx_lock(&malloc_mtx); 745 UMA_HASH_INSERT(mallochash, slab, mem); 746 mtx_unlock(&malloc_mtx); 747 } 748 749 slab->us_zone = zone; 750 slab->us_data = mem; 751 752 /* 753 * This is intended to spread data out across cache lines. 754 * 755 * This code doesn't seem to work properly on x86, and on alpha 756 * it makes absolutely no performance difference. I'm sure it could 757 * use some tuning, but sun makes outrageous claims about it's 758 * performance. 759 */ 760 #if 0 761 if (zone->uz_cachemax) { 762 slab->us_data += zone->uz_cacheoff; 763 zone->uz_cacheoff += UMA_CACHE_INC; 764 if (zone->uz_cacheoff > zone->uz_cachemax) 765 zone->uz_cacheoff = 0; 766 } 767 #endif 768 769 slab->us_freecount = zone->uz_ipers; 770 slab->us_firstfree = 0; 771 slab->us_flags = flags; 772 for (i = 0; i < zone->uz_ipers; i++) 773 slab->us_freelist[i] = i+1; 774 775 if (zone->uz_init) 776 for (i = 0; i < zone->uz_ipers; i++) 777 zone->uz_init(slab->us_data + (zone->uz_rsize * i), 778 zone->uz_size); 779 ZONE_LOCK(zone); 780 781 if ((zone->uz_flags & (UMA_ZFLAG_OFFPAGE|UMA_ZFLAG_MALLOC)) == 782 UMA_ZFLAG_OFFPAGE) 783 UMA_HASH_INSERT(&zone->uz_hash, slab, mem); 784 785 zone->uz_pages += zone->uz_ppera; 786 zone->uz_free += zone->uz_ipers; 787 788 789 return (slab); 790 } 791 792 /* 793 * Allocates a number of pages from the system 794 * 795 * Arguments: 796 * zone Unused 797 * bytes The number of bytes requested 798 * wait Shall we wait? 799 * 800 * Returns: 801 * A pointer to the alloced memory or possibly 802 * NULL if M_NOWAIT is set. 803 */ 804 static void * 805 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 806 { 807 void *p; /* Returned page */ 808 809 *pflag = UMA_SLAB_KMEM; 810 p = (void *) kmem_malloc(kmem_map, bytes, wait); 811 812 return (p); 813 } 814 815 /* 816 * Allocates a number of pages from within an object 817 * 818 * Arguments: 819 * zone Unused 820 * bytes The number of bytes requested 821 * wait Shall we wait? 822 * 823 * Returns: 824 * A pointer to the alloced memory or possibly 825 * NULL if M_NOWAIT is set. 826 * 827 * TODO: If we fail during a multi-page allocation release the pages that have 828 * already been allocated. 
829 */ 830 static void * 831 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 832 { 833 vm_offset_t zkva; 834 vm_offset_t retkva; 835 vm_page_t p; 836 int pages; 837 838 retkva = 0; 839 pages = zone->uz_pages; 840 841 /* 842 * This looks a little weird since we're getting one page at a time 843 */ 844 while (bytes > 0) { 845 p = vm_page_alloc(zone->uz_obj, pages, 846 VM_ALLOC_INTERRUPT); 847 if (p == NULL) 848 return (NULL); 849 850 zkva = zone->uz_kva + pages * PAGE_SIZE; 851 if (retkva == 0) 852 retkva = zkva; 853 pmap_qenter(zkva, &p, 1); 854 bytes -= PAGE_SIZE; 855 pages += 1; 856 } 857 858 *flags = UMA_SLAB_PRIV; 859 860 return ((void *)retkva); 861 } 862 863 /* 864 * Frees a number of pages to the system 865 * 866 * Arguments: 867 * mem A pointer to the memory to be freed 868 * size The size of the memory being freed 869 * flags The original p->us_flags field 870 * 871 * Returns: 872 * Nothing 873 * 874 */ 875 static void 876 page_free(void *mem, int size, u_int8_t flags) 877 { 878 vm_map_t map; 879 880 if (flags & UMA_SLAB_KMEM) 881 map = kmem_map; 882 else 883 panic("UMA: page_free used with invalid flags %d\n", flags); 884 885 kmem_free(map, (vm_offset_t)mem, size); 886 } 887 888 /* 889 * Zero fill initializer 890 * 891 * Arguments/Returns follow uma_init specifications 892 * 893 */ 894 static void 895 zero_init(void *mem, int size) 896 { 897 bzero(mem, size); 898 } 899 900 /* 901 * Finish creating a small uma zone. This calculates ipers, and the zone size. 902 * 903 * Arguments 904 * zone The zone we should initialize 905 * 906 * Returns 907 * Nothing 908 */ 909 static void 910 zone_small_init(uma_zone_t zone) 911 { 912 int rsize; 913 int memused; 914 int ipers; 915 916 rsize = zone->uz_size; 917 918 if (rsize < UMA_SMALLEST_UNIT) 919 rsize = UMA_SMALLEST_UNIT; 920 921 if (rsize & zone->uz_align) 922 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1); 923 924 zone->uz_rsize = rsize; 925 926 rsize += 1; /* Account for the byte of linkage */ 927 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize; 928 zone->uz_ppera = 1; 929 930 memused = zone->uz_ipers * zone->uz_rsize; 931 932 /* Can we do any better? */ 933 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) { 934 if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 935 return; 936 ipers = UMA_SLAB_SIZE / zone->uz_rsize; 937 if (ipers > zone->uz_ipers) { 938 zone->uz_flags |= UMA_ZFLAG_OFFPAGE; 939 zone->uz_ipers = ipers; 940 } 941 } 942 943 } 944 945 /* 946 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do 947 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be 948 * more complicated. 949 * 950 * Arguments 951 * zone The zone we should initialize 952 * 953 * Returns 954 * Nothing 955 */ 956 static void 957 zone_large_init(uma_zone_t zone) 958 { 959 int pages; 960 961 pages = zone->uz_size / UMA_SLAB_SIZE; 962 963 /* Account for remainder */ 964 if ((pages * UMA_SLAB_SIZE) < zone->uz_size) 965 pages++; 966 967 zone->uz_ppera = pages; 968 zone->uz_ipers = 1; 969 970 zone->uz_flags |= UMA_ZFLAG_OFFPAGE; 971 zone->uz_rsize = zone->uz_size; 972 } 973 974 /* 975 * Zone header ctor. This initializes all fields, locks, etc. And inserts 976 * the zone onto the global zone list. 
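 *
 * As a worked illustration of the layout math done below (the numbers
 * are only an example and assume 4k slabs; H stands for whatever
 * sizeof(struct uma_slab) happens to be): a 124 byte item with 8 byte
 * alignment is rounded up to rsize = 128, the per item linkage byte
 * makes each slot 129 bytes, so zone_small_init fits
 * ipers = (4096 - H) / 129 items into a page, and for the inline case
 * the slab header is right justified at
 * uz_pgoff = 4096 - roundup(H + ipers, UMA_ALIGN_PTR + 1).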
977 * 978 * Arguments/Returns follow uma_ctor specifications 979 * udata Actually uma_zcreat_args 980 * 981 */ 982 983 static void 984 zone_ctor(void *mem, int size, void *udata) 985 { 986 struct uma_zctor_args *arg = udata; 987 uma_zone_t zone = mem; 988 int privlc; 989 int cplen; 990 int cpu; 991 992 bzero(zone, size); 993 zone->uz_name = arg->name; 994 zone->uz_size = arg->size; 995 zone->uz_ctor = arg->ctor; 996 zone->uz_dtor = arg->dtor; 997 zone->uz_init = arg->uminit; 998 zone->uz_fini = arg->fini; 999 zone->uz_align = arg->align; 1000 zone->uz_free = 0; 1001 zone->uz_pages = 0; 1002 zone->uz_flags = 0; 1003 zone->uz_allocf = page_alloc; 1004 zone->uz_freef = page_free; 1005 1006 if (arg->flags & UMA_ZONE_ZINIT) 1007 zone->uz_init = zero_init; 1008 1009 if (arg->flags & UMA_ZONE_INTERNAL) 1010 zone->uz_flags |= UMA_ZFLAG_INTERNAL; 1011 1012 if (arg->flags & UMA_ZONE_MALLOC) 1013 zone->uz_flags |= UMA_ZFLAG_MALLOC; 1014 1015 if (arg->flags & UMA_ZONE_NOFREE) 1016 zone->uz_flags |= UMA_ZFLAG_NOFREE; 1017 1018 if (arg->flags & UMA_ZONE_VM) 1019 zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE; 1020 1021 if (zone->uz_size > UMA_SLAB_SIZE) 1022 zone_large_init(zone); 1023 else 1024 zone_small_init(zone); 1025 1026 if (arg->flags & UMA_ZONE_MTXCLASS) 1027 privlc = 1; 1028 else 1029 privlc = 0; 1030 1031 /* We do this so that the per cpu lock name is unique for each zone */ 1032 memcpy(zone->uz_lname, "PCPU ", 5); 1033 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6); 1034 memcpy(zone->uz_lname+5, zone->uz_name, cplen); 1035 zone->uz_lname[LOCKNAME_LEN - 1] = '\0'; 1036 1037 /* 1038 * If we're putting the slab header in the actual page we need to 1039 * figure out where in each page it goes. This calculates a right 1040 * justified offset into the memory on a ALIGN_PTR boundary. 1041 */ 1042 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) { 1043 int totsize; 1044 int waste; 1045 1046 /* Size of the slab struct and free list */ 1047 totsize = sizeof(struct uma_slab) + zone->uz_ipers; 1048 if (totsize & UMA_ALIGN_PTR) 1049 totsize = (totsize & ~UMA_ALIGN_PTR) + 1050 (UMA_ALIGN_PTR + 1); 1051 zone->uz_pgoff = UMA_SLAB_SIZE - totsize; 1052 1053 waste = zone->uz_pgoff; 1054 waste -= (zone->uz_ipers * zone->uz_rsize); 1055 1056 /* 1057 * This calculates how much space we have for cache line size 1058 * optimizations. It works by offseting each slab slightly. 1059 * Currently it breaks on x86, and so it is disabled. 1060 */ 1061 1062 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) { 1063 zone->uz_cachemax = waste - UMA_CACHE_INC; 1064 zone->uz_cacheoff = 0; 1065 } 1066 1067 totsize = zone->uz_pgoff + sizeof(struct uma_slab) 1068 + zone->uz_ipers; 1069 /* I don't think it's possible, but I'll make sure anyway */ 1070 if (totsize > UMA_SLAB_SIZE) { 1071 printf("zone %s ipers %d rsize %d size %d\n", 1072 zone->uz_name, zone->uz_ipers, zone->uz_rsize, 1073 zone->uz_size); 1074 panic("UMA slab won't fit.\n"); 1075 } 1076 } else { 1077 hash_alloc(&zone->uz_hash); 1078 zone->uz_pgoff = 0; 1079 } 1080 1081 #ifdef UMA_DEBUG 1082 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n", 1083 zone->uz_name, zone, 1084 zone->uz_size, zone->uz_ipers, 1085 zone->uz_ppera, zone->uz_pgoff); 1086 #endif 1087 ZONE_LOCK_INIT(zone, privlc); 1088 1089 mtx_lock(&uma_mtx); 1090 LIST_INSERT_HEAD(&uma_zones, zone, uz_link); 1091 mtx_unlock(&uma_mtx); 1092 1093 /* 1094 * Some internal zones don't have room allocated for the per cpu 1095 * caches. If we're internal, bail out here. 
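 * For everyone else, uz_count below picks the initial per cpu bucket
 * fill target: one less than ipers for small zones, otherwise one less
 * than UMA_BUCKET_SIZE.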
1096 */ 1097 1098 if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 1099 return; 1100 1101 if (zone->uz_ipers < UMA_BUCKET_SIZE) 1102 zone->uz_count = zone->uz_ipers - 1; 1103 else 1104 zone->uz_count = UMA_BUCKET_SIZE - 1; 1105 1106 for (cpu = 0; cpu < maxcpu; cpu++) 1107 CPU_LOCK_INIT(zone, cpu, privlc); 1108 } 1109 1110 /* 1111 * Zone header dtor. This frees all data, destroys locks, frees the hash table 1112 * and removes the zone from the global list. 1113 * 1114 * Arguments/Returns follow uma_dtor specifications 1115 * udata unused 1116 */ 1117 1118 static void 1119 zone_dtor(void *arg, int size, void *udata) 1120 { 1121 uma_zone_t zone; 1122 int cpu; 1123 1124 zone = (uma_zone_t)arg; 1125 1126 ZONE_LOCK(zone); 1127 zone->uz_wssize = 0; 1128 ZONE_UNLOCK(zone); 1129 1130 mtx_lock(&uma_mtx); 1131 LIST_REMOVE(zone, uz_link); 1132 zone_drain(zone); 1133 mtx_unlock(&uma_mtx); 1134 1135 ZONE_LOCK(zone); 1136 if (zone->uz_free != 0) 1137 printf("Zone %s was not empty. Lost %d pages of memory.\n", 1138 zone->uz_name, zone->uz_pages); 1139 1140 if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0) 1141 for (cpu = 0; cpu < maxcpu; cpu++) 1142 CPU_LOCK_FINI(zone, cpu); 1143 1144 ZONE_UNLOCK(zone); 1145 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0) 1146 hash_free(&zone->uz_hash); 1147 1148 ZONE_LOCK_FINI(zone); 1149 } 1150 /* 1151 * Traverses every zone in the system and calls a callback 1152 * 1153 * Arguments: 1154 * zfunc A pointer to a function which accepts a zone 1155 * as an argument. 1156 * 1157 * Returns: 1158 * Nothing 1159 */ 1160 static void 1161 zone_foreach(void (*zfunc)(uma_zone_t)) 1162 { 1163 uma_zone_t zone; 1164 1165 mtx_lock(&uma_mtx); 1166 LIST_FOREACH(zone, &uma_zones, uz_link) { 1167 zfunc(zone); 1168 } 1169 mtx_unlock(&uma_mtx); 1170 } 1171 1172 /* Public functions */ 1173 /* See uma.h */ 1174 void 1175 uma_startup(void *bootmem) 1176 { 1177 struct uma_zctor_args args; 1178 uma_slab_t slab; 1179 int slabsize; 1180 int i; 1181 1182 #ifdef UMA_DEBUG 1183 printf("Creating uma zone headers zone.\n"); 1184 #endif 1185 #ifdef SMP 1186 maxcpu = mp_maxid + 1; 1187 #else 1188 maxcpu = 1; 1189 #endif 1190 #ifdef UMA_DEBUG 1191 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid); 1192 Debugger("stop"); 1193 #endif 1194 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF); 1195 /* "manually" Create the initial zone */ 1196 args.name = "UMA Zones"; 1197 args.size = sizeof(struct uma_zone) + 1198 (sizeof(struct uma_cache) * (maxcpu - 1)); 1199 args.ctor = zone_ctor; 1200 args.dtor = zone_dtor; 1201 args.uminit = zero_init; 1202 args.fini = NULL; 1203 args.align = 32 - 1; 1204 args.flags = UMA_ZONE_INTERNAL; 1205 /* The initial zone has no Per cpu queues so it's smaller */ 1206 zone_ctor(zones, sizeof(struct uma_zone), &args); 1207 1208 #ifdef UMA_DEBUG 1209 printf("Filling boot free list.\n"); 1210 #endif 1211 for (i = 0; i < UMA_BOOT_PAGES; i++) { 1212 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE)); 1213 slab->us_data = (u_int8_t *)slab; 1214 slab->us_flags = UMA_SLAB_BOOT; 1215 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link); 1216 uma_boot_free++; 1217 } 1218 1219 #ifdef UMA_DEBUG 1220 printf("Creating slab zone.\n"); 1221 #endif 1222 1223 /* 1224 * This is the max number of free list items we'll have with 1225 * offpage slabs. 
1226 */ 1227 1228 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab); 1229 slabsize /= UMA_MAX_WASTE; 1230 slabsize++; /* In case there it's rounded */ 1231 slabsize += sizeof(struct uma_slab); 1232 1233 /* Now make a zone for slab headers */ 1234 slabzone = uma_zcreate("UMA Slabs", 1235 slabsize, 1236 NULL, NULL, NULL, NULL, 1237 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); 1238 1239 hashzone = uma_zcreate("UMA Hash", 1240 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, 1241 NULL, NULL, NULL, NULL, 1242 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); 1243 1244 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket), 1245 NULL, NULL, NULL, NULL, 1246 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); 1247 1248 1249 #ifdef UMA_DEBUG 1250 printf("UMA startup complete.\n"); 1251 #endif 1252 } 1253 1254 /* see uma.h */ 1255 void 1256 uma_startup2(void *hashmem, u_long elems) 1257 { 1258 bzero(hashmem, elems * sizeof(void *)); 1259 mallochash->uh_slab_hash = hashmem; 1260 mallochash->uh_hashsize = elems; 1261 mallochash->uh_hashmask = elems - 1; 1262 booted = 1; 1263 bucket_enable(); 1264 #ifdef UMA_DEBUG 1265 printf("UMA startup2 complete.\n"); 1266 #endif 1267 } 1268 1269 /* 1270 * Initialize our callout handle 1271 * 1272 */ 1273 1274 static void 1275 uma_startup3(void) 1276 { 1277 #ifdef UMA_DEBUG 1278 printf("Starting callout.\n"); 1279 #endif 1280 callout_init(&uma_callout, 0); 1281 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); 1282 #ifdef UMA_DEBUG 1283 printf("UMA startup3 complete.\n"); 1284 #endif 1285 } 1286 1287 /* See uma.h */ 1288 uma_zone_t 1289 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor, 1290 uma_init uminit, uma_fini fini, int align, u_int16_t flags) 1291 1292 { 1293 struct uma_zctor_args args; 1294 1295 /* This stuff is essential for the zone ctor */ 1296 args.name = name; 1297 args.size = size; 1298 args.ctor = ctor; 1299 args.dtor = dtor; 1300 args.uminit = uminit; 1301 args.fini = fini; 1302 args.align = align; 1303 args.flags = flags; 1304 1305 return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL)); 1306 } 1307 1308 /* See uma.h */ 1309 void 1310 uma_zdestroy(uma_zone_t zone) 1311 { 1312 uma_zfree_internal(zones, zone, NULL, 0); 1313 } 1314 1315 /* See uma.h */ 1316 void * 1317 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) 1318 { 1319 void *item; 1320 uma_cache_t cache; 1321 uma_bucket_t bucket; 1322 int cpu; 1323 1324 /* This is the fast path allocation */ 1325 #ifdef UMA_DEBUG_ALLOC_1 1326 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); 1327 #endif 1328 1329 if (!(flags & M_NOWAIT)) { 1330 KASSERT(curthread->td_intr_nesting_level == 0, 1331 ("malloc(M_WAITOK) in interrupt context")); 1332 WITNESS_SLEEP(1, NULL); 1333 } 1334 1335 zalloc_restart: 1336 cpu = PCPU_GET(cpuid); 1337 CPU_LOCK(zone, cpu); 1338 cache = &zone->uz_cpu[cpu]; 1339 1340 zalloc_start: 1341 bucket = cache->uc_allocbucket; 1342 1343 if (bucket) { 1344 if (bucket->ub_ptr > -1) { 1345 item = bucket->ub_bucket[bucket->ub_ptr]; 1346 #ifdef INVARIANTS 1347 bucket->ub_bucket[bucket->ub_ptr] = NULL; 1348 #endif 1349 bucket->ub_ptr--; 1350 KASSERT(item != NULL, 1351 ("uma_zalloc: Bucket pointer mangled.")); 1352 cache->uc_allocs++; 1353 #ifdef INVARIANTS 1354 uma_dbg_alloc(zone, NULL, item); 1355 #endif 1356 CPU_UNLOCK(zone, cpu); 1357 if (zone->uz_ctor) 1358 zone->uz_ctor(item, zone->uz_size, udata); 1359 if (flags & M_ZERO) 1360 bzero(item, zone->uz_size); 1361 return (item); 1362 } else if (cache->uc_freebucket) { 1363 /* 1364 * We have run out of items in 
our allocbucket. 1365 * See if we can switch with our free bucket. 1366 */ 1367 if (cache->uc_freebucket->ub_ptr > -1) { 1368 uma_bucket_t swap; 1369 1370 #ifdef UMA_DEBUG_ALLOC 1371 printf("uma_zalloc: Swapping empty with alloc.\n"); 1372 #endif 1373 swap = cache->uc_freebucket; 1374 cache->uc_freebucket = cache->uc_allocbucket; 1375 cache->uc_allocbucket = swap; 1376 1377 goto zalloc_start; 1378 } 1379 } 1380 } 1381 ZONE_LOCK(zone); 1382 /* Since we have locked the zone we may as well send back our stats */ 1383 zone->uz_allocs += cache->uc_allocs; 1384 cache->uc_allocs = 0; 1385 1386 /* Our old one is now a free bucket */ 1387 if (cache->uc_allocbucket) { 1388 KASSERT(cache->uc_allocbucket->ub_ptr == -1, 1389 ("uma_zalloc_arg: Freeing a non free bucket.")); 1390 LIST_INSERT_HEAD(&zone->uz_free_bucket, 1391 cache->uc_allocbucket, ub_link); 1392 cache->uc_allocbucket = NULL; 1393 } 1394 1395 /* Check the free list for a new alloc bucket */ 1396 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 1397 KASSERT(bucket->ub_ptr != -1, 1398 ("uma_zalloc_arg: Returning an empty bucket.")); 1399 1400 LIST_REMOVE(bucket, ub_link); 1401 cache->uc_allocbucket = bucket; 1402 ZONE_UNLOCK(zone); 1403 goto zalloc_start; 1404 } 1405 /* Bump up our uz_count so we get here less */ 1406 if (zone->uz_count < UMA_BUCKET_SIZE - 1) 1407 zone->uz_count++; 1408 1409 /* We are no longer associated with this cpu!!! */ 1410 CPU_UNLOCK(zone, cpu); 1411 1412 /* 1413 * Now lets just fill a bucket and put it on the free list. If that 1414 * works we'll restart the allocation from the begining. 1415 * 1416 * Try this zone's free list first so we don't allocate extra buckets. 1417 */ 1418 1419 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) 1420 LIST_REMOVE(bucket, ub_link); 1421 1422 /* Now we no longer need the zone lock. */ 1423 ZONE_UNLOCK(zone); 1424 1425 if (bucket == NULL) { 1426 int bflags; 1427 1428 bflags = flags; 1429 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE) 1430 bflags |= M_NOVM; 1431 1432 bucket = uma_zalloc_internal(bucketzone, 1433 NULL, bflags, NULL); 1434 } 1435 1436 if (bucket != NULL) { 1437 #ifdef INVARIANTS 1438 bzero(bucket, bucketzone->uz_size); 1439 #endif 1440 bucket->ub_ptr = -1; 1441 1442 if (uma_zalloc_internal(zone, udata, flags, bucket)) 1443 goto zalloc_restart; 1444 else 1445 uma_zfree_internal(bucketzone, bucket, NULL, 0); 1446 } 1447 /* 1448 * We may not get a bucket if we recurse, so 1449 * return an actual item. 1450 */ 1451 #ifdef UMA_DEBUG 1452 printf("uma_zalloc_arg: Bucketzone returned NULL\n"); 1453 #endif 1454 1455 return (uma_zalloc_internal(zone, udata, flags, NULL)); 1456 } 1457 1458 /* 1459 * Allocates an item for an internal zone OR fills a bucket 1460 * 1461 * Arguments 1462 * zone The zone to alloc for. 1463 * udata The data to be passed to the constructor. 1464 * flags M_WAITOK, M_NOWAIT, M_ZERO. 1465 * bucket The bucket to fill or NULL 1466 * 1467 * Returns 1468 * NULL if there is no memory and M_NOWAIT is set 1469 * An item if called on an interal zone 1470 * Non NULL if called to fill a bucket and it was successful. 1471 * 1472 * Discussion: 1473 * This was much cleaner before it had to do per cpu caches. It is 1474 * complicated now because it has to handle the simple internal case, and 1475 * the more involved bucket filling and allocation. 
1476 */ 1477 1478 static void * 1479 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags, uma_bucket_t bucket) 1480 { 1481 uma_slab_t slab; 1482 u_int8_t freei; 1483 void *item; 1484 1485 item = NULL; 1486 1487 /* 1488 * This is to stop us from allocating per cpu buckets while we're 1489 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the 1490 * boot pages. 1491 */ 1492 1493 if (bucketdisable && zone == bucketzone) 1494 return (NULL); 1495 1496 #ifdef UMA_DEBUG_ALLOC 1497 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); 1498 #endif 1499 ZONE_LOCK(zone); 1500 1501 /* 1502 * This code is here to limit the number of simultaneous bucket fills 1503 * for any given zone to the number of per cpu caches in this zone. This 1504 * is done so that we don't allocate more memory than we really need. 1505 */ 1506 1507 if (bucket) { 1508 #ifdef SMP 1509 if (zone->uz_fills >= mp_ncpus) { 1510 #else 1511 if (zone->uz_fills > 1) { 1512 #endif 1513 ZONE_UNLOCK(zone); 1514 return (NULL); 1515 } 1516 1517 zone->uz_fills++; 1518 } 1519 1520 new_slab: 1521 1522 /* Find a slab with some space */ 1523 if (zone->uz_free) { 1524 if (!LIST_EMPTY(&zone->uz_part_slab)) { 1525 slab = LIST_FIRST(&zone->uz_part_slab); 1526 } else { 1527 slab = LIST_FIRST(&zone->uz_free_slab); 1528 LIST_REMOVE(slab, us_link); 1529 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); 1530 } 1531 } else { 1532 /* 1533 * This is to prevent us from recursively trying to allocate 1534 * buckets. The problem is that if an allocation forces us to 1535 * grab a new bucket we will call page_alloc, which will go off 1536 * and cause the vm to allocate vm_map_entries. If we need new 1537 * buckets there too we will recurse in kmem_alloc and bad 1538 * things happen. So instead we return a NULL bucket, and make 1539 * the code that allocates buckets smart enough to deal with it 1540 */ 1541 if (zone == bucketzone && zone->uz_recurse != 0) { 1542 ZONE_UNLOCK(zone); 1543 return (NULL); 1544 } 1545 while (zone->uz_maxpages && 1546 zone->uz_pages >= zone->uz_maxpages) { 1547 zone->uz_flags |= UMA_ZFLAG_FULL; 1548 1549 if (flags & M_WAITOK) 1550 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0); 1551 else 1552 goto alloc_fail; 1553 1554 goto new_slab; 1555 } 1556 1557 if (flags & M_NOVM) 1558 goto alloc_fail; 1559 1560 zone->uz_recurse++; 1561 slab = slab_zalloc(zone, flags); 1562 zone->uz_recurse--; 1563 /* 1564 * We might not have been able to get a slab but another cpu 1565 * could have while we were unlocked. If we did get a slab put 1566 * it on the partially used slab list. If not check the free 1567 * count and restart or fail accordingly. 1568 */ 1569 if (slab) 1570 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); 1571 else if (zone->uz_free == 0) 1572 goto alloc_fail; 1573 else 1574 goto new_slab; 1575 } 1576 /* 1577 * If this is our first time though put this guy on the list. 1578 */ 1579 if (bucket != NULL && bucket->ub_ptr == -1) 1580 LIST_INSERT_HEAD(&zone->uz_full_bucket, 1581 bucket, ub_link); 1582 1583 1584 while (slab->us_freecount) { 1585 freei = slab->us_firstfree; 1586 slab->us_firstfree = slab->us_freelist[freei]; 1587 1588 item = slab->us_data + (zone->uz_rsize * freei); 1589 1590 slab->us_freecount--; 1591 zone->uz_free--; 1592 #ifdef INVARIANTS 1593 uma_dbg_alloc(zone, slab, item); 1594 #endif 1595 if (bucket == NULL) { 1596 zone->uz_allocs++; 1597 break; 1598 } 1599 bucket->ub_bucket[++bucket->ub_ptr] = item; 1600 1601 /* Don't overfill the bucket! 
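 * ub_ptr indexes the last slot filled, so the bucket has hit its fill
 * target once ub_ptr reaches uz_count.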
*/ 1602 if (bucket->ub_ptr == zone->uz_count) 1603 break; 1604 } 1605 1606 /* Move this slab to the full list */ 1607 if (slab->us_freecount == 0) { 1608 LIST_REMOVE(slab, us_link); 1609 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link); 1610 } 1611 1612 if (bucket != NULL) { 1613 /* Try to keep the buckets totally full, but don't block */ 1614 if (bucket->ub_ptr < zone->uz_count) { 1615 flags |= M_NOWAIT; 1616 flags &= ~M_WAITOK; 1617 goto new_slab; 1618 } else 1619 zone->uz_fills--; 1620 } 1621 1622 ZONE_UNLOCK(zone); 1623 1624 /* Only construct at this time if we're not filling a bucket */ 1625 if (bucket == NULL) { 1626 if (zone->uz_ctor != NULL) 1627 zone->uz_ctor(item, zone->uz_size, udata); 1628 if (flags & M_ZERO) 1629 bzero(item, zone->uz_size); 1630 } 1631 1632 return (item); 1633 1634 alloc_fail: 1635 if (bucket != NULL) 1636 zone->uz_fills--; 1637 ZONE_UNLOCK(zone); 1638 1639 if (bucket != NULL && bucket->ub_ptr != -1) 1640 return (bucket); 1641 1642 return (NULL); 1643 } 1644 1645 /* See uma.h */ 1646 void 1647 uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 1648 { 1649 uma_cache_t cache; 1650 uma_bucket_t bucket; 1651 int bflags; 1652 int cpu; 1653 1654 /* This is the fast path free */ 1655 #ifdef UMA_DEBUG_ALLOC_1 1656 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); 1657 #endif 1658 /* 1659 * The race here is acceptable. If we miss it we'll just have to wait 1660 * a little longer for the limits to be reset. 1661 */ 1662 1663 if (zone->uz_flags & UMA_ZFLAG_FULL) 1664 goto zfree_internal; 1665 1666 zfree_restart: 1667 cpu = PCPU_GET(cpuid); 1668 CPU_LOCK(zone, cpu); 1669 cache = &zone->uz_cpu[cpu]; 1670 1671 zfree_start: 1672 bucket = cache->uc_freebucket; 1673 1674 if (bucket) { 1675 /* 1676 * Do we have room in our bucket? It is OK for this uz count 1677 * check to be slightly out of sync. 1678 */ 1679 1680 if (bucket->ub_ptr < zone->uz_count) { 1681 bucket->ub_ptr++; 1682 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL, 1683 ("uma_zfree: Freeing to non free bucket index.")); 1684 bucket->ub_bucket[bucket->ub_ptr] = item; 1685 if (zone->uz_dtor) 1686 zone->uz_dtor(item, zone->uz_size, udata); 1687 #ifdef INVARIANTS 1688 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 1689 uma_dbg_free(zone, udata, item); 1690 else 1691 uma_dbg_free(zone, NULL, item); 1692 #endif 1693 CPU_UNLOCK(zone, cpu); 1694 return; 1695 } else if (cache->uc_allocbucket) { 1696 #ifdef UMA_DEBUG_ALLOC 1697 printf("uma_zfree: Swapping buckets.\n"); 1698 #endif 1699 /* 1700 * We have run out of space in our freebucket. 1701 * See if we can switch with our alloc bucket. 1702 */ 1703 if (cache->uc_allocbucket->ub_ptr < 1704 cache->uc_freebucket->ub_ptr) { 1705 uma_bucket_t swap; 1706 1707 swap = cache->uc_freebucket; 1708 cache->uc_freebucket = cache->uc_allocbucket; 1709 cache->uc_allocbucket = swap; 1710 1711 goto zfree_start; 1712 } 1713 } 1714 } 1715 1716 /* 1717 * We can get here for two reasons: 1718 * 1719 * 1) The buckets are NULL 1720 * 2) The alloc and free buckets are both somewhat full. 1721 * 1722 */ 1723 1724 ZONE_LOCK(zone); 1725 1726 bucket = cache->uc_freebucket; 1727 cache->uc_freebucket = NULL; 1728 1729 /* Can we throw this on the zone full list? 
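 * If the cpu's free bucket exists it is necessarily non empty here (the
 * KASSERT below checks this), so it can go onto uz_full_bucket for any
 * cpu to allocate from.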
*/ 1730 if (bucket != NULL) { 1731 #ifdef UMA_DEBUG_ALLOC 1732 printf("uma_zfree: Putting old bucket on the free list.\n"); 1733 #endif 1734 /* ub_ptr is pointing to the last free item */ 1735 KASSERT(bucket->ub_ptr != -1, 1736 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); 1737 LIST_INSERT_HEAD(&zone->uz_full_bucket, 1738 bucket, ub_link); 1739 } 1740 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 1741 LIST_REMOVE(bucket, ub_link); 1742 ZONE_UNLOCK(zone); 1743 cache->uc_freebucket = bucket; 1744 goto zfree_start; 1745 } 1746 /* We're done with this CPU now */ 1747 CPU_UNLOCK(zone, cpu); 1748 1749 /* And the zone.. */ 1750 ZONE_UNLOCK(zone); 1751 1752 #ifdef UMA_DEBUG_ALLOC 1753 printf("uma_zfree: Allocating new free bucket.\n"); 1754 #endif 1755 bflags = M_NOWAIT; 1756 1757 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE) 1758 bflags |= M_NOVM; 1759 #ifdef INVARIANTS 1760 bflags |= M_ZERO; 1761 #endif 1762 bucket = uma_zalloc_internal(bucketzone, 1763 NULL, bflags, NULL); 1764 if (bucket) { 1765 bucket->ub_ptr = -1; 1766 ZONE_LOCK(zone); 1767 LIST_INSERT_HEAD(&zone->uz_free_bucket, 1768 bucket, ub_link); 1769 ZONE_UNLOCK(zone); 1770 goto zfree_restart; 1771 } 1772 1773 /* 1774 * If nothing else caught this, we'll just do an internal free. 1775 */ 1776 1777 zfree_internal: 1778 1779 uma_zfree_internal(zone, item, udata, 0); 1780 1781 return; 1782 1783 } 1784 1785 /* 1786 * Frees an item to an INTERNAL zone or allocates a free bucket 1787 * 1788 * Arguments: 1789 * zone The zone to free to 1790 * item The item we're freeing 1791 * udata User supplied data for the dtor 1792 * skip Skip the dtor, it was done in uma_zfree_arg 1793 */ 1794 1795 static void 1796 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip) 1797 { 1798 uma_slab_t slab; 1799 u_int8_t *mem; 1800 u_int8_t freei; 1801 1802 ZONE_LOCK(zone); 1803 1804 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) { 1805 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); 1806 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) 1807 slab = hash_sfind(&zone->uz_hash, mem); 1808 else { 1809 mem += zone->uz_pgoff; 1810 slab = (uma_slab_t)mem; 1811 } 1812 } else { 1813 slab = (uma_slab_t)udata; 1814 } 1815 1816 /* Do we need to remove from any lists? 
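 * If this free makes the slab completely free it moves from the partial
 * list to the free list; if the slab was completely allocated it moves
 * from the full list back to the partial list.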
*/ 1817 if (slab->us_freecount+1 == zone->uz_ipers) { 1818 LIST_REMOVE(slab, us_link); 1819 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); 1820 } else if (slab->us_freecount == 0) { 1821 LIST_REMOVE(slab, us_link); 1822 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); 1823 } 1824 1825 /* Slab management stuff */ 1826 freei = ((unsigned long)item - (unsigned long)slab->us_data) 1827 / zone->uz_rsize; 1828 1829 #ifdef INVARIANTS 1830 if (!skip) 1831 uma_dbg_free(zone, slab, item); 1832 #endif 1833 1834 slab->us_freelist[freei] = slab->us_firstfree; 1835 slab->us_firstfree = freei; 1836 slab->us_freecount++; 1837 1838 /* Zone statistics */ 1839 zone->uz_free++; 1840 1841 if (!skip && zone->uz_dtor) 1842 zone->uz_dtor(item, zone->uz_size, udata); 1843 1844 if (zone->uz_flags & UMA_ZFLAG_FULL) { 1845 if (zone->uz_pages < zone->uz_maxpages) 1846 zone->uz_flags &= ~UMA_ZFLAG_FULL; 1847 1848 /* We can handle one more allocation */ 1849 wakeup_one(&zone); 1850 } 1851 1852 ZONE_UNLOCK(zone); 1853 } 1854 1855 /* See uma.h */ 1856 void 1857 uma_zone_set_max(uma_zone_t zone, int nitems) 1858 { 1859 ZONE_LOCK(zone); 1860 if (zone->uz_ppera > 1) 1861 zone->uz_maxpages = nitems * zone->uz_ppera; 1862 else 1863 zone->uz_maxpages = nitems / zone->uz_ipers; 1864 1865 if (zone->uz_maxpages * zone->uz_ipers < nitems) 1866 zone->uz_maxpages++; 1867 1868 ZONE_UNLOCK(zone); 1869 } 1870 1871 /* See uma.h */ 1872 void 1873 uma_zone_set_freef(uma_zone_t zone, uma_free freef) 1874 { 1875 ZONE_LOCK(zone); 1876 1877 zone->uz_freef = freef; 1878 1879 ZONE_UNLOCK(zone); 1880 } 1881 1882 /* See uma.h */ 1883 void 1884 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) 1885 { 1886 ZONE_LOCK(zone); 1887 1888 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC; 1889 zone->uz_allocf = allocf; 1890 1891 ZONE_UNLOCK(zone); 1892 } 1893 1894 /* See uma.h */ 1895 int 1896 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) 1897 { 1898 int pages; 1899 vm_offset_t kva; 1900 1901 mtx_lock(&Giant); 1902 1903 pages = count / zone->uz_ipers; 1904 1905 if (pages * zone->uz_ipers < count) 1906 pages++; 1907 1908 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE); 1909 1910 if (kva == 0) { 1911 mtx_unlock(&Giant); 1912 return (0); 1913 } 1914 1915 1916 if (obj == NULL) 1917 obj = vm_object_allocate(OBJT_DEFAULT, 1918 pages); 1919 else 1920 _vm_object_allocate(OBJT_DEFAULT, 1921 pages, obj); 1922 1923 ZONE_LOCK(zone); 1924 zone->uz_kva = kva; 1925 zone->uz_obj = obj; 1926 zone->uz_maxpages = pages; 1927 1928 zone->uz_allocf = obj_alloc; 1929 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC; 1930 1931 ZONE_UNLOCK(zone); 1932 mtx_unlock(&Giant); 1933 1934 return (1); 1935 } 1936 1937 /* See uma.h */ 1938 void 1939 uma_prealloc(uma_zone_t zone, int items) 1940 { 1941 int slabs; 1942 uma_slab_t slab; 1943 1944 ZONE_LOCK(zone); 1945 slabs = items / zone->uz_ipers; 1946 if (slabs * zone->uz_ipers < items) 1947 slabs++; 1948 1949 while (slabs > 0) { 1950 slab = slab_zalloc(zone, M_WAITOK); 1951 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); 1952 slabs--; 1953 } 1954 ZONE_UNLOCK(zone); 1955 } 1956 1957 /* See uma.h */ 1958 void 1959 uma_reclaim(void) 1960 { 1961 /* 1962 * You might think that the delay below would improve performance since 1963 * the allocator will give away memory that it may ask for immediately. 1964 * Really, it makes things worse, since cpu cycles are so much cheaper 1965 * than disk activity. 
1966 */ 1967 #if 0 1968 static struct timeval tv = {0}; 1969 struct timeval now; 1970 getmicrouptime(&now); 1971 if (now.tv_sec > tv.tv_sec + 30) 1972 tv = now; 1973 else 1974 return; 1975 #endif 1976 #ifdef UMA_DEBUG 1977 printf("UMA: vm asked us to release pages!\n"); 1978 #endif 1979 bucket_enable(); 1980 zone_foreach(zone_drain); 1981 1982 /* 1983 * Some slabs may have been freed but this zone will be visited early 1984 * we visit again so that we can free pages that are empty once other 1985 * zones are drained. We have to do the same for buckets. 1986 */ 1987 zone_drain(slabzone); 1988 zone_drain(bucketzone); 1989 } 1990 1991 void * 1992 uma_large_malloc(int size, int wait) 1993 { 1994 void *mem; 1995 uma_slab_t slab; 1996 u_int8_t flags; 1997 1998 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL); 1999 if (slab == NULL) 2000 return (NULL); 2001 2002 mem = page_alloc(NULL, size, &flags, wait); 2003 if (mem) { 2004 slab->us_data = mem; 2005 slab->us_flags = flags | UMA_SLAB_MALLOC; 2006 slab->us_size = size; 2007 mtx_lock(&malloc_mtx); 2008 UMA_HASH_INSERT(mallochash, slab, mem); 2009 mtx_unlock(&malloc_mtx); 2010 } else { 2011 uma_zfree_internal(slabzone, slab, NULL, 0); 2012 } 2013 2014 2015 return (mem); 2016 } 2017 2018 void 2019 uma_large_free(uma_slab_t slab) 2020 { 2021 mtx_lock(&malloc_mtx); 2022 UMA_HASH_REMOVE(mallochash, slab, slab->us_data); 2023 mtx_unlock(&malloc_mtx); 2024 page_free(slab->us_data, slab->us_size, slab->us_flags); 2025 uma_zfree_internal(slabzone, slab, NULL, 0); 2026 } 2027 2028 void 2029 uma_print_stats(void) 2030 { 2031 zone_foreach(uma_print_zone); 2032 } 2033 2034 void 2035 uma_print_zone(uma_zone_t zone) 2036 { 2037 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", 2038 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags, 2039 zone->uz_ipers, zone->uz_ppera, 2040 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free); 2041 } 2042 2043 /* 2044 * Sysctl handler for vm.zone 2045 * 2046 * stolen from vm_zone.c 2047 */ 2048 static int 2049 sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 2050 { 2051 int error, len, cnt; 2052 const int linesize = 128; /* conservative */ 2053 int totalfree; 2054 char *tmpbuf, *offset; 2055 uma_zone_t z; 2056 char *p; 2057 2058 cnt = 0; 2059 mtx_lock(&uma_mtx); 2060 LIST_FOREACH(z, &uma_zones, uz_link) 2061 cnt++; 2062 mtx_unlock(&uma_mtx); 2063 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize, 2064 M_TEMP, M_WAITOK); 2065 len = snprintf(tmpbuf, linesize, 2066 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n"); 2067 if (cnt == 0) 2068 tmpbuf[len - 1] = '\0'; 2069 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len); 2070 if (error || cnt == 0) 2071 goto out; 2072 offset = tmpbuf; 2073 mtx_lock(&uma_mtx); 2074 LIST_FOREACH(z, &uma_zones, uz_link) { 2075 if (cnt == 0) /* list may have changed size */ 2076 break; 2077 ZONE_LOCK(z); 2078 totalfree = z->uz_free + z->uz_cachefree; 2079 len = snprintf(offset, linesize, 2080 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", 2081 z->uz_name, z->uz_size, 2082 z->uz_maxpages * z->uz_ipers, 2083 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree, 2084 totalfree, 2085 (unsigned long long)z->uz_allocs); 2086 ZONE_UNLOCK(z); 2087 for (p = offset + 12; p > offset && *p == ' '; --p) 2088 /* nothing */ ; 2089 p[1] = ':'; 2090 cnt--; 2091 offset += len; 2092 } 2093 mtx_unlock(&uma_mtx); 2094 *offset++ = '\0'; 2095 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); 2096 out: 2097 FREE(tmpbuf, M_TEMP); 2098 return (error); 2099 } 2100
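
/*
 * Illustrative sketch only, kept under #if 0 so it is never compiled:
 * roughly how a kernel consumer might drive the public API above.  The
 * "foo" structure, zone and function are hypothetical examples and not
 * part of UMA itself.
 */
#if 0
struct foo {
	int	f_state;
	char	f_name[32];
};

static uma_zone_t foo_zone;

static void
foo_example(void)
{
	struct foo *f;

	/* A zone of fixed size, pointer aligned foo structures. */
	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);

	/* Optionally cap the zone and preallocate a few items. */
	uma_zone_set_max(foo_zone, 1024);
	uma_prealloc(foo_zone, 64);

	/* Allocate one item; M_WAITOK may sleep, M_ZERO clears it. */
	f = uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO);

	/* ... use f ... */

	/* Hand the item back to the per cpu cache / zone. */
	uma_zfree_arg(foo_zone, f, NULL);

	uma_zdestroy(foo_zone);
}
#endif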