1 /* 2 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 * 28 */ 29 30 /* 31 * uma_core.c Implementation of the Universal Memory allocator 32 * 33 * This allocator is intended to replace the multitude of similar object caches 34 * in the standard FreeBSD kernel. The intent is to be flexible as well as 35 * effecient. A primary design goal is to return unused memory to the rest of 36 * the system. This will make the system as a whole more flexible due to the 37 * ability to move memory to subsystems which most need it instead of leaving 38 * pools of reserved memory unused. 
39 * 40 * The basic ideas stem from similar slab/zone based allocators whose algorithms 41 * are well known. 42 * 43 */ 44 45 /* 46 * TODO: 47 * - Improve memory usage for large allocations 48 * - Investigate cache size adjustments 49 */ 50 51 /* I should really use ktr.. */ 52 /* 53 #define UMA_DEBUG 1 54 #define UMA_DEBUG_ALLOC 1 55 #define UMA_DEBUG_ALLOC_1 1 56 */ 57 58 59 #include "opt_param.h" 60 #include <sys/param.h> 61 #include <sys/systm.h> 62 #include <sys/kernel.h> 63 #include <sys/types.h> 64 #include <sys/queue.h> 65 #include <sys/malloc.h> 66 #include <sys/lock.h> 67 #include <sys/sysctl.h> 68 #include <sys/mutex.h> 69 #include <sys/proc.h> 70 #include <sys/smp.h> 71 #include <sys/vmmeter.h> 72 73 #include <vm/vm.h> 74 #include <vm/vm_object.h> 75 #include <vm/vm_page.h> 76 #include <vm/vm_param.h> 77 #include <vm/vm_map.h> 78 #include <vm/vm_kern.h> 79 #include <vm/vm_extern.h> 80 #include <vm/uma.h> 81 #include <vm/uma_int.h> 82 #include <vm/uma_dbg.h> 83 84 #include <machine/vmparam.h> 85 86 /* 87 * This is the zone from which all zones are spawned. The idea is that even 88 * the zone heads are allocated from the allocator, so we use the bss section 89 * to bootstrap us. 90 */ 91 static struct uma_zone masterzone; 92 static uma_zone_t zones = &masterzone; 93 94 /* This is the zone from which all of uma_slab_t's are allocated. */ 95 static uma_zone_t slabzone; 96 97 /* 98 * The initial hash tables come out of this zone so they can be allocated 99 * prior to malloc coming up. 100 */ 101 static uma_zone_t hashzone; 102 103 /* 104 * Zone that buckets come from. 105 */ 106 static uma_zone_t bucketzone; 107 108 /* 109 * Are we allowed to allocate buckets? 
 */
static int bucketdisable = 1;

/* Linked list of all zones in the system */
static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);

/* This mutex protects the zone list */
static struct mtx uma_mtx;

/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
    LIST_HEAD_INITIALIZER(&uma_boot_pages);

/* Count of free boottime pages */
static int uma_boot_free = 0;

/* Is the VM done starting up? */
static int booted = 0;

/* This is the handle used to schedule our working set calculator */
static struct callout uma_callout;

/* This is mp_maxid + 1, for use while looping over each cpu */
static int maxcpu;

/*
 * This structure is passed as the zone ctor arg so that I don't have to create
 * a special allocation function just for zones.
 */
struct uma_zctor_args {
	char *name;		/* Zone name, kept by reference (not copied) */
	size_t size;		/* Item size for the new zone */
	uma_ctor ctor;		/* Per-item constructor, may be NULL */
	uma_dtor dtor;		/* Per-item destructor, may be NULL */
	uma_init uminit;	/* Item initializer run at slab creation */
	uma_fini fini;		/* Item finalizer run at slab release */
	int align;		/* Alignment mask (e.g. 31 for 32-byte) */
	u_int16_t flags;	/* UMA_ZONE_* creation flags */
};

/* Prototypes..
 */

static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
static void page_free(void *, int, u_int8_t);
static uma_slab_t slab_zalloc(uma_zone_t, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void zone_drain(uma_zone_t);
static void zone_ctor(void *, int, void *);
static void zone_dtor(void *, int, void *);
static void zero_init(void *, int);
static void zone_small_init(uma_zone_t zone);
static void zone_large_init(uma_zone_t zone);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
static int hash_alloc(struct uma_hash *);
static int hash_expand(struct uma_hash *, struct uma_hash *);
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *uma_zalloc_internal(uma_zone_t, void *, int);
static void uma_zfree_internal(uma_zone_t, void *, void *, int);
static void bucket_enable(void);
static int uma_zalloc_bucket(uma_zone_t zone, int flags);
static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);

void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);

SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_vm_zone, "A", "Zone Info");
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);

/*
 * This routine checks to see whether or not it's safe to enable buckets.
 *
 * Buckets are disabled when free pages drop below the VM's free-page
 * minimum, so that the per-cpu caches do not hoard memory while the
 * system is under memory pressure.
 */

static void
bucket_enable(void)
{
	if (cnt.v_free_count < cnt.v_free_min)
		bucketdisable = 1;
	else
		bucketdisable = 0;
}


/*
 * Routine called by timeout which is used to fire off some time interval
 * based calculations.
 * (working set, stats, etc.)
 *
 * Arguments:
 *	arg  Unused
 *
 * Returns:
 *	Nothing
 */
static void
uma_timeout(void *unused)
{
	bucket_enable();
	zone_foreach(zone_timeout);

	/* Reschedule this event */
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
}

/*
 * Routine to perform timeout driven calculations.  This does the working set
 * as well as hash expanding, and per cpu statistics aggregation.
 *
 * Arguments:
 *	zone  The zone to operate on
 *
 * Returns:
 *	Nothing
 */
static void
zone_timeout(uma_zone_t zone)
{
	uma_cache_t cache;
	u_int64_t alloc;
	int free;
	int cpu;

	alloc = 0;
	free = 0;

	/*
	 * Aggregate per cpu cache statistics back to the zone.
	 *
	 * I may rewrite this to set a flag in the per cpu cache instead of
	 * locking.  If the flag is not cleared on the next round I will have
	 * to lock and do it here instead so that the statistics don't get too
	 * far out of sync.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
		for (cpu = 0; cpu < maxcpu; cpu++) {
			if (CPU_ABSENT(cpu))
				continue;
			CPU_LOCK(zone, cpu);
			cache = &zone->uz_cpu[cpu];
			/* Add them up, and reset */
			alloc += cache->uc_allocs;
			cache->uc_allocs = 0;
			/* ub_ptr is the index of the last item; count = ptr + 1 */
			if (cache->uc_allocbucket)
				free += cache->uc_allocbucket->ub_ptr + 1;
			if (cache->uc_freebucket)
				free += cache->uc_freebucket->ub_ptr + 1;
			CPU_UNLOCK(zone, cpu);
		}
	}

	/* Now push these stats back into the zone.. */
	ZONE_LOCK(zone);
	zone->uz_allocs += alloc;

	/*
	 * cachefree is an instantaneous snapshot of what is in the per cpu
	 * caches, not an accurate counter
	 */
	zone->uz_cachefree = free;

	/*
	 * Expand the zone hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is completely reduce collisions.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */

	if (zone->uz_flags & UMA_ZFLAG_HASH &&
	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
		struct uma_hash newhash;
		struct uma_hash oldhash;
		int ret;

		/*
		 * This is so involved because allocating and freeing
		 * while the zone lock is held will lead to deadlock.
		 * I have to do everything in stages and check for
		 * races.
		 */
		newhash = zone->uz_hash;
		ZONE_UNLOCK(zone);
		ret = hash_alloc(&newhash);
		ZONE_LOCK(zone);
		if (ret) {
			if (hash_expand(&zone->uz_hash, &newhash)) {
				oldhash = zone->uz_hash;
				zone->uz_hash = newhash;
			} else
				/* Lost the race (or it shrank): discard ours */
				oldhash = newhash;

			ZONE_UNLOCK(zone);
			hash_free(&oldhash);
			ZONE_LOCK(zone);
		}
	}

	/*
	 * Here we compute the working set size as the total number of items
	 * left outstanding since the last time interval.  This is slightly
	 * suboptimal. What we really want is the highest number of outstanding
	 * items during the last time quantum.  This should be close enough.
	 *
	 * The working set size is used to throttle the zone_drain function.
	 * We don't want to return memory that we may need again immediately.
	 */
	alloc = zone->uz_allocs - zone->uz_oallocs;
	zone->uz_oallocs = zone->uz_allocs;
	zone->uz_wssize = alloc;

	ZONE_UNLOCK(zone);
}

/*
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	hash  A new hash structure with the old hash size in uh_hashsize
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
static int
hash_alloc(struct uma_hash *hash)
{
	int oldsize;
	int alloc;

	oldsize = hash->uh_hashsize;

	/* We're just going to go to a power of two greater */
	if (oldsize) {
		hash->uh_hashsize = oldsize * 2;
		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
		/* XXX Shouldn't be abusing DEVBUF here */
		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
		    M_DEVBUF, M_NOWAIT);
	} else {
		/*
		 * Initial table comes from hashzone so it works before
		 * malloc does.
		 * NOTE(review): this path uses M_WAITOK even though the
		 * grown path is M_NOWAIT — confirm all first-time callers
		 * may sleep.
		 */
		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
		    M_WAITOK);
		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
	}
	if (hash->uh_slab_hash) {
		bzero(hash->uh_slab_hash, alloc);
		hash->uh_hashmask = hash->uh_hashsize - 1;
		return (1);
	}

	return (0);
}

/*
 * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation path,
 * otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	oldhash  The hash you want to expand
 *	newhash  The hash structure for the new table
 *
 * Returns:
 *	1 if the entries were moved into newhash, 0 if newhash is unusable
 *	(no table, or not actually bigger) and the caller should discard it.
 *
 * Discussion:
 */
static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
	uma_slab_t slab;
	int hval;
	int i;

	if (!newhash->uh_slab_hash)
		return (0);

	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
		return (0);

	/*
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
	 */

	for (i = 0; i < oldhash->uh_hashsize; i++)
		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
			hval = UMA_HASH(newhash, slab->us_data);
			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
			    slab, us_hlink);
		}

	return (1);
}

/*
 * Free the hash bucket to the appropriate backing store.
 *
 * Arguments:
 *	hash  The hash whose table is being freed; uh_hashsize tells us
 *	      which backing store (hashzone vs. malloc) it came from.
 *
 * Returns:
 *	Nothing
 */
static void
hash_free(struct uma_hash *hash)
{
	if (hash->uh_slab_hash == NULL)
		return;
	/* Initial-size tables came from hashzone, grown ones from malloc */
	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
		uma_zfree_internal(hashzone,
		    hash->uh_slab_hash, NULL, 0);
	else
		free(hash->uh_slab_hash, M_DEVBUF);
}

/*
 * Frees all outstanding items in a bucket
 *
 * Arguments:
 *	zone    The zone to free to, must be unlocked.
 *	bucket  The free/alloc bucket with items, cpu queue must be locked.
 *
 * Returns:
 *	Nothing
 */

static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
	uma_slab_t slab;
	int mzone;
	void *item;

	if (bucket == NULL)
		return;

	slab = NULL;
	mzone = 0;

	/* We have to lookup the slab again for malloc.. */
	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
		mzone = 1;

	while (bucket->ub_ptr > -1) {
		item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
		bucket->ub_bucket[bucket->ub_ptr] = NULL;
		KASSERT(item != NULL,
		    ("bucket_drain: botched ptr, item is NULL"));
#endif
		bucket->ub_ptr--;
		/*
		 * This is extremely inefficient.  The slab pointer was passed
		 * to uma_zfree_arg, but we lost it because the buckets don't
		 * hold them.  This will go away when free() gets a size passed
		 * to it.
		 */
		if (mzone)
			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
		uma_zfree_internal(zone, item, slab, 1);
	}
}

/*
 * Drains the per cpu caches for a zone.
 *
 * Arguments:
 *	zone  The zone to drain, must be unlocked.
 *
 * Returns:
 *	Nothing
 *
 * This function returns with the zone locked so that the per cpu queues can
 * not be filled until zone_drain is finished.
 *
 */
static void
cache_drain(uma_zone_t zone)
{
	uma_bucket_t bucket;
	uma_cache_t cache;
	int cpu;

	/*
	 * Flush out the per cpu queues.
	 *
	 * XXX This causes unnecessary thrashing due to immediately having
	 * empty per cpu queues.  I need to improve this.
	 */

	/*
	 * We have to lock each cpu cache before locking the zone
	 */
	ZONE_UNLOCK(zone);

	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		CPU_LOCK(zone, cpu);
		cache = &zone->uz_cpu[cpu];
		bucket_drain(zone, cache->uc_allocbucket);
		bucket_drain(zone, cache->uc_freebucket);
	}

	/*
	 * Drain the bucket queues and free the buckets, we just keep two per
	 * cpu (alloc/free).
	 */
	ZONE_LOCK(zone);
	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		/* Drop the zone lock; bucket_drain needs the zone unlocked */
		ZONE_UNLOCK(zone);
		bucket_drain(zone, bucket);
		uma_zfree_internal(bucketzone, bucket, NULL, 0);
		ZONE_LOCK(zone);
	}

	/* Now we do the free queue.. */
	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		uma_zfree_internal(bucketzone, bucket, NULL, 0);
	}

	/* We unlock here, but they will all block until the zone is unlocked */
	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		CPU_UNLOCK(zone, cpu);
	}

	zone->uz_cachefree = 0;
}

/*
 * Frees pages from a zone back to the system.
This is done on demand from 555 * the pageout daemon. 556 * 557 * Arguments: 558 * zone The zone to free pages from 559 * all Should we drain all items? 560 * 561 * Returns: 562 * Nothing. 563 */ 564 static void 565 zone_drain(uma_zone_t zone) 566 { 567 struct slabhead freeslabs = {}; 568 uma_slab_t slab; 569 uma_slab_t n; 570 u_int64_t extra; 571 u_int8_t flags; 572 u_int8_t *mem; 573 int i; 574 575 /* 576 * We don't want to take pages from staticly allocated zones at this 577 * time 578 */ 579 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL) 580 return; 581 582 ZONE_LOCK(zone); 583 584 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) 585 cache_drain(zone); 586 587 if (zone->uz_free < zone->uz_wssize) 588 goto finished; 589 #ifdef UMA_DEBUG 590 printf("%s working set size: %llu free items: %u\n", 591 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free); 592 #endif 593 extra = zone->uz_free - zone->uz_wssize; 594 extra /= zone->uz_ipers; 595 596 /* extra is now the number of extra slabs that we can free */ 597 598 if (extra == 0) 599 goto finished; 600 601 slab = LIST_FIRST(&zone->uz_free_slab); 602 while (slab && extra) { 603 n = LIST_NEXT(slab, us_link); 604 605 /* We have no where to free these to */ 606 if (slab->us_flags & UMA_SLAB_BOOT) { 607 slab = n; 608 continue; 609 } 610 611 LIST_REMOVE(slab, us_link); 612 zone->uz_pages -= zone->uz_ppera; 613 zone->uz_free -= zone->uz_ipers; 614 615 if (zone->uz_flags & UMA_ZFLAG_HASH) 616 UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data); 617 618 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink); 619 620 slab = n; 621 extra--; 622 } 623 finished: 624 ZONE_UNLOCK(zone); 625 626 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) { 627 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink); 628 if (zone->uz_fini) 629 for (i = 0; i < zone->uz_ipers; i++) 630 zone->uz_fini( 631 slab->us_data + (zone->uz_rsize * i), 632 zone->uz_size); 633 flags = slab->us_flags; 634 mem = slab->us_data; 635 636 if 
(zone->uz_flags & UMA_ZFLAG_OFFPAGE) 637 uma_zfree_internal(slabzone, slab, NULL, 0); 638 if (zone->uz_flags & UMA_ZFLAG_MALLOC) { 639 vm_object_t obj; 640 641 if (flags & UMA_SLAB_KMEM) 642 obj = kmem_object; 643 else 644 obj = NULL; 645 for (i = 0; i < zone->uz_ppera; i++) 646 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE), 647 obj); 648 } 649 #ifdef UMA_DEBUG 650 printf("%s: Returning %d bytes.\n", 651 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera); 652 #endif 653 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags); 654 } 655 656 } 657 658 /* 659 * Allocate a new slab for a zone. This does not insert the slab onto a list. 660 * 661 * Arguments: 662 * zone The zone to allocate slabs for 663 * wait Shall we wait? 664 * 665 * Returns: 666 * The slab that was allocated or NULL if there is no memory and the 667 * caller specified M_NOWAIT. 668 * 669 */ 670 static uma_slab_t 671 slab_zalloc(uma_zone_t zone, int wait) 672 { 673 uma_slab_t slab; /* Starting slab */ 674 u_int8_t *mem; 675 u_int8_t flags; 676 int i; 677 678 slab = NULL; 679 680 #ifdef UMA_DEBUG 681 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); 682 #endif 683 ZONE_UNLOCK(zone); 684 685 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) { 686 slab = uma_zalloc_internal(slabzone, NULL, wait); 687 if (slab == NULL) { 688 ZONE_LOCK(zone); 689 return NULL; 690 } 691 } 692 693 /* 694 * This reproduces the old vm_zone behavior of zero filling pages the 695 * first time they are added to a zone. 696 * 697 * Malloced items are zeroed in uma_zalloc. 
698 */ 699 700 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0) 701 wait |= M_ZERO; 702 else 703 wait &= ~M_ZERO; 704 705 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) { 706 mtx_lock(&Giant); 707 mem = zone->uz_allocf(zone, 708 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait); 709 mtx_unlock(&Giant); 710 if (mem == NULL) { 711 ZONE_LOCK(zone); 712 return (NULL); 713 } 714 } else { 715 uma_slab_t tmps; 716 717 if (zone->uz_ppera > 1) 718 panic("UMA: Attemping to allocate multiple pages before vm has started.\n"); 719 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 720 panic("Mallocing before uma_startup2 has been called.\n"); 721 if (uma_boot_free == 0) 722 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n"); 723 tmps = LIST_FIRST(&uma_boot_pages); 724 LIST_REMOVE(tmps, us_link); 725 uma_boot_free--; 726 mem = tmps->us_data; 727 } 728 729 /* Point the slab into the allocated memory */ 730 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) 731 slab = (uma_slab_t )(mem + zone->uz_pgoff); 732 733 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 734 for (i = 0; i < zone->uz_ppera; i++) 735 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab); 736 737 slab->us_zone = zone; 738 slab->us_data = mem; 739 740 /* 741 * This is intended to spread data out across cache lines. 742 * 743 * This code doesn't seem to work properly on x86, and on alpha 744 * it makes absolutely no performance difference. I'm sure it could 745 * use some tuning, but sun makes outrageous claims about it's 746 * performance. 
747 */ 748 #if 0 749 if (zone->uz_cachemax) { 750 slab->us_data += zone->uz_cacheoff; 751 zone->uz_cacheoff += UMA_CACHE_INC; 752 if (zone->uz_cacheoff > zone->uz_cachemax) 753 zone->uz_cacheoff = 0; 754 } 755 #endif 756 757 slab->us_freecount = zone->uz_ipers; 758 slab->us_firstfree = 0; 759 slab->us_flags = flags; 760 for (i = 0; i < zone->uz_ipers; i++) 761 slab->us_freelist[i] = i+1; 762 763 if (zone->uz_init) 764 for (i = 0; i < zone->uz_ipers; i++) 765 zone->uz_init(slab->us_data + (zone->uz_rsize * i), 766 zone->uz_size); 767 ZONE_LOCK(zone); 768 769 if (zone->uz_flags & UMA_ZFLAG_HASH) 770 UMA_HASH_INSERT(&zone->uz_hash, slab, mem); 771 772 zone->uz_pages += zone->uz_ppera; 773 zone->uz_free += zone->uz_ipers; 774 775 776 return (slab); 777 } 778 779 /* 780 * Allocates a number of pages from the system 781 * 782 * Arguments: 783 * zone Unused 784 * bytes The number of bytes requested 785 * wait Shall we wait? 786 * 787 * Returns: 788 * A pointer to the alloced memory or possibly 789 * NULL if M_NOWAIT is set. 790 */ 791 static void * 792 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 793 { 794 void *p; /* Returned page */ 795 796 *pflag = UMA_SLAB_KMEM; 797 p = (void *) kmem_malloc(kmem_map, bytes, wait); 798 799 return (p); 800 } 801 802 /* 803 * Allocates a number of pages from within an object 804 * 805 * Arguments: 806 * zone Unused 807 * bytes The number of bytes requested 808 * wait Shall we wait? 809 * 810 * Returns: 811 * A pointer to the alloced memory or possibly 812 * NULL if M_NOWAIT is set. 813 * 814 * TODO: If we fail during a multi-page allocation release the pages that have 815 * already been allocated. 
 */
static void *
obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	vm_offset_t zkva;
	vm_offset_t retkva;
	vm_page_t p;
	int pages;

	retkva = 0;
	/* Use the zone's current page count as the pindex to start at */
	pages = zone->uz_pages;

	/*
	 * This looks a little weird since we're getting one page at a time
	 */
	while (bytes > 0) {
		p = vm_page_alloc(zone->uz_obj, pages,
		    VM_ALLOC_INTERRUPT);
		if (p == NULL)
			return (NULL);

		zkva = zone->uz_kva + pages * PAGE_SIZE;
		/* Remember the KVA of the first page; it's what we return */
		if (retkva == 0)
			retkva = zkva;
		pmap_qenter(zkva, &p, 1);
		bytes -= PAGE_SIZE;
		pages += 1;
	}

	*flags = UMA_SLAB_PRIV;

	return ((void *)retkva);
}

/*
 * Frees a number of pages to the system
 *
 * Arguments:
 *	mem    A pointer to the memory to be freed
 *	size   The size of the memory being freed
 *	flags  The original p->us_flags field
 *
 * Returns:
 *	Nothing
 *
 */
static void
page_free(void *mem, int size, u_int8_t flags)
{
	vm_map_t map;

	if (flags & UMA_SLAB_KMEM)
		map = kmem_map;
	else
		panic("UMA: page_free used with invalid flags %d\n", flags);

	kmem_free(map, (vm_offset_t)mem, size);
}

/*
 * Zero fill initializer
 *
 * Arguments/Returns follow uma_init specifications
 *
 */
static void
zero_init(void *mem, int size)
{
	bzero(mem, size);
}

/*
 * Finish creating a small uma zone.  This calculates ipers, and the zone size.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_small_init(uma_zone_t zone)
{
	int rsize;
	int memused;
	int ipers;

	rsize = zone->uz_size;

	if (rsize < UMA_SMALLEST_UNIT)
		rsize = UMA_SMALLEST_UNIT;

	/* Round up to the alignment boundary */
	if (rsize & zone->uz_align)
		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);

	zone->uz_rsize = rsize;

	rsize += 1;	/* Account for the byte of linkage */
	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
	zone->uz_ppera = 1;

	memused = zone->uz_ipers * zone->uz_rsize;

	/* Can we do any better? */
	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
		/* Internal zones can't go OFFPAGE (would recurse) */
		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
			return;
		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
		if (ipers > zone->uz_ipers) {
			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
			if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
				zone->uz_flags |= UMA_ZFLAG_HASH;
			zone->uz_ipers = ipers;
		}
	}

}

/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_large_init(uma_zone_t zone)
{
	int pages;

	pages = zone->uz_size / UMA_SLAB_SIZE;

	/* Account for remainder */
	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
		pages++;

	zone->uz_ppera = pages;
	zone->uz_ipers = 1;

	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
		zone->uz_flags |= UMA_ZFLAG_HASH;

	zone->uz_rsize = zone->uz_size;
}

/*
 * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
 * the zone onto the global zone list.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_zcreat_args
 *
 */

static void
zone_ctor(void *mem, int size, void *udata)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	int privlc;
	int cplen;
	int cpu;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_size = arg->size;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_init = arg->uminit;
	zone->uz_fini = arg->fini;
	zone->uz_align = arg->align;
	zone->uz_free = 0;
	zone->uz_pages = 0;
	zone->uz_flags = 0;
	zone->uz_allocf = page_alloc;
	zone->uz_freef = page_free;

	if (arg->flags & UMA_ZONE_ZINIT)
		zone->uz_init = zero_init;

	if (arg->flags & UMA_ZONE_INTERNAL)
		zone->uz_flags |= UMA_ZFLAG_INTERNAL;

	if (arg->flags & UMA_ZONE_MALLOC)
		zone->uz_flags |= UMA_ZFLAG_MALLOC;

	if (arg->flags & UMA_ZONE_NOFREE)
		zone->uz_flags |= UMA_ZFLAG_NOFREE;

	if (arg->flags & UMA_ZONE_VM)
		zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;

	if (zone->uz_size > UMA_SLAB_SIZE)
		zone_large_init(zone);
	else
		zone_small_init(zone);
#ifdef UMA_MD_SMALL_ALLOC
	if (zone->uz_ppera == 1) {
		zone->uz_allocf = uma_small_alloc;
		zone->uz_freef = uma_small_free;
	}
#endif	/* UMA_MD_SMALL_ALLOC */

	if (arg->flags & UMA_ZONE_MTXCLASS)
		privlc = 1;
	else
		privlc = 0;

	/* We do this so that the per cpu lock name is unique for each zone */
	memcpy(zone->uz_lname, "PCPU ", 5);
	cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
	memcpy(zone->uz_lname+5, zone->uz_name, cplen);
	zone->uz_lname[LOCKNAME_LEN - 1] = '\0';

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on a ALIGN_PTR boundary.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
		int totsize;
		int waste;

		/* Size of the slab struct and free list */
		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;

		waste = zone->uz_pgoff;
		waste -= (zone->uz_ipers * zone->uz_rsize);

		/*
		 * This calculates how much space we have for cache line size
		 * optimizations.  It works by offsetting each slab slightly.
		 * Currently it breaks on x86, and so it is disabled.
		 */

		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
			zone->uz_cachemax = waste - UMA_CACHE_INC;
			zone->uz_cacheoff = 0;
		}

		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
		    + zone->uz_ipers;
		/* I don't think it's possible, but I'll make sure anyway */
		if (totsize > UMA_SLAB_SIZE) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
			    zone->uz_size);
			panic("UMA slab won't fit.\n");
		}
	}

	if (zone->uz_flags & UMA_ZFLAG_HASH)
		hash_alloc(&zone->uz_hash);

#ifdef UMA_DEBUG
	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
	    zone->uz_name, zone,
	    zone->uz_size, zone->uz_ipers,
	    zone->uz_ppera, zone->uz_pgoff);
#endif
	ZONE_LOCK_INIT(zone, privlc);

	mtx_lock(&uma_mtx);
	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
	mtx_unlock(&uma_mtx);

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */

	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
		return;

	if (zone->uz_ipers < UMA_BUCKET_SIZE)
		zone->uz_count = zone->uz_ipers - 1;
	else
		zone->uz_count = UMA_BUCKET_SIZE - 1;

	for (cpu = 0; cpu < maxcpu; cpu++)
		CPU_LOCK_INIT(zone, cpu, privlc);
}

/*
 * Zone header dtor.  This frees all data, destroys locks, frees the hash table
 * and removes the zone from the global list.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */

static void
zone_dtor(void *arg, int size, void *udata)
{
	uma_zone_t zone;
	int cpu;

	zone = (uma_zone_t)arg;

	/* Zero the working set so zone_drain is free to release everything */
	ZONE_LOCK(zone);
	zone->uz_wssize = 0;
	ZONE_UNLOCK(zone);

	mtx_lock(&uma_mtx);
	LIST_REMOVE(zone, uz_link);
	zone_drain(zone);
	mtx_unlock(&uma_mtx);

	ZONE_LOCK(zone);
	if (zone->uz_free != 0)
		printf("Zone %s was not empty.  Lost %d pages of memory.\n",
		    zone->uz_name, zone->uz_pages);

	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
		for (cpu = 0; cpu < maxcpu; cpu++)
			CPU_LOCK_FINI(zone, cpu);

	ZONE_UNLOCK(zone);
	/*
	 * NOTE(review): the table is allocated under UMA_ZFLAG_HASH (see
	 * zone_ctor) but freed under UMA_ZFLAG_OFFPAGE here; for
	 * OFFPAGE+MALLOC zones hash_free is called on a never-allocated
	 * hash.  Harmless today (uh_slab_hash is NULL from bzero, and
	 * hash_free checks for NULL) — confirm the asymmetry is intended.
	 */
	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
		hash_free(&zone->uz_hash);

	ZONE_LOCK_FINI(zone);
}
/*
 * Traverses every zone in the system and calls a callback
 *
 * Arguments:
 *	zfunc  A pointer to a function which accepts a zone
 *	       as an argument.
 *
 * Returns:
 *	Nothing
 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_zone_t zone;

	mtx_lock(&uma_mtx);
	LIST_FOREACH(zone, &uma_zones, uz_link) {
		zfunc(zone);
	}
	mtx_unlock(&uma_mtx);
}

/* Public functions */
/* See uma.h */
void
uma_startup(void *bootmem)
{
	struct uma_zctor_args args;
	uma_slab_t slab;
	int slabsize;
	int i;

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone.\n");
#endif
#ifdef SMP
	maxcpu = mp_maxid + 1;
#else
	maxcpu = 1;
#endif
#ifdef UMA_DEBUG
	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
	Debugger("stop");
#endif
	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
	/* "manually" Create the initial zone */
	args.name = "UMA Zones";
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (maxcpu - 1));
	args.ctor = zone_ctor;
	args.dtor = zone_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZONE_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	/* Carve the caller-supplied boot memory into single-page slabs */
	for (i = 0; i < UMA_BOOT_PAGES; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (u_int8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
		uma_boot_free++;
	}

#ifdef UMA_DEBUG
	printf("Creating slab zone.\n");
#endif

	/*
	 * This is the max number of free list items we'll have with
	 * offpage slabs.
	 */

	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
	slabsize /= UMA_MAX_WASTE;
	slabsize++;	/* In case there it's rounded */
	slabsize += sizeof(struct uma_slab);

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
	    slabsize,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

#ifdef UMA_MD_SMALL_ALLOC
	booted = 1;
#endif

#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}

/* see uma.h */
void
uma_startup2(void)
{
	booted = 1;
	bucket_enable();
#ifdef UMA_DEBUG
	printf("UMA startup2 complete.\n");
#endif
}

/*
 * Initialize our callout handle
 *
 */

static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
	printf("Starting callout.\n");
#endif
	callout_init(&uma_callout, 0);
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
	printf("UMA startup3 complete.\n");
#endif
}

/* See uma.h */
uma_zone_t
uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
    uma_init uminit, uma_fini fini, int align, u_int16_t flags)

{
	struct uma_zctor_args args;

	/* This stuff is essential for the zone ctor */
	args.name = name;
	args.size = size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;

	return (uma_zalloc_internal(zones, &args, M_WAITOK));
}

/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
{
	/* The zone header lives in 'zones'; zone_dtor does the teardown. */
	uma_zfree_internal(zones, zone, NULL, 0);
}

/* See uma.h */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
	void *item;
	uma_cache_t cache;
	uma_bucket_t bucket;
	int cpu;

	/* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif

	/* A sleeping allocation must not come from interrupt context. */
	if (!(flags & M_NOWAIT)) {
		KASSERT(curthread->td_intr_nesting_level == 0,
		    ("malloc(M_WAITOK) in interrupt context"));
		WITNESS_SLEEP(1, NULL);
	}

zalloc_restart:
	cpu = PCPU_GET(cpuid);
	CPU_LOCK(zone, cpu);
	cache = &zone->uz_cpu[cpu];

zalloc_start:
	bucket = cache->uc_allocbucket;

	if (bucket) {
		/* ub_ptr indexes the last valid item; -1 means empty. */
		if (bucket->ub_ptr > -1) {
			item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
			bucket->ub_bucket[bucket->ub_ptr] = NULL;
#endif
			bucket->ub_ptr--;
			KASSERT(item != NULL,
			    ("uma_zalloc: Bucket pointer mangled."));
			cache->uc_allocs++;
#ifdef INVARIANTS
			uma_dbg_alloc(zone, NULL, item);
#endif
			/* Run ctor/zero outside the per-cpu lock. */
			CPU_UNLOCK(zone, cpu);
			if (zone->uz_ctor)
				zone->uz_ctor(item, zone->uz_size, udata);
			if (flags & M_ZERO)
				bzero(item, zone->uz_size);
			return (item);
		} else if (cache->uc_freebucket) {
			/*
			 * We have run out of items in our allocbucket.
			 * See if we can switch with our free bucket.
			 */
			if (cache->uc_freebucket->ub_ptr > -1) {
				uma_bucket_t swap;

#ifdef UMA_DEBUG_ALLOC
				printf("uma_zalloc: Swapping empty with alloc.\n");
#endif
				swap = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = swap;

				goto zalloc_start;
			}
		}
	}
	/* Slow path: fall back to the zone's bucket lists. */
	ZONE_LOCK(zone);
	/* Since we have locked the zone we may as well send back our stats */
	zone->uz_allocs += cache->uc_allocs;
	cache->uc_allocs = 0;

	/* Our old one is now a free bucket */
	if (cache->uc_allocbucket) {
		KASSERT(cache->uc_allocbucket->ub_ptr == -1,
		    ("uma_zalloc_arg: Freeing a non free bucket."));
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    cache->uc_allocbucket, ub_link);
		cache->uc_allocbucket = NULL;
	}

	/* Check the free list for a new alloc bucket */
	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		KASSERT(bucket->ub_ptr != -1,
		    ("uma_zalloc_arg: Returning an empty bucket."));

		LIST_REMOVE(bucket, ub_link);
		cache->uc_allocbucket = bucket;
		ZONE_UNLOCK(zone);
		goto zalloc_start;
	}
	/* We are no longer associated with this cpu!!! */
	CPU_UNLOCK(zone, cpu);

	/* Bump up our uz_count so we get here less */
	if (zone->uz_count < UMA_BUCKET_SIZE - 1)
		zone->uz_count++;

	/*
	 * Now lets just fill a bucket and put it on the free list.  If that
	 * works we'll restart the allocation from the begining.
	 */
	if (uma_zalloc_bucket(zone, flags)) {
		ZONE_UNLOCK(zone);
		goto zalloc_restart;
	}
	ZONE_UNLOCK(zone);
	/*
	 * We may not be able to get a bucket so return an actual item.
	 */
#ifdef UMA_DEBUG
	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

	return (uma_zalloc_internal(zone, udata, flags));
}

/*
 * Finds a slab with at least one free item, allocating a new slab from the
 * VM if necessary.  Called and returns with the zone lock held; may drop it
 * (in slab_zalloc/msleep) along the way.
 */
static uma_slab_t
uma_zone_slab(uma_zone_t zone, int flags)
{
	uma_slab_t slab;

	/*
	 * This is to prevent us from recursively trying to allocate
	 * buckets.  The problem is that if an allocation forces us to
	 * grab a new bucket we will call page_alloc, which will go off
	 * and cause the vm to allocate vm_map_entries.  If we need new
	 * buckets there too we will recurse in kmem_alloc and bad
	 * things happen.  So instead we return a NULL bucket, and make
	 * the code that allocates buckets smart enough to deal with it
	 */
	if (zone == bucketzone && zone->uz_recurse != 0)
		return (NULL);

	slab = NULL;

	for (;;) {
		/*
		 * Find a slab with some space.  Prefer slabs that are partially
		 * used over those that are totally full.  This helps to reduce
		 * fragmentation.
		 */
		if (zone->uz_free != 0) {
			if (!LIST_EMPTY(&zone->uz_part_slab)) {
				slab = LIST_FIRST(&zone->uz_part_slab);
			} else {
				slab = LIST_FIRST(&zone->uz_free_slab);
				LIST_REMOVE(slab, us_link);
				LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
				    us_link);
			}
			return (slab);
		}

		/*
		 * M_NOVM means don't ask at all!
		 */
		if (flags & M_NOVM)
			break;

		if (zone->uz_maxpages &&
		    zone->uz_pages >= zone->uz_maxpages) {
			zone->uz_flags |= UMA_ZFLAG_FULL;

			/*
			 * Sleep on the zone pointer itself; uz_lock is
			 * dropped while sleeping.  NOTE(review): the
			 * wakeup_one() in uma_zfree_internal passes &zone
			 * (address of its local variable), which does not
			 * match this channel -- verify the wakeup side.
			 */
			if (flags & M_WAITOK)
				msleep(zone, &zone->uz_lock, PVM, "zonelimit",
				    0);
			else
				break;
			continue;
		}
		/* uz_recurse guards the bucketzone recursion case above. */
		zone->uz_recurse++;
		slab = slab_zalloc(zone, flags);
		zone->uz_recurse--;
		/*
		 * If we got a slab here it's safe to mark it partially used
		 * and return.  We assume that the caller is going to remove
		 * at least one item.
		 */
		if (slab) {
			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
			return (slab);
		}
		/*
		 * We might not have been able to get a slab but another cpu
		 * could have while we were unlocked.  Check again before we
		 * fail.
		 */
		if ((flags & M_WAITOK) == 0)
			flags |= M_NOVM;
	}
	return (slab);
}

/*
 * Removes one item from 'slab' and updates the slab/zone accounting.
 * Called with the zone lock held; 'slab' must have at least one free item.
 */
static __inline void *
uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
{
	void *item;
	u_int8_t freei;

	/* Pop the head of the slab's embedded free-index list. */
	freei = slab->us_firstfree;
	slab->us_firstfree = slab->us_freelist[freei];
	item = slab->us_data + (zone->uz_rsize * freei);

	slab->us_freecount--;
	zone->uz_free--;
#ifdef INVARIANTS
	uma_dbg_alloc(zone, slab, item);
#endif
	/* Move this slab to the full list */
	if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
	}

	return (item);
}

/*
 * Fills a bucket with items and puts it on the zone's full-bucket list.
 * Called and returns with the zone lock held (dropped temporarily to
 * allocate the bucket itself).  Returns 1 if a non-empty bucket was queued,
 * 0 otherwise.
 */
static int
uma_zalloc_bucket(uma_zone_t zone, int flags)
{
	uma_bucket_t bucket;
	uma_slab_t slab;

	/*
	 * Try this zone's free list first so we don't allocate extra buckets.
	 */
	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		KASSERT(bucket->ub_ptr == -1,
		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
		LIST_REMOVE(bucket, ub_link);
	} else {
		int bflags;

		bflags = flags;
		if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
			bflags |= M_NOVM;

		/* Drop the zone lock around the bucket allocation. */
		ZONE_UNLOCK(zone);
		bucket = uma_zalloc_internal(bucketzone,
		    NULL, bflags);
		ZONE_LOCK(zone);
		if (bucket != NULL) {
#ifdef INVARIANTS
			bzero(bucket, bucketzone->uz_size);
#endif
			bucket->ub_ptr = -1;
		}
	}

	if (bucket == NULL)
		return (0);

#ifdef SMP
	/*
	 * This code is here to limit the number of simultaneous bucket fills
	 * for any given zone to the number of per cpu caches in this zone. This
	 * is done so that we don't allocate more memory than we really need.
	 */
	if (zone->uz_fills >= mp_ncpus)
		goto done;

#endif
	zone->uz_fills++;

	/* Try to keep the buckets totally full */
	while ((slab = uma_zone_slab(zone, flags)) != NULL &&
	    bucket->ub_ptr < zone->uz_count) {
		while (slab->us_freecount &&
		    bucket->ub_ptr < zone->uz_count) {
			bucket->ub_bucket[++bucket->ub_ptr] =
			    uma_slab_alloc(zone, slab);
		}
		/* Don't block on the next fill */
		flags |= M_NOWAIT;
		flags &= ~M_WAITOK;
	}

	zone->uz_fills--;

	if (bucket->ub_ptr != -1) {
		LIST_INSERT_HEAD(&zone->uz_full_bucket,
		    bucket, ub_link);
		return (1);
	}
#ifdef SMP
done:
#endif
	/* Could not fill anything; give the bucket back. */
	uma_zfree_internal(bucketzone, bucket, NULL, 0);

	return (0);
}
/*
 * Allocates an item for an internal zone
 *
 * Arguments
 *	zone   The zone to alloc for.
 *	udata  The data to be passed to the constructor.
 *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
 *
 * Returns
 *	NULL if there is no memory and M_NOWAIT is set
 *	An item if successful
 */
static void *
uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
{
	uma_slab_t slab;
	void *item;

	item = NULL;

	/*
	 * This is to stop us from allocating per cpu buckets while we're
	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
	 * boot pages.
	 */
	if (bucketdisable && zone == bucketzone)
		return (NULL);

#ifdef UMA_DEBUG_ALLOC
	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
	ZONE_LOCK(zone);

	slab = uma_zone_slab(zone, flags);
	if (slab == NULL) {
		ZONE_UNLOCK(zone);
		return (NULL);
	}

	item = uma_slab_alloc(zone, slab);

	ZONE_UNLOCK(zone);

	/* ctor/zero run unlocked, same as the fast path. */
	if (zone->uz_ctor != NULL)
		zone->uz_ctor(item, zone->uz_size, udata);
	if (flags & M_ZERO)
		bzero(item, zone->uz_size);

	return (item);
}

/* See uma.h */
void
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
	uma_cache_t cache;
	uma_bucket_t bucket;
	int bflags;
	int cpu;

	/* This is the fast path free */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
	/*
	 * The race here is acceptable.  If we miss it we'll just have to wait
	 * a little longer for the limits to be reset.
	 */
	if (zone->uz_flags & UMA_ZFLAG_FULL)
		goto zfree_internal;

	/* Destructor runs before the item is cached. */
	if (zone->uz_dtor)
		zone->uz_dtor(item, zone->uz_size, udata);

zfree_restart:
	cpu = PCPU_GET(cpuid);
	CPU_LOCK(zone, cpu);
	cache = &zone->uz_cpu[cpu];

zfree_start:
	bucket = cache->uc_freebucket;

	if (bucket) {
		/*
		 * Do we have room in our bucket?  It is OK for this uz count
		 * check to be slightly out of sync.
		 */
		if (bucket->ub_ptr < zone->uz_count) {
			bucket->ub_ptr++;
			KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
			    ("uma_zfree: Freeing to non free bucket index."));
			bucket->ub_bucket[bucket->ub_ptr] = item;
#ifdef INVARIANTS
			/* Malloc zones carry the slab pointer in udata. */
			if (zone->uz_flags & UMA_ZFLAG_MALLOC)
				uma_dbg_free(zone, udata, item);
			else
				uma_dbg_free(zone, NULL, item);
#endif
			CPU_UNLOCK(zone, cpu);
			return;
		} else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
			printf("uma_zfree: Swapping buckets.\n");
#endif
			/*
			 * We have run out of space in our freebucket.
			 * See if we can switch with our alloc bucket.
			 */
			if (cache->uc_allocbucket->ub_ptr <
			    cache->uc_freebucket->ub_ptr) {
				uma_bucket_t swap;

				swap = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = swap;

				goto zfree_start;
			}
		}
	}

	/*
	 * We can get here for two reasons:
	 *
	 * 1) The buckets are NULL
	 * 2) The alloc and free buckets are both somewhat full.
	 *
	 */
	ZONE_LOCK(zone);

	bucket = cache->uc_freebucket;
	cache->uc_freebucket = NULL;

	/* Can we throw this on the zone full list? */
	if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
		printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
		/* ub_ptr is pointing to the last free item */
		KASSERT(bucket->ub_ptr != -1,
		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
		LIST_INSERT_HEAD(&zone->uz_full_bucket,
		    bucket, ub_link);
	}
	/* Grab an empty bucket from the zone if one is cached. */
	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		cache->uc_freebucket = bucket;
		goto zfree_start;
	}
	/* We're done with this CPU now */
	CPU_UNLOCK(zone, cpu);

	/* And the zone..
*/ 1763 ZONE_UNLOCK(zone); 1764 1765 #ifdef UMA_DEBUG_ALLOC 1766 printf("uma_zfree: Allocating new free bucket.\n"); 1767 #endif 1768 bflags = M_NOWAIT; 1769 1770 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE) 1771 bflags |= M_NOVM; 1772 #ifdef INVARIANTS 1773 bflags |= M_ZERO; 1774 #endif 1775 bucket = uma_zalloc_internal(bucketzone, 1776 NULL, bflags); 1777 if (bucket) { 1778 bucket->ub_ptr = -1; 1779 ZONE_LOCK(zone); 1780 LIST_INSERT_HEAD(&zone->uz_free_bucket, 1781 bucket, ub_link); 1782 ZONE_UNLOCK(zone); 1783 goto zfree_restart; 1784 } 1785 1786 /* 1787 * If nothing else caught this, we'll just do an internal free. 1788 */ 1789 1790 zfree_internal: 1791 1792 uma_zfree_internal(zone, item, udata, 0); 1793 1794 return; 1795 1796 } 1797 1798 /* 1799 * Frees an item to an INTERNAL zone or allocates a free bucket 1800 * 1801 * Arguments: 1802 * zone The zone to free to 1803 * item The item we're freeing 1804 * udata User supplied data for the dtor 1805 * skip Skip the dtor, it was done in uma_zfree_arg 1806 */ 1807 1808 static void 1809 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip) 1810 { 1811 uma_slab_t slab; 1812 u_int8_t *mem; 1813 u_int8_t freei; 1814 1815 if (!skip && zone->uz_dtor) 1816 zone->uz_dtor(item, zone->uz_size, udata); 1817 1818 ZONE_LOCK(zone); 1819 1820 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) { 1821 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); 1822 if (zone->uz_flags & UMA_ZFLAG_HASH) 1823 slab = hash_sfind(&zone->uz_hash, mem); 1824 else { 1825 mem += zone->uz_pgoff; 1826 slab = (uma_slab_t)mem; 1827 } 1828 } else { 1829 slab = (uma_slab_t)udata; 1830 } 1831 1832 /* Do we need to remove from any lists? 
*/ 1833 if (slab->us_freecount+1 == zone->uz_ipers) { 1834 LIST_REMOVE(slab, us_link); 1835 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); 1836 } else if (slab->us_freecount == 0) { 1837 LIST_REMOVE(slab, us_link); 1838 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); 1839 } 1840 1841 /* Slab management stuff */ 1842 freei = ((unsigned long)item - (unsigned long)slab->us_data) 1843 / zone->uz_rsize; 1844 1845 #ifdef INVARIANTS 1846 if (!skip) 1847 uma_dbg_free(zone, slab, item); 1848 #endif 1849 1850 slab->us_freelist[freei] = slab->us_firstfree; 1851 slab->us_firstfree = freei; 1852 slab->us_freecount++; 1853 1854 /* Zone statistics */ 1855 zone->uz_free++; 1856 1857 if (zone->uz_flags & UMA_ZFLAG_FULL) { 1858 if (zone->uz_pages < zone->uz_maxpages) 1859 zone->uz_flags &= ~UMA_ZFLAG_FULL; 1860 1861 /* We can handle one more allocation */ 1862 wakeup_one(&zone); 1863 } 1864 1865 ZONE_UNLOCK(zone); 1866 } 1867 1868 /* See uma.h */ 1869 void 1870 uma_zone_set_max(uma_zone_t zone, int nitems) 1871 { 1872 ZONE_LOCK(zone); 1873 if (zone->uz_ppera > 1) 1874 zone->uz_maxpages = nitems * zone->uz_ppera; 1875 else 1876 zone->uz_maxpages = nitems / zone->uz_ipers; 1877 1878 if (zone->uz_maxpages * zone->uz_ipers < nitems) 1879 zone->uz_maxpages++; 1880 1881 ZONE_UNLOCK(zone); 1882 } 1883 1884 /* See uma.h */ 1885 void 1886 uma_zone_set_freef(uma_zone_t zone, uma_free freef) 1887 { 1888 ZONE_LOCK(zone); 1889 1890 zone->uz_freef = freef; 1891 1892 ZONE_UNLOCK(zone); 1893 } 1894 1895 /* See uma.h */ 1896 void 1897 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) 1898 { 1899 ZONE_LOCK(zone); 1900 1901 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC; 1902 zone->uz_allocf = allocf; 1903 1904 ZONE_UNLOCK(zone); 1905 } 1906 1907 /* See uma.h */ 1908 int 1909 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) 1910 { 1911 int pages; 1912 vm_offset_t kva; 1913 1914 mtx_lock(&Giant); 1915 1916 pages = count / zone->uz_ipers; 1917 1918 if (pages * zone->uz_ipers 
	    < count)
		pages++;

	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);

	if (kva == 0) {
		mtx_unlock(&Giant);
		return (0);
	}

	/* Create a backing object if the caller did not supply one. */
	if (obj == NULL)
		obj = vm_object_allocate(OBJT_DEFAULT,
		    pages);
	else
		_vm_object_allocate(OBJT_DEFAULT,
		    pages, obj);

	ZONE_LOCK(zone);
	zone->uz_kva = kva;
	zone->uz_obj = obj;
	zone->uz_maxpages = pages;

	/* Future slab allocations for this zone come from the object. */
	zone->uz_allocf = obj_alloc;
	zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;

	ZONE_UNLOCK(zone);
	mtx_unlock(&Giant);

	return (1);
}

/* See uma.h */
void
uma_prealloc(uma_zone_t zone, int items)
{
	int slabs;
	uma_slab_t slab;

	ZONE_LOCK(zone);
	/* Round the item count up to whole slabs. */
	slabs = items / zone->uz_ipers;
	if (slabs * zone->uz_ipers < items)
		slabs++;

	while (slabs > 0) {
		slab = slab_zalloc(zone, M_WAITOK);
		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
		slabs--;
	}
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_reclaim(void)
{
	/*
	 * You might think that the delay below would improve performance since
	 * the allocator will give away memory that it may ask for immediately.
	 * Really, it makes things worse, since cpu cycles are so much cheaper
	 * than disk activity.
	 */
#if 0
	static struct timeval tv = {0};
	struct timeval now;
	getmicrouptime(&now);
	if (now.tv_sec > tv.tv_sec + 30)
		tv = now;
	else
		return;
#endif
#ifdef UMA_DEBUG
	printf("UMA: vm asked us to release pages!\n");
#endif
	bucket_enable();
	zone_foreach(zone_drain);

	/*
	 * Some slabs may have been freed but this zone will be visited early
	 * we visit again so that we can free pages that are empty once other
	 * zones are drained.  We have to do the same for buckets.
 */
	zone_drain(slabzone);
	zone_drain(bucketzone);
}

/*
 * Allocates a multi-page ("large") malloc chunk: a slab header from
 * slabzone plus 'size' bytes from page_alloc.  Returns NULL on failure.
 */
void *
uma_large_malloc(int size, int wait)
{
	void *mem;
	uma_slab_t slab;
	u_int8_t flags;

	slab = uma_zalloc_internal(slabzone, NULL, wait);
	if (slab == NULL)
		return (NULL);

	mem = page_alloc(NULL, size, &flags, wait);
	if (mem) {
		/* Record the slab in the page so free can find it. */
		vsetslab((vm_offset_t)mem, slab);
		slab->us_data = mem;
		slab->us_flags = flags | UMA_SLAB_MALLOC;
		slab->us_size = size;
	} else {
		/* Page allocation failed; give the slab header back. */
		uma_zfree_internal(slabzone, slab, NULL, 0);
	}

	return (mem);
}

/* Releases the pages and slab header of a uma_large_malloc() chunk. */
void
uma_large_free(uma_slab_t slab)
{
	vsetobj((vm_offset_t)slab->us_data, kmem_object);
	page_free(slab->us_data, slab->us_size, slab->us_flags);
	uma_zfree_internal(slabzone, slab, NULL, 0);
}

/* Prints one line of statistics for every zone (debugging aid). */
void
uma_print_stats(void)
{
	zone_foreach(uma_print_zone);
}

void
uma_print_zone(uma_zone_t zone)
{
	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
	    zone->uz_ipers, zone->uz_ppera,
	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
}

/*
 * Sysctl handler for vm.zone
 *
 * stolen from vm_zone.c
 */
static int
sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
{
	int error, len, cnt;
	const int linesize = 128;	/* conservative */
	int totalfree;
	char *tmpbuf, *offset;
	uma_zone_t z;
	char *p;

	/* First pass: count the zones so we can size the buffer. */
	cnt = 0;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(z, &uma_zones, uz_link)
		cnt++;
	mtx_unlock(&uma_mtx);
	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
	    M_TEMP, M_WAITOK);
	len = snprintf(tmpbuf, linesize,
	    "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
	if (cnt == 0)
		tmpbuf[len - 1] = '\0';
	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ?
	    len-1 : len);
	if (error || cnt == 0)
		goto out;
	offset = tmpbuf;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(z, &uma_zones, uz_link) {
		if (cnt == 0)	/* list may have changed size */
			break;
		ZONE_LOCK(z);
		totalfree = z->uz_free + z->uz_cachefree;
		len = snprintf(offset, linesize,
		    "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
		    z->uz_name, z->uz_size,
		    z->uz_maxpages * z->uz_ipers,
		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
		    totalfree,
		    (unsigned long long)z->uz_allocs);
		ZONE_UNLOCK(z);
		/* Replace the padding after the name with a ':' separator. */
		for (p = offset + 12; p > offset && *p == ' '; --p)
			/* nothing */ ;
		p[1] = ':';
		cnt--;
		offset += len;
	}
	mtx_unlock(&uma_mtx);
	*offset++ = '\0';
	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
out:
	FREE(tmpbuf, M_TEMP);
	return (error);
}