/*
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */

/*
 * uma_core.c  Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.  This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose
 * algorithms are well known.
 *
 */

/*
 * TODO:
 *	- Improve memory usage for large allocations
 *	- Investigate cache size adjustments
 */

/* I should really use ktr.. */
/*
#define UMA_DEBUG 1
#define UMA_DEBUG_ALLOC 1
#define UMA_DEBUG_ALLOC_1 1
*/


#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

/*
 * This is the zone from which all zones are spawned.  The idea is that even
 * the zone heads are allocated from the allocator, so we use the bss section
 * to bootstrap us.
 */
static struct uma_zone masterzone;
static uma_zone_t zones = &masterzone;

/* This is the zone from which all of uma_slab_t's are allocated. */
static uma_zone_t slabzone;

/*
 * The initial hash tables come out of this zone so they can be allocated
 * prior to malloc coming up.
 */
static uma_zone_t hashzone;

/*
 * Zone that buckets come from.
 */
static uma_zone_t bucketzone;

/*
 * Are we allowed to allocate buckets?
 */
static int bucketdisable = 1;

/* Linked list of all zones in the system */
static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);

/* This mutex protects the zone list */
static struct mtx uma_mtx;

/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
    LIST_HEAD_INITIALIZER(&uma_boot_pages);

/* Count of free boottime pages */
static int uma_boot_free = 0;

/* Is the VM done starting up? */
static int booted = 0;

/* This is the handle used to schedule our working set calculator */
static struct callout uma_callout;

/* This is mp_maxid + 1, for use while looping over each cpu */
static int maxcpu;

/*
 * This structure is passed as the zone ctor arg so that I don't have to create
 * a special allocation function just for zones.
 */
struct uma_zctor_args {
	char *name;
	size_t size;
	uma_ctor ctor;
	uma_dtor dtor;
	uma_init uminit;
	uma_fini fini;
	int align;
	u_int16_t flags;
};

/* Prototypes.. */

static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
static void page_free(void *, int, u_int8_t);
static uma_slab_t slab_zalloc(uma_zone_t, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void zone_drain(uma_zone_t);
static void zone_ctor(void *, int, void *);
static void zone_dtor(void *, int, void *);
static void zero_init(void *, int);
static void zone_small_init(uma_zone_t zone);
static void zone_large_init(uma_zone_t zone);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
static int hash_alloc(struct uma_hash *);
static int hash_expand(struct uma_hash *, struct uma_hash *);
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *uma_zalloc_internal(uma_zone_t, void *, int);
static void uma_zfree_internal(uma_zone_t, void *, void *, int);
static void bucket_enable(void);
static int uma_zalloc_bucket(uma_zone_t zone, int flags);
static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);

void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);

SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_vm_zone, "A", "Zone Info");
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);

/*
 * This routine checks to see whether or not it's safe to enable buckets.
 */

static void
bucket_enable(void)
{
	if (cnt.v_free_count < cnt.v_free_min)
		bucketdisable = 1;
	else
		bucketdisable = 0;
}


/*
 * Routine called by timeout which is used to fire off some time interval
 * based calculations.  (working set, stats, etc.)
 *
 * Arguments:
 *	arg   Unused
 *
 * Returns:
 *	Nothing
 */
static void
uma_timeout(void *unused)
{
	bucket_enable();
	zone_foreach(zone_timeout);

	/* Reschedule this event */
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
}

/*
 * Routine to perform timeout driven calculations.  This does the working set
 * calculation, hash expansion, and per cpu statistics aggregation.
 *
 * Arguments:
 *	zone  The zone to operate on
 *
 * Returns:
 *	Nothing
 */
static void
zone_timeout(uma_zone_t zone)
{
	uma_cache_t cache;
	u_int64_t alloc;
	int free;
	int cpu;

	alloc = 0;
	free = 0;

	/*
	 * Aggregate per cpu cache statistics back to the zone.
	 *
	 * I may rewrite this to set a flag in the per cpu cache instead of
	 * locking.  If the flag is not cleared on the next round I will have
	 * to lock and do it here instead so that the statistics don't get too
	 * far out of sync.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
		for (cpu = 0; cpu < maxcpu; cpu++) {
			if (CPU_ABSENT(cpu))
				continue;
			CPU_LOCK(zone, cpu);
			cache = &zone->uz_cpu[cpu];
			/* Add them up, and reset */
			alloc += cache->uc_allocs;
			cache->uc_allocs = 0;
			if (cache->uc_allocbucket)
				free += cache->uc_allocbucket->ub_ptr + 1;
			if (cache->uc_freebucket)
				free += cache->uc_freebucket->ub_ptr + 1;
			CPU_UNLOCK(zone, cpu);
		}
	}

	/* Now push these stats back into the zone.. */
	ZONE_LOCK(zone);
	zone->uz_allocs += alloc;

	/*
	 * cachefree is an instantaneous snapshot of what is in the per cpu
	 * caches, not an accurate counter
	 */
	zone->uz_cachefree = free;

	/*
	 * Expand the zone hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is completely avoid collisions.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */

	if (zone->uz_flags & UMA_ZFLAG_HASH &&
	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
		struct uma_hash newhash;
		struct uma_hash oldhash;
		int ret;

		/*
		 * This is so involved because allocating and freeing
		 * while the zone lock is held will lead to deadlock.
		 * I have to do everything in stages and check for
		 * races.
		 */
		newhash = zone->uz_hash;
		ZONE_UNLOCK(zone);
		ret = hash_alloc(&newhash);
		ZONE_LOCK(zone);
		if (ret) {
			if (hash_expand(&zone->uz_hash, &newhash)) {
				oldhash = zone->uz_hash;
				zone->uz_hash = newhash;
			} else
				oldhash = newhash;

			ZONE_UNLOCK(zone);
			hash_free(&oldhash);
			ZONE_LOCK(zone);
		}
	}

	/*
	 * Here we compute the working set size as the total number of items
	 * left outstanding since the last time interval.  This is slightly
	 * suboptimal.  What we really want is the highest number of
	 * outstanding items during the last time quantum.  This should be
	 * close enough.
	 *
	 * The working set size is used to throttle the zone_drain function.
	 * We don't want to return memory that we may need again immediately.
	 */
	alloc = zone->uz_allocs - zone->uz_oallocs;
	zone->uz_oallocs = zone->uz_allocs;
	zone->uz_wssize = alloc;

	ZONE_UNLOCK(zone);
}

/*
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	hash  A new hash structure with the old hash size in uh_hashsize
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
static int
hash_alloc(struct uma_hash *hash)
{
	int oldsize;
	int alloc;

	oldsize = hash->uh_hashsize;

	/* We're just going to go to a power of two greater */
	if (oldsize) {
		hash->uh_hashsize = oldsize * 2;
		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
		/* XXX Shouldn't be abusing DEVBUF here */
		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
		    M_DEVBUF, M_NOWAIT);
	} else {
		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
		    M_WAITOK);
		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
	}
	if (hash->uh_slab_hash) {
		bzero(hash->uh_slab_hash, alloc);
		hash->uh_hashmask = hash->uh_hashsize - 1;
		return (1);
	}

	return (0);
}

/*
 * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation
 * path; otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	oldhash  The hash you want to expand
 *	newhash  The hash structure for the new table
 *
 * Returns:
 *	1 on success and 0 on failure.
 *
 * Discussion:
 */
static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
	uma_slab_t slab;
	int hval;
	int i;

	if (!newhash->uh_slab_hash)
		return (0);

	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
		return (0);

	/*
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
	 */

	for (i = 0; i < oldhash->uh_hashsize; i++)
		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
			hval = UMA_HASH(newhash, slab->us_data);
			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
			    slab, us_hlink);
		}

	return (1);
}

/*
 * Free the hash bucket to the appropriate backing store.
 *
 * Arguments:
 *	hash  The hash bucket we're freeing
 *
 * Returns:
 *	Nothing
 */
static void
hash_free(struct uma_hash *hash)
{
	if (hash->uh_slab_hash == NULL)
		return;
	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
		uma_zfree_internal(hashzone,
		    hash->uh_slab_hash, NULL, 0);
	else
		free(hash->uh_slab_hash, M_DEVBUF);
}

/*
 * Frees all outstanding items in a bucket
 *
 * Arguments:
 *	zone    The zone to free to, must be unlocked.
 *	bucket  The free/alloc bucket with items, cpu queue must be locked.
 *
 * Returns:
 *	Nothing
 */

static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
	uma_slab_t slab;
	int mzone;
	void *item;

	if (bucket == NULL)
		return;

	slab = NULL;
	mzone = 0;

	/* We have to lookup the slab again for malloc.. */
	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
		mzone = 1;

	while (bucket->ub_ptr > -1) {
		item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
		bucket->ub_bucket[bucket->ub_ptr] = NULL;
		KASSERT(item != NULL,
		    ("bucket_drain: botched ptr, item is NULL"));
#endif
		bucket->ub_ptr--;
		/*
		 * This is extremely inefficient.  The slab pointer was passed
		 * to uma_zfree_arg, but we lost it because the buckets don't
		 * hold them.  This will go away when free() gets a size passed
		 * to it.
		 */
		if (mzone)
			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
		uma_zfree_internal(zone, item, slab, 1);
	}
}

/*
 * Drains the per cpu caches for a zone.
 *
 * Arguments:
 *	zone  The zone to drain, must be unlocked.
 *
 * Returns:
 *	Nothing
 *
 * This function returns with the zone locked so that the per cpu queues
 * cannot be filled until zone_drain is finished.
 *
 */
static void
cache_drain(uma_zone_t zone)
{
	uma_bucket_t bucket;
	uma_cache_t cache;
	int cpu;

	/*
	 * Flush out the per cpu queues.
	 *
	 * XXX This causes unnecessary thrashing due to immediately having
	 * empty per cpu queues.  I need to improve this.
	 */

	/*
	 * We have to lock each cpu cache before locking the zone
	 */
	ZONE_UNLOCK(zone);

	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		CPU_LOCK(zone, cpu);
		cache = &zone->uz_cpu[cpu];
		bucket_drain(zone, cache->uc_allocbucket);
		bucket_drain(zone, cache->uc_freebucket);
	}

	/*
	 * Drain the bucket queues and free the buckets; we just keep two per
	 * cpu (alloc/free).
	 */
	ZONE_LOCK(zone);
	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		bucket_drain(zone, bucket);
		uma_zfree_internal(bucketzone, bucket, NULL, 0);
		ZONE_LOCK(zone);
	}

	/* Now we do the free queue.. */
	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		uma_zfree_internal(bucketzone, bucket, NULL, 0);
	}

	/* We unlock here, but they will all block until the zone is unlocked */
	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		CPU_UNLOCK(zone, cpu);
	}

	zone->uz_cachefree = 0;
}

/*
 * Frees pages from a zone back to the system.  This is done on demand from
 * the pageout daemon.
 *
 * Arguments:
 *	zone  The zone to free pages from
 *
 * Returns:
 *	Nothing.
 */
static void
zone_drain(uma_zone_t zone)
{
	struct slabhead freeslabs = {};
	uma_slab_t slab;
	uma_slab_t n;
	u_int64_t extra;
	u_int8_t flags;
	u_int8_t *mem;
	int i;

	/*
	 * We don't want to take pages from statically allocated zones at this
	 * time
	 */
	if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
		return;

	ZONE_LOCK(zone);

	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
		cache_drain(zone);

	if (zone->uz_free < zone->uz_wssize)
		goto finished;
#ifdef UMA_DEBUG
	printf("%s working set size: %llu free items: %u\n",
	    zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
#endif
	extra = zone->uz_free - zone->uz_wssize;
	extra /= zone->uz_ipers;

	/* extra is now the number of extra slabs that we can free */

	if (extra == 0)
		goto finished;

	slab = LIST_FIRST(&zone->uz_free_slab);
	while (slab && extra) {
		n = LIST_NEXT(slab, us_link);

		/* We have nowhere to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		zone->uz_pages -= zone->uz_ppera;
		zone->uz_free -= zone->uz_ipers;

		if (zone->uz_flags & UMA_ZFLAG_HASH)
			UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);

		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);

		slab = n;
		extra--;
	}
finished:
	ZONE_UNLOCK(zone);

	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
		if (zone->uz_fini)
			for (i = 0; i < zone->uz_ipers; i++)
				zone->uz_fini(
				    slab->us_data + (zone->uz_rsize * i),
				    zone->uz_size);
		flags = slab->us_flags;
		mem = slab->us_data;

		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
			uma_zfree_internal(slabzone, slab, NULL, 0);
		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
			for (i = 0; i < zone->uz_ppera; i++)
				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
				    kmem_object);
#ifdef UMA_DEBUG
		printf("%s: Returning %d bytes.\n",
		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
#endif
		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
	}

}

/*
 * Allocate a new slab for a zone.  This does not insert the slab onto a list.
 *
 * Arguments:
 *	zone  The zone to allocate slabs for
 *	wait  Shall we wait?
 *
 * Returns:
 *	The slab that was allocated or NULL if there is no memory and the
 *	caller specified M_NOWAIT.
 *
 */
static uma_slab_t
slab_zalloc(uma_zone_t zone, int wait)
{
	uma_slab_t slab;	/* Starting slab */
	u_int8_t *mem;
	u_int8_t flags;
	int i;

	slab = NULL;

#ifdef UMA_DEBUG
	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
#endif
	ZONE_UNLOCK(zone);

	if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
		slab = uma_zalloc_internal(slabzone, NULL, wait);
		if (slab == NULL) {
			ZONE_LOCK(zone);
			return NULL;
		}
	}

	/*
	 * This reproduces the old vm_zone behavior of zero filling pages the
	 * first time they are added to a zone.
	 *
	 * Malloced items are zeroed in uma_zalloc.
	 */

	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
		wait |= M_ZERO;
	else
		wait &= ~M_ZERO;

	if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
		mtx_lock(&Giant);
		mem = zone->uz_allocf(zone,
		    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
		mtx_unlock(&Giant);
		if (mem == NULL) {
			ZONE_LOCK(zone);
			return (NULL);
		}
	} else {
		uma_slab_t tmps;

		if (zone->uz_ppera > 1)
			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
			panic("Mallocing before uma_startup2 has been called.\n");
		if (uma_boot_free == 0)
			panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
		tmps = LIST_FIRST(&uma_boot_pages);
		LIST_REMOVE(tmps, us_link);
		uma_boot_free--;
		mem = tmps->us_data;
	}

	/* Point the slab into the allocated memory */
	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
		slab = (uma_slab_t)(mem + zone->uz_pgoff);

	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
		for (i = 0; i < zone->uz_ppera; i++)
			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);

	slab->us_zone = zone;
	slab->us_data = mem;

	/*
	 * This is intended to spread data out across cache lines.
	 *
	 * This code doesn't seem to work properly on x86, and on alpha
	 * it makes absolutely no performance difference.  I'm sure it could
	 * use some tuning, but Sun makes outrageous claims about its
	 * performance.
	 */
#if 0
	if (zone->uz_cachemax) {
		slab->us_data += zone->uz_cacheoff;
		zone->uz_cacheoff += UMA_CACHE_INC;
		if (zone->uz_cacheoff > zone->uz_cachemax)
			zone->uz_cacheoff = 0;
	}
#endif

	slab->us_freecount = zone->uz_ipers;
	slab->us_firstfree = 0;
	slab->us_flags = flags;
	for (i = 0; i < zone->uz_ipers; i++)
		slab->us_freelist[i] = i+1;

	if (zone->uz_init)
		for (i = 0; i < zone->uz_ipers; i++)
			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
			    zone->uz_size);
	ZONE_LOCK(zone);

	if (zone->uz_flags & UMA_ZFLAG_HASH)
		UMA_HASH_INSERT(&zone->uz_hash, slab, mem);

	zone->uz_pages += zone->uz_ppera;
	zone->uz_free += zone->uz_ipers;


	return (slab);
}

/*
 * Allocates a number of pages from the system
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	void *p;	/* Returned page */

	*pflag = UMA_SLAB_KMEM;
	p = (void *) kmem_malloc(kmem_map, bytes, wait);

	return (p);
}

/*
 * Allocates a number of pages from within an object
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 *
 * TODO: If we fail during a multi-page allocation, release the pages that
 * have already been allocated.
 */
static void *
obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	vm_offset_t zkva;
	vm_offset_t retkva;
	vm_page_t p;
	int pages;

	retkva = 0;
	pages = zone->uz_pages;

	/*
	 * This looks a little weird since we're getting one page at a time
	 */
	while (bytes > 0) {
		p = vm_page_alloc(zone->uz_obj, pages,
		    VM_ALLOC_INTERRUPT);
		if (p == NULL)
			return (NULL);

		zkva = zone->uz_kva + pages * PAGE_SIZE;
		if (retkva == 0)
			retkva = zkva;
		pmap_qenter(zkva, &p, 1);
		bytes -= PAGE_SIZE;
		pages += 1;
	}

	*flags = UMA_SLAB_PRIV;

	return ((void *)retkva);
}

/*
 * Frees a number of pages to the system
 *
 * Arguments:
 *	mem    A pointer to the memory to be freed
 *	size   The size of the memory being freed
 *	flags  The original p->us_flags field
 *
 * Returns:
 *	Nothing
 *
 */
static void
page_free(void *mem, int size, u_int8_t flags)
{
	vm_map_t map;

	if (flags & UMA_SLAB_KMEM)
		map = kmem_map;
	else
		panic("UMA: page_free used with invalid flags %d\n", flags);

	kmem_free(map, (vm_offset_t)mem, size);
}

/*
 * Zero fill initializer
 *
 * Arguments/Returns follow uma_init specifications
 *
 */
static void
zero_init(void *mem, int size)
{
	bzero(mem, size);
}

/*
 * Finish creating a small uma zone.  This calculates ipers and the zone size.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_small_init(uma_zone_t zone)
{
	int rsize;
	int memused;
	int ipers;

	rsize = zone->uz_size;

	if (rsize < UMA_SMALLEST_UNIT)
		rsize = UMA_SMALLEST_UNIT;

	if (rsize & zone->uz_align)
		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);

	zone->uz_rsize = rsize;

	rsize += 1;	/* Account for the byte of linkage */
	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
	zone->uz_ppera = 1;

	memused = zone->uz_ipers * zone->uz_rsize;

	/* Can we do any better? */
	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
			return;
		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
		if (ipers > zone->uz_ipers) {
			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
			if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
				zone->uz_flags |= UMA_ZFLAG_HASH;
			zone->uz_ipers = ipers;
		}
	}

}

/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_large_init(uma_zone_t zone)
{
	int pages;

	pages = zone->uz_size / UMA_SLAB_SIZE;

	/* Account for remainder */
	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
		pages++;

	zone->uz_ppera = pages;
	zone->uz_ipers = 1;

	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
		zone->uz_flags |= UMA_ZFLAG_HASH;

	zone->uz_rsize = zone->uz_size;
}

/*
 * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
 * the zone onto the global zone list.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_zctor_args
 *
 */

static void
zone_ctor(void *mem, int size, void *udata)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	int privlc;
	int cplen;
	int cpu;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_size = arg->size;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_init = arg->uminit;
	zone->uz_fini = arg->fini;
	zone->uz_align = arg->align;
	zone->uz_free = 0;
	zone->uz_pages = 0;
	zone->uz_flags = 0;
	zone->uz_allocf = page_alloc;
	zone->uz_freef = page_free;

	if (arg->flags & UMA_ZONE_ZINIT)
		zone->uz_init = zero_init;

	if (arg->flags & UMA_ZONE_INTERNAL)
		zone->uz_flags |= UMA_ZFLAG_INTERNAL;

	if (arg->flags & UMA_ZONE_MALLOC)
		zone->uz_flags |= UMA_ZFLAG_MALLOC;

	if (arg->flags & UMA_ZONE_NOFREE)
		zone->uz_flags |= UMA_ZFLAG_NOFREE;

	if (arg->flags & UMA_ZONE_VM)
		zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;

	if (zone->uz_size > UMA_SLAB_SIZE)
		zone_large_init(zone);
	else
		zone_small_init(zone);

	if (arg->flags & UMA_ZONE_MTXCLASS)
		privlc = 1;
	else
		privlc = 0;

	/* We do this so that the per cpu lock name is unique for each zone */
	memcpy(zone->uz_lname, "PCPU ", 5);
	cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
	memcpy(zone->uz_lname+5, zone->uz_name, cplen);
	zone->uz_lname[LOCKNAME_LEN - 1] = '\0';

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
		int totsize;
		int waste;

		/* Size of the slab struct and free list */
		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;

		waste = zone->uz_pgoff;
		waste -= (zone->uz_ipers * zone->uz_rsize);

		/*
		 * This calculates how much space we have for cache line size
		 * optimizations.  It works by offsetting each slab slightly.
		 * Currently it breaks on x86, and so it is disabled.
		 */

		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
			zone->uz_cachemax = waste - UMA_CACHE_INC;
			zone->uz_cacheoff = 0;
		}

		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
		    + zone->uz_ipers;
		/* I don't think it's possible, but I'll make sure anyway */
		if (totsize > UMA_SLAB_SIZE) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
			    zone->uz_size);
			panic("UMA slab won't fit.\n");
		}
	}

	if (zone->uz_flags & UMA_ZFLAG_HASH)
		hash_alloc(&zone->uz_hash);

#ifdef UMA_DEBUG
	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
	    zone->uz_name, zone,
	    zone->uz_size, zone->uz_ipers,
	    zone->uz_ppera, zone->uz_pgoff);
#endif
	ZONE_LOCK_INIT(zone, privlc);

	mtx_lock(&uma_mtx);
	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
	mtx_unlock(&uma_mtx);

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */

	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
		return;

	if (zone->uz_ipers < UMA_BUCKET_SIZE)
		zone->uz_count = zone->uz_ipers - 1;
	else
		zone->uz_count = UMA_BUCKET_SIZE - 1;

	for (cpu = 0; cpu < maxcpu; cpu++)
		CPU_LOCK_INIT(zone, cpu, privlc);
}

/*
 * Zone header dtor.  This frees all data, destroys locks, frees the hash
 * table and removes the zone from the global list.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */

static void
zone_dtor(void *arg, int size, void *udata)
{
	uma_zone_t zone;
	int cpu;

	zone = (uma_zone_t)arg;

	ZONE_LOCK(zone);
	zone->uz_wssize = 0;
	ZONE_UNLOCK(zone);

	mtx_lock(&uma_mtx);
	LIST_REMOVE(zone, uz_link);
	zone_drain(zone);
	mtx_unlock(&uma_mtx);

	ZONE_LOCK(zone);
	if (zone->uz_free != 0)
		printf("Zone %s was not empty.  Lost %d pages of memory.\n",
		    zone->uz_name, zone->uz_pages);

	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
		for (cpu = 0; cpu < maxcpu; cpu++)
			CPU_LOCK_FINI(zone, cpu);

	ZONE_UNLOCK(zone);
	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
		hash_free(&zone->uz_hash);

	ZONE_LOCK_FINI(zone);
}

/*
 * Traverses every zone in the system and calls a callback
 *
 * Arguments:
 *	zfunc  A pointer to a function which accepts a zone
 *	       as an argument.
 *
 * Returns:
 *	Nothing
 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_zone_t zone;

	mtx_lock(&uma_mtx);
	LIST_FOREACH(zone, &uma_zones, uz_link) {
		zfunc(zone);
	}
	mtx_unlock(&uma_mtx);
}

/* Public functions */
/* See uma.h */
void
uma_startup(void *bootmem)
{
	struct uma_zctor_args args;
	uma_slab_t slab;
	int slabsize;
	int i;

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone.\n");
#endif
#ifdef SMP
	maxcpu = mp_maxid + 1;
#else
	maxcpu = 1;
#endif
#ifdef UMA_DEBUG
	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
	Debugger("stop");
#endif
	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
	/* "Manually" create the initial zone */
	args.name = "UMA Zones";
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (maxcpu - 1));
	args.ctor = zone_ctor;
	args.dtor = zone_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZONE_INTERNAL;
	/* The initial zone has no per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	for (i = 0; i < UMA_BOOT_PAGES; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (u_int8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
		uma_boot_free++;
	}

#ifdef UMA_DEBUG
	printf("Creating slab zone.\n");
#endif

	/*
	 * This is the max number of free list items we'll have with
	 * offpage slabs.
	 */

	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
	slabsize /= UMA_MAX_WASTE;
	slabsize++;			/* In case the division rounded down */
	slabsize += sizeof(struct uma_slab);

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
	    slabsize,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);


#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}

/* see uma.h */
void
uma_startup2(void)
{
	booted = 1;
	bucket_enable();
#ifdef UMA_DEBUG
	printf("UMA startup2 complete.\n");
#endif
}

/*
 * Initialize our callout handle
 *
 */

static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
	printf("Starting callout.\n");
#endif
	callout_init(&uma_callout, 0);
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
	printf("UMA startup3 complete.\n");
#endif
}

/* See uma.h */
uma_zone_t
uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
    uma_init uminit, uma_fini fini, int align, u_int16_t flags)

{
	struct uma_zctor_args args;

	/* This stuff is essential for the zone ctor */
	args.name = name;
	args.size = size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;

	return (uma_zalloc_internal(zones, &args, M_WAITOK));
}

/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
{
	uma_zfree_internal(zones, zone, NULL, 0);
}

/* See uma.h */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
	void *item;
	uma_cache_t cache;
	uma_bucket_t bucket;
	int cpu;

	/* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif

	if (!(flags & M_NOWAIT)) {
		KASSERT(curthread->td_intr_nesting_level == 0,
		    ("malloc(M_WAITOK) in interrupt context"));
		WITNESS_SLEEP(1, NULL);
	}

zalloc_restart:
	cpu = PCPU_GET(cpuid);
	CPU_LOCK(zone, cpu);
	cache = &zone->uz_cpu[cpu];

zalloc_start:
	bucket = cache->uc_allocbucket;

	if (bucket) {
		if (bucket->ub_ptr > -1) {
			item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
			bucket->ub_bucket[bucket->ub_ptr] = NULL;
#endif
			bucket->ub_ptr--;
			KASSERT(item != NULL,
			    ("uma_zalloc: Bucket pointer mangled."));
			cache->uc_allocs++;
#ifdef INVARIANTS
			uma_dbg_alloc(zone, NULL, item);
#endif
			CPU_UNLOCK(zone, cpu);
			if (zone->uz_ctor)
				zone->uz_ctor(item, zone->uz_size, udata);
			if (flags & M_ZERO)
				bzero(item, zone->uz_size);
			return (item);
		} else if (cache->uc_freebucket) {
			/*
			 * We have run out of items in our allocbucket.
			 * See if we can switch with our free bucket.
			 */
			if (cache->uc_freebucket->ub_ptr > -1) {
				uma_bucket_t swap;

#ifdef UMA_DEBUG_ALLOC
				printf("uma_zalloc: Swapping empty with alloc.\n");
#endif
				swap = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = swap;

				goto zalloc_start;
			}
		}
	}
	ZONE_LOCK(zone);
	/* Since we have locked the zone we may as well send back our stats */
	zone->uz_allocs += cache->uc_allocs;
	cache->uc_allocs = 0;

	/* Our old one is now a free bucket */
	if (cache->uc_allocbucket) {
		KASSERT(cache->uc_allocbucket->ub_ptr == -1,
		    ("uma_zalloc_arg: Freeing a non free bucket."));
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    cache->uc_allocbucket, ub_link);
		cache->uc_allocbucket = NULL;
	}

	/* Check the free list for a new alloc bucket */
	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		KASSERT(bucket->ub_ptr != -1,
		    ("uma_zalloc_arg: Returning an empty bucket."));

		LIST_REMOVE(bucket, ub_link);
		cache->uc_allocbucket = bucket;
		ZONE_UNLOCK(zone);
		goto zalloc_start;
	}
	/* We are no longer associated with this cpu!!! */
	CPU_UNLOCK(zone, cpu);

	/* Bump up our uz_count so we get here less */
	if (zone->uz_count < UMA_BUCKET_SIZE - 1)
		zone->uz_count++;

	/*
	 * Now let's just fill a bucket and put it on the free list.  If that
	 * works we'll restart the allocation from the beginning.
	 */

	if (uma_zalloc_bucket(zone, flags)) {
		ZONE_UNLOCK(zone);
		goto zalloc_restart;
	}
	ZONE_UNLOCK(zone);
	/*
	 * We may not be able to get a bucket so return an actual item.
	 */
#ifdef UMA_DEBUG
	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

	return (uma_zalloc_internal(zone, udata, flags));
}

static uma_slab_t
uma_zone_slab(uma_zone_t zone, int flags)
{
	uma_slab_t slab;

	/*
	 * This is to prevent us from recursively trying to allocate
	 * buckets.  The problem is that if an allocation forces us to
	 * grab a new bucket we will call page_alloc, which will go off
	 * and cause the vm to allocate vm_map_entries.  If we need new
	 * buckets there too we will recurse in kmem_alloc and bad
	 * things happen.  So instead we return a NULL bucket, and make
	 * the code that allocates buckets smart enough to deal with it.
	 */
	if (zone == bucketzone && zone->uz_recurse != 0)
		return (NULL);

	slab = NULL;

	for (;;) {
		/*
		 * Find a slab with some space.  Prefer slabs that are
		 * partially used over those that are totally full.  This
		 * helps to reduce fragmentation.
		 */
		if (zone->uz_free != 0) {
			if (!LIST_EMPTY(&zone->uz_part_slab)) {
				slab = LIST_FIRST(&zone->uz_part_slab);
			} else {
				slab = LIST_FIRST(&zone->uz_free_slab);
				LIST_REMOVE(slab, us_link);
				LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
				    us_link);
			}
			return (slab);
		}

		/*
		 * M_NOVM means don't ask at all!
		 */
		if (flags & M_NOVM)
			break;

		if (zone->uz_maxpages &&
		    zone->uz_pages >= zone->uz_maxpages) {
			zone->uz_flags |= UMA_ZFLAG_FULL;

			if (flags & M_WAITOK)
				msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
			else
				break;
			continue;
		}
		zone->uz_recurse++;
		slab = slab_zalloc(zone, flags);
		zone->uz_recurse--;
		/*
		 * If we got a slab here it's safe to mark it partially used
		 * and return.  We assume that the caller is going to remove
		 * at least one item.
		 */
		if (slab) {
			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
			return (slab);
		}
		/*
		 * We might not have been able to get a slab but another cpu
		 * could have while we were unlocked.  Check again before we
		 * fail.
		 */
		if ((flags & M_WAITOK) == 0)
			flags |= M_NOVM;
	}
	return (slab);
}

static __inline void *
uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
{
	void *item;
	u_int8_t freei;

	freei = slab->us_firstfree;
	slab->us_firstfree = slab->us_freelist[freei];
	item = slab->us_data + (zone->uz_rsize * freei);

	slab->us_freecount--;
	zone->uz_free--;
#ifdef INVARIANTS
	uma_dbg_alloc(zone, slab, item);
#endif
	/* Move this slab to the full list */
	if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
	}

	return (item);
}

static int
uma_zalloc_bucket(uma_zone_t zone, int flags)
{
	uma_bucket_t bucket;
	uma_slab_t slab;

	/*
	 * Try this zone's free list first so we don't allocate extra buckets.
	 */

	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		KASSERT(bucket->ub_ptr == -1,
		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
		LIST_REMOVE(bucket, ub_link);
	} else {
		int bflags;

		bflags = flags;
		if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
			bflags |= M_NOVM;

		ZONE_UNLOCK(zone);
		bucket = uma_zalloc_internal(bucketzone,
		    NULL, bflags);
		ZONE_LOCK(zone);
		if (bucket != NULL) {
#ifdef INVARIANTS
			bzero(bucket, bucketzone->uz_size);
#endif
			bucket->ub_ptr = -1;
		}
	}

	if (bucket == NULL)
		return (0);

#ifdef SMP
	/*
	 * This code is here to limit the number of simultaneous bucket fills
	 * for any given zone to the number of per cpu caches in this zone.
	 * This is done so that we don't allocate more memory than we really
	 * need.
	 */
	if (zone->uz_fills >= mp_ncpus)
		goto done;

#endif
	zone->uz_fills++;

	/* Try to keep the buckets totally full */
	while ((slab = uma_zone_slab(zone, flags)) != NULL &&
	    bucket->ub_ptr < zone->uz_count) {
		while (slab->us_freecount &&
		    bucket->ub_ptr < zone->uz_count) {
			bucket->ub_bucket[++bucket->ub_ptr] =
			    uma_slab_alloc(zone, slab);
		}
		/* Don't block on the next fill */
		flags |= M_NOWAIT;
		flags &= ~M_WAITOK;
	}

	zone->uz_fills--;

	if (bucket->ub_ptr != -1) {
		LIST_INSERT_HEAD(&zone->uz_full_bucket,
		    bucket, ub_link);
		return (1);
	}
#ifdef SMP
done:
#endif
	uma_zfree_internal(bucketzone, bucket, NULL, 0);

	return (0);
}

/*
 * Allocates an item for an internal zone
 *
 * Arguments
 *	zone   The zone to alloc for.
 *	udata  The data to be passed to the constructor.
 *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
 *
 * Returns
 *	NULL if there is no memory and M_NOWAIT is set
 *	An item if successful
 */

static void *
uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
{
	uma_slab_t slab;
	void *item;

	item = NULL;

	/*
	 * This is to stop us from allocating per cpu buckets while we're
	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
	 * boot pages.
	 */

	if (bucketdisable && zone == bucketzone)
		return (NULL);

#ifdef UMA_DEBUG_ALLOC
	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
	ZONE_LOCK(zone);

	slab = uma_zone_slab(zone, flags);
	if (slab == NULL) {
		ZONE_UNLOCK(zone);
		return (NULL);
	}

	item = uma_slab_alloc(zone, slab);

	ZONE_UNLOCK(zone);

	if (zone->uz_ctor != NULL)
		zone->uz_ctor(item, zone->uz_size, udata);
	if (flags & M_ZERO)
		bzero(item, zone->uz_size);

	return (item);
}

/* See uma.h */
void
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
	uma_cache_t cache;
	uma_bucket_t bucket;
	int bflags;
	int cpu;

	/* This is the fast path free */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
	/*
	 * The race here is acceptable.  If we miss it we'll just have to wait
	 * a little longer for the limits to be reset.
	 */

	if (zone->uz_flags & UMA_ZFLAG_FULL)
		goto zfree_internal;

	if (zone->uz_dtor)
		zone->uz_dtor(item, zone->uz_size, udata);

zfree_restart:
	cpu = PCPU_GET(cpuid);
	CPU_LOCK(zone, cpu);
	cache = &zone->uz_cpu[cpu];

zfree_start:
	bucket = cache->uc_freebucket;

	if (bucket) {
		/*
		 * Do we have room in our bucket?  It is OK for this uz count
		 * check to be slightly out of sync.
		 */

		if (bucket->ub_ptr < zone->uz_count) {
			bucket->ub_ptr++;
			KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
			    ("uma_zfree: Freeing to non free bucket index."));
			bucket->ub_bucket[bucket->ub_ptr] = item;
#ifdef INVARIANTS
			if (zone->uz_flags & UMA_ZFLAG_MALLOC)
				uma_dbg_free(zone, udata, item);
			else
				uma_dbg_free(zone, NULL, item);
#endif
			CPU_UNLOCK(zone, cpu);
			return;
		} else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
			printf("uma_zfree: Swapping buckets.\n");
#endif
			/*
			 * We have run out of space in our freebucket.
			 * See if we can switch with our alloc bucket.
			 */
			if (cache->uc_allocbucket->ub_ptr <
			    cache->uc_freebucket->ub_ptr) {
				uma_bucket_t swap;

				swap = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = swap;

				goto zfree_start;
			}
		}
	}

	/*
	 * We can get here for two reasons:
	 *
	 * 1) The buckets are NULL
	 * 2) The alloc and free buckets are both somewhat full.
	 *
	 */

	ZONE_LOCK(zone);

	bucket = cache->uc_freebucket;
	cache->uc_freebucket = NULL;

	/* Can we throw this on the zone full list? */
	if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
		printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
		/* ub_ptr is pointing to the last free item */
		KASSERT(bucket->ub_ptr != -1,
		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
		LIST_INSERT_HEAD(&zone->uz_full_bucket,
		    bucket, ub_link);
	}
	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		cache->uc_freebucket = bucket;
		goto zfree_start;
	}
	/* We're done with this CPU now */
	CPU_UNLOCK(zone, cpu);

	/* And the zone.. */
	ZONE_UNLOCK(zone);

#ifdef UMA_DEBUG_ALLOC
	printf("uma_zfree: Allocating new free bucket.\n");
#endif
	bflags = M_NOWAIT;

	if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
		bflags |= M_NOVM;
#ifdef INVARIANTS
	bflags |= M_ZERO;
#endif
	bucket = uma_zalloc_internal(bucketzone,
	    NULL, bflags);
	if (bucket) {
		bucket->ub_ptr = -1;
		ZONE_LOCK(zone);
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    bucket, ub_link);
		ZONE_UNLOCK(zone);
		goto zfree_restart;
	}

	/*
	 * If nothing else caught this, we'll just do an internal free.
	 */

zfree_internal:

	uma_zfree_internal(zone, item, udata, 0);

	return;

}

/*
 * Frees an item to an INTERNAL zone or allocates a free bucket
 *
 * Arguments:
 *	zone   The zone to free to
 *	item   The item we're freeing
 *	udata  User supplied data for the dtor
 *	skip   Skip the dtor, it was done in uma_zfree_arg
 */

static void
uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
{
	uma_slab_t slab;
	u_int8_t *mem;
	u_int8_t freei;

	if (!skip && zone->uz_dtor)
		zone->uz_dtor(item, zone->uz_size, udata);

	ZONE_LOCK(zone);

	if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
		if (zone->uz_flags & UMA_ZFLAG_HASH)
			slab = hash_sfind(&zone->uz_hash, mem);
		else {
			mem += zone->uz_pgoff;
			slab = (uma_slab_t)mem;
		}
	} else {
		slab = (uma_slab_t)udata;
	}

	/* Do we need to remove from any lists? */
	if (slab->us_freecount+1 == zone->uz_ipers) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
	} else if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
	}

	/* Slab management stuff */
	freei = ((unsigned long)item - (unsigned long)slab->us_data)
	    / zone->uz_rsize;

#ifdef INVARIANTS
	if (!skip)
		uma_dbg_free(zone, slab, item);
#endif

	slab->us_freelist[freei] = slab->us_firstfree;
	slab->us_firstfree = freei;
	slab->us_freecount++;

	/* Zone statistics */
	zone->uz_free++;

	if (zone->uz_flags & UMA_ZFLAG_FULL) {
		if (zone->uz_pages < zone->uz_maxpages)
			zone->uz_flags &= ~UMA_ZFLAG_FULL;

		/* We can handle one more allocation */
		wakeup_one(&zone);
	}

	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_max(uma_zone_t zone, int nitems)
{
	ZONE_LOCK(zone);
	if (zone->uz_ppera > 1)
		zone->uz_maxpages = nitems * zone->uz_ppera;
	else
		zone->uz_maxpages = nitems / zone->uz_ipers;

	if (zone->uz_maxpages * zone->uz_ipers < nitems)
		zone->uz_maxpages++;

	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
	ZONE_LOCK(zone);

	zone->uz_freef = freef;

	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
{
	ZONE_LOCK(zone);

	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
	zone->uz_allocf = allocf;

	ZONE_UNLOCK(zone);
}

/* See uma.h */
int
uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
{
	int pages;
	vm_offset_t kva;

	mtx_lock(&Giant);

	pages = count / zone->uz_ipers;

	if (pages * zone->uz_ipers < count)
		pages++;

	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);

	if (kva == 0) {
		mtx_unlock(&Giant);
		return (0);
	}


	if (obj == NULL)
		obj = vm_object_allocate(OBJT_DEFAULT,
		    pages);
	else
		_vm_object_allocate(OBJT_DEFAULT,
		    pages, obj);

	ZONE_LOCK(zone);
	zone->uz_kva = kva;
	zone->uz_obj = obj;
	zone->uz_maxpages = pages;

	zone->uz_allocf = obj_alloc;
	zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;

	ZONE_UNLOCK(zone);
	mtx_unlock(&Giant);

	return (1);
}

/* See uma.h */
void
uma_prealloc(uma_zone_t zone, int items)
{
	int slabs;
	uma_slab_t slab;

	ZONE_LOCK(zone);
	slabs = items / zone->uz_ipers;
	if (slabs * zone->uz_ipers < items)
		slabs++;

	while (slabs > 0) {
		slab = slab_zalloc(zone, M_WAITOK);
		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
		slabs--;
	}
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_reclaim(void)
{
	/*
	 * You might think that the delay below would improve performance,
	 * since the allocator will give away memory that it may ask for again
	 * immediately.  Really, it makes things worse, since cpu cycles are
	 * so much cheaper than disk activity.
	 */
#if 0
	static struct timeval tv = {0};
	struct timeval now;
	getmicrouptime(&now);
	if (now.tv_sec > tv.tv_sec + 30)
		tv = now;
	else
		return;
#endif
#ifdef UMA_DEBUG
	printf("UMA: vm asked us to release pages!\n");
#endif
	bucket_enable();
	zone_foreach(zone_drain);

	/*
	 * Some slabs may have been freed but this zone will be visited early
	 * in the zone list, so we visit it again here; that way we can free
	 * pages that become empty once other zones are drained.  We have to
	 * do the same for buckets.
	 */
	zone_drain(slabzone);
	zone_drain(bucketzone);
}

void *
uma_large_malloc(int size, int wait)
{
	void *mem;
	uma_slab_t slab;
	u_int8_t flags;

	slab = uma_zalloc_internal(slabzone, NULL, wait);
	if (slab == NULL)
		return (NULL);

	mem = page_alloc(NULL, size, &flags, wait);
	if (mem) {
		vsetslab((vm_offset_t)mem, slab);
		slab->us_data = mem;
		slab->us_flags = flags | UMA_SLAB_MALLOC;
		slab->us_size = size;
	} else {
		uma_zfree_internal(slabzone, slab, NULL, 0);
	}


	return (mem);
}

void
uma_large_free(uma_slab_t slab)
{
	vsetobj((vm_offset_t)slab->us_data, kmem_object);
	page_free(slab->us_data, slab->us_size, slab->us_flags);
	uma_zfree_internal(slabzone, slab, NULL, 0);
}

void
uma_print_stats(void)
{
	zone_foreach(uma_print_zone);
}

void
uma_print_zone(uma_zone_t zone)
{
	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
	    zone->uz_ipers, zone->uz_ppera,
	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
}

/*
 * Sysctl handler for vm.zone
 *
 * stolen from vm_zone.c
 */
static int
sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
{
	int error, len, cnt;
	const int linesize = 128;	/* conservative */
	int totalfree;
	char *tmpbuf, *offset;
	uma_zone_t z;
	char *p;

	cnt = 0;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(z, &uma_zones, uz_link)
		cnt++;
	mtx_unlock(&uma_mtx);
	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
	    M_TEMP, M_WAITOK);
	len = snprintf(tmpbuf, linesize,
	    "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
	if (cnt == 0)
		tmpbuf[len - 1] = '\0';
	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
	if (error || cnt == 0)
		goto out;
	offset = tmpbuf;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(z, &uma_zones, uz_link) {
		if (cnt == 0)	/* list may have changed size */
			break;
		ZONE_LOCK(z);
		totalfree = z->uz_free + z->uz_cachefree;
		len = snprintf(offset, linesize,
		    "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
		    z->uz_name, z->uz_size,
		    z->uz_maxpages * z->uz_ipers,
		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
		    totalfree,
		    (unsigned long long)z->uz_allocs);
		ZONE_UNLOCK(z);
		for (p = offset + 12; p > offset && *p == ' '; --p)
			/* nothing */ ;
		p[1] = ':';
		cnt--;
		offset += len;
	}
	mtx_unlock(&uma_mtx);
	*offset++ = '\0';
	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
out:
	FREE(tmpbuf, M_TEMP);
	return (error);
}
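
/*
 * Illustrative usage sketch of the public interface implemented above.
 * This block is compiled out and only shows the expected call sequence;
 * the item type ("struct foo"), the zone name, and the limit of 1024 items
 * are hypothetical, and consumers would normally go through the wrappers
 * declared in uma.h rather than calling the *_arg functions directly.
 */
#if 0
struct foo {
	int	f_refs;
	char	f_name[16];
};

static uma_zone_t foo_zone;

static void
foo_zone_example(void)
{
	struct foo *fp;

	/* Create a zone of fixed size items with no ctor/dtor/init/fini. */
	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

	/* Cap the zone; see uma_zone_set_max() above for the rounding. */
	uma_zone_set_max(foo_zone, 1024);

	/* Allocate a zeroed item; M_WAITOK means this call may sleep. */
	fp = uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO);

	/* ... use fp ... */

	/* Return the item to the zone's per cpu cache. */
	uma_zfree_arg(foo_zone, fp, NULL);

	/* Destroy the zone once all items have been freed. */
	uma_zdestroy(foo_zone);
}
#endif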