/*
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */

/*
 * uma_core.c  Implementation of the Universal Memory Allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.  This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose algorithms
 * are well known.
 */

/*
 * TODO:
 *	- Improve memory usage for large allocations
 *	- Investigate cache size adjustments
 */

/* I should really use ktr.. */
/*
#define UMA_DEBUG 1
#define UMA_DEBUG_ALLOC 1
#define UMA_DEBUG_ALLOC_1 1
*/

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

/*
 * This is the zone from which all zones are spawned.  The idea is that even
 * the zone heads are allocated from the allocator, so we use the bss section
 * to bootstrap us.
 */
static struct uma_zone masterzone;
static uma_zone_t zones = &masterzone;

/* This is the zone from which all uma_slab_t's are allocated. */
static uma_zone_t slabzone;

/*
 * The initial hash tables come out of this zone so they can be allocated
 * prior to malloc coming up.
 */
static uma_zone_t hashzone;

/*
 * Zone that buckets come from.
 */
static uma_zone_t bucketzone;

/*
 * Are we allowed to allocate buckets?
 */
static int bucketdisable = 1;

/* Linked list of all zones in the system */
static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);

/* This mutex protects the zone list */
static struct mtx uma_mtx;

/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
    LIST_HEAD_INITIALIZER(&uma_boot_pages);

/* Count of free boot time pages */
static int uma_boot_free = 0;

/* Is the VM done starting up? */
static int booted = 0;

/* This is the handle used to schedule our working set calculator */
static struct callout uma_callout;

/* This is mp_maxid + 1, for use while looping over each cpu */
static int maxcpu;

/*
 * This structure is passed as the zone ctor arg so that I don't have to create
 * a special allocation function just for zones.
 */
struct uma_zctor_args {
	char *name;
	size_t size;
	uma_ctor ctor;
	uma_dtor dtor;
	uma_init uminit;
	uma_fini fini;
	int align;
	u_int16_t flags;
};

/* Prototypes.. */

static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
static void page_free(void *, int, u_int8_t);
static uma_slab_t slab_zalloc(uma_zone_t, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void zone_drain(uma_zone_t);
static void zone_ctor(void *, int, void *);
static void zone_dtor(void *, int, void *);
static void zero_init(void *, int);
static void zone_small_init(uma_zone_t zone);
static void zone_large_init(uma_zone_t zone);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
static int hash_alloc(struct uma_hash *);
static int hash_expand(struct uma_hash *, struct uma_hash *);
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t);
static void uma_zfree_internal(uma_zone_t, void *, void *, int);
static void bucket_enable(void);
void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);

SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_vm_zone, "A", "Zone Info");
SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);

/*
 * This routine checks to see whether or not it's safe to enable buckets.
 */
static void
bucket_enable(void)
{
	if (cnt.v_free_count < cnt.v_free_min)
		bucketdisable = 1;
	else
		bucketdisable = 0;
}

/*
 * Routine called by timeout which is used to fire off some time interval
 * based calculations.  (working set, stats, etc.)
 *
 * Arguments:
 *	arg   Unused
 *
 * Returns:
 *	Nothing
 */
static void
uma_timeout(void *unused)
{
	bucket_enable();
	zone_foreach(zone_timeout);

	/* Reschedule this event */
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
}

/*
 * Routine to perform timeout driven calculations.  This does the working set
 * as well as hash expanding, and per cpu statistics aggregation.
 *
 * Arguments:
 *	zone  The zone to operate on
 *
 * Returns:
 *	Nothing
 */
static void
zone_timeout(uma_zone_t zone)
{
	uma_cache_t cache;
	u_int64_t alloc;
	int free;
	int cpu;

	alloc = 0;
	free = 0;

	/*
	 * Aggregate per cpu cache statistics back to the zone.
	 *
	 * I may rewrite this to set a flag in the per cpu cache instead of
	 * locking.  If the flag is not cleared on the next round I will have
	 * to lock and do it here instead so that the statistics don't get too
	 * far out of sync.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
		for (cpu = 0; cpu < maxcpu; cpu++) {
			if (CPU_ABSENT(cpu))
				continue;
			CPU_LOCK(zone, cpu);
			cache = &zone->uz_cpu[cpu];
			/* Add them up, and reset */
			alloc += cache->uc_allocs;
			cache->uc_allocs = 0;
			if (cache->uc_allocbucket)
				free += cache->uc_allocbucket->ub_ptr + 1;
			if (cache->uc_freebucket)
				free += cache->uc_freebucket->ub_ptr + 1;
			CPU_UNLOCK(zone, cpu);
		}
	}

	/* Now push these stats back into the zone.. */
	ZONE_LOCK(zone);
	zone->uz_allocs += alloc;

	/*
	 * cachefree is an instantaneous snapshot of what is in the per cpu
	 * caches, not an accurate counter.
	 */
	zone->uz_cachefree = free;

	/*
	 * Expand the zone hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is eliminate collisions entirely.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */
	if (zone->uz_flags & UMA_ZFLAG_HASH &&
	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
		struct uma_hash newhash;
		struct uma_hash oldhash;
		int ret;

		/*
		 * This is so involved because allocating and freeing
		 * while the zone lock is held will lead to deadlock.
		 * I have to do everything in stages and check for
		 * races.
		 */
		newhash = zone->uz_hash;
		ZONE_UNLOCK(zone);
		ret = hash_alloc(&newhash);
		ZONE_LOCK(zone);
		if (ret) {
			if (hash_expand(&zone->uz_hash, &newhash)) {
				oldhash = zone->uz_hash;
				zone->uz_hash = newhash;
			} else
				oldhash = newhash;

			ZONE_UNLOCK(zone);
			hash_free(&oldhash);
			ZONE_LOCK(zone);
		}
	}

	/*
	 * Here we compute the working set size as the total number of items
	 * left outstanding since the last time interval.  This is slightly
	 * suboptimal.  What we really want is the highest number of outstanding
	 * items during the last time quantum.  This should be close enough.
	 *
	 * The working set size is used to throttle the zone_drain function.
	 * We don't want to return memory that we may need again immediately.
	 */
	alloc = zone->uz_allocs - zone->uz_oallocs;
	zone->uz_oallocs = zone->uz_allocs;
	zone->uz_wssize = alloc;

	ZONE_UNLOCK(zone);
}

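/*
 * Worked example (a sketch, not part of the original code): assuming one-page
 * slabs (uz_ppera == 1) and an initial table of UMA_HASH_SIZE_INIT buckets,
 * a hashed zone whose slab count reaches the current table size trips the
 * test above on the next timeout; hash_alloc() below then doubles the table,
 * so a zone always ends up with at least one hash bucket per slab.
 */
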
/*
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	hash  A new hash structure with the old hash size in uh_hashsize
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
static int
hash_alloc(struct uma_hash *hash)
{
	int oldsize;
	int alloc;

	oldsize = hash->uh_hashsize;

	/* We're just going to go to a power of two greater */
	if (oldsize) {
		hash->uh_hashsize = oldsize * 2;
		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
		/* XXX Shouldn't be abusing DEVBUF here */
		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
		    M_DEVBUF, M_NOWAIT);
	} else {
		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
		    M_WAITOK, NULL);
		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
	}
	if (hash->uh_slab_hash) {
		bzero(hash->uh_slab_hash, alloc);
		hash->uh_hashmask = hash->uh_hashsize - 1;
		return (1);
	}

	return (0);
}

/*
 * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation path,
 * otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	oldhash  The hash you want to expand
 *	newhash  The hash structure for the new table
 *
 * Returns:
 *	1 if the entries were rehashed into the new table, 0 if the new table
 *	could not be used.
 *
 * Discussion:
 */
static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
	uma_slab_t slab;
	int hval;
	int i;

	if (!newhash->uh_slab_hash)
		return (0);

	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
		return (0);

	/*
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
	 */
	for (i = 0; i < oldhash->uh_hashsize; i++)
		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
			hval = UMA_HASH(newhash, slab->us_data);
			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
			    slab, us_hlink);
		}

	return (1);
}

/*
 * Free the hash bucket array to the appropriate backing store.
 *
 * Arguments:
 *	hash  The hash table whose uh_slab_hash we are freeing; its size
 *	      tells us which backing store it came from.
 *
 * Returns:
 *	Nothing
 */
static void
hash_free(struct uma_hash *hash)
{
	if (hash->uh_slab_hash == NULL)
		return;
	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
		uma_zfree_internal(hashzone,
		    hash->uh_slab_hash, NULL, 0);
	else
		free(hash->uh_slab_hash, M_DEVBUF);
}

/*
 * Frees all outstanding items in a bucket
 *
 * Arguments:
 *	zone    The zone to free to, must be unlocked.
 *	bucket  The free/alloc bucket with items, cpu queue must be locked.
 *
 * Returns:
 *	Nothing
 */
static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
	uma_slab_t slab;
	int mzone;
	void *item;

	if (bucket == NULL)
		return;

	slab = NULL;
	mzone = 0;

	/* We have to lookup the slab again for malloc.. */
	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
		mzone = 1;

	while (bucket->ub_ptr > -1) {
		item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
		bucket->ub_bucket[bucket->ub_ptr] = NULL;
		KASSERT(item != NULL,
		    ("bucket_drain: botched ptr, item is NULL"));
#endif
		bucket->ub_ptr--;
		/*
		 * This is extremely inefficient.  The slab pointer was passed
		 * to uma_zfree_arg, but we lost it because the buckets don't
		 * hold them.  This will go away when free() gets a size passed
		 * to it.
		 */
		if (mzone)
			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
		uma_zfree_internal(zone, item, slab, 1);
	}
}

/*
 * Drains the per cpu caches for a zone.
 *
 * Arguments:
 *	zone  The zone to drain, must be locked on entry.
 *
 * Returns:
 *	Nothing
 *
 * This function returns with the zone locked so that the per cpu queues can
 * not be filled until zone_drain is finished.
 */
static void
cache_drain(uma_zone_t zone)
{
	uma_bucket_t bucket;
	uma_cache_t cache;
	int cpu;

	/*
	 * Flush out the per cpu queues.
	 *
	 * XXX This causes unnecessary thrashing due to immediately having
	 * empty per cpu queues.  I need to improve this.
	 */

	/*
	 * We have to lock each cpu cache before locking the zone
	 */
	ZONE_UNLOCK(zone);

	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		CPU_LOCK(zone, cpu);
		cache = &zone->uz_cpu[cpu];
		bucket_drain(zone, cache->uc_allocbucket);
		bucket_drain(zone, cache->uc_freebucket);
	}

	/*
	 * Drain the bucket queues and free the buckets, we just keep two per
	 * cpu (alloc/free).
	 */
	ZONE_LOCK(zone);
	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		bucket_drain(zone, bucket);
		uma_zfree_internal(bucketzone, bucket, NULL, 0);
		ZONE_LOCK(zone);
	}

	/* Now we do the free queue.. */
	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		uma_zfree_internal(bucketzone, bucket, NULL, 0);
	}

	/* We unlock here, but they will all block until the zone is unlocked */
	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		CPU_UNLOCK(zone, cpu);
	}

	zone->uz_cachefree = 0;
}

/*
 * Frees pages from a zone back to the system.  This is done on demand from
 * the pageout daemon.
 *
 * Arguments:
 *	zone  The zone to free pages from
 *
 * Returns:
 *	Nothing.
 */
static void
zone_drain(uma_zone_t zone)
{
	struct slabhead freeslabs = {};
	uma_slab_t slab;
	uma_slab_t n;
	u_int64_t extra;
	u_int8_t flags;
	u_int8_t *mem;
	int i;

	/*
	 * We don't want to take pages from statically allocated zones at this
	 * time
	 */
	if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
		return;

	ZONE_LOCK(zone);

	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
		cache_drain(zone);

	if (zone->uz_free < zone->uz_wssize)
		goto finished;
#ifdef UMA_DEBUG
	printf("%s working set size: %llu free items: %u\n",
	    zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
#endif
	extra = zone->uz_free - zone->uz_wssize;
	extra /= zone->uz_ipers;

	/* extra is now the number of extra slabs that we can free */

	if (extra == 0)
		goto finished;

	slab = LIST_FIRST(&zone->uz_free_slab);
	while (slab && extra) {
		n = LIST_NEXT(slab, us_link);

		/* We have nowhere to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		zone->uz_pages -= zone->uz_ppera;
		zone->uz_free -= zone->uz_ipers;

		if (zone->uz_flags & UMA_ZFLAG_HASH)
			UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);

		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);

		slab = n;
		extra--;
	}
finished:
	ZONE_UNLOCK(zone);

	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
		if (zone->uz_fini)
			for (i = 0; i < zone->uz_ipers; i++)
				zone->uz_fini(
				    slab->us_data + (zone->uz_rsize * i),
				    zone->uz_size);
		flags = slab->us_flags;
		mem = slab->us_data;

		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
			uma_zfree_internal(slabzone, slab, NULL, 0);
		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
			for (i = 0; i < zone->uz_ppera; i++)
				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
				    kmem_object);
#ifdef UMA_DEBUG
		printf("%s: Returning %d bytes.\n",
		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
#endif
		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
	}

}

/*
 * Allocate a new slab for a zone.  This does not insert the slab onto a list.
 *
 * Arguments:
 *	zone  The zone to allocate slabs for
 *	wait  Shall we wait?
 *
 * Returns:
 *	The slab that was allocated or NULL if there is no memory and the
 *	caller specified M_NOWAIT.
 */
static uma_slab_t
slab_zalloc(uma_zone_t zone, int wait)
{
	uma_slab_t slab;	/* Starting slab */
	u_int8_t *mem;
	u_int8_t flags;
	int i;

	slab = NULL;

#ifdef UMA_DEBUG
	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
#endif
	ZONE_UNLOCK(zone);

	if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
		slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
		if (slab == NULL) {
			ZONE_LOCK(zone);
			return NULL;
		}
	}

	/*
	 * This reproduces the old vm_zone behavior of zero filling pages the
	 * first time they are added to a zone.
	 *
	 * Malloced items are zeroed in uma_zalloc.
	 */
	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
		wait |= M_ZERO;
	else
		wait &= ~M_ZERO;

	if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
		mtx_lock(&Giant);
		mem = zone->uz_allocf(zone,
		    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
		mtx_unlock(&Giant);
		if (mem == NULL) {
			ZONE_LOCK(zone);
			return (NULL);
		}
	} else {
		uma_slab_t tmps;

		if (zone->uz_ppera > 1)
			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
			panic("Mallocing before uma_startup2 has been called.\n");
		if (uma_boot_free == 0)
			panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
		tmps = LIST_FIRST(&uma_boot_pages);
		LIST_REMOVE(tmps, us_link);
		uma_boot_free--;
		mem = tmps->us_data;
	}

	/* Point the slab into the allocated memory */
	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
		slab = (uma_slab_t)(mem + zone->uz_pgoff);

	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
		for (i = 0; i < zone->uz_ppera; i++)
			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);

	slab->us_zone = zone;
	slab->us_data = mem;

	/*
	 * This is intended to spread data out across cache lines.
	 *
	 * This code doesn't seem to work properly on x86, and on alpha
	 * it makes absolutely no performance difference.  I'm sure it could
	 * use some tuning, but sun makes outrageous claims about its
	 * performance.
	 */
#if 0
	if (zone->uz_cachemax) {
		slab->us_data += zone->uz_cacheoff;
		zone->uz_cacheoff += UMA_CACHE_INC;
		if (zone->uz_cacheoff > zone->uz_cachemax)
			zone->uz_cacheoff = 0;
	}
#endif

	slab->us_freecount = zone->uz_ipers;
	slab->us_firstfree = 0;
	slab->us_flags = flags;
	for (i = 0; i < zone->uz_ipers; i++)
		slab->us_freelist[i] = i+1;

	if (zone->uz_init)
		for (i = 0; i < zone->uz_ipers; i++)
			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
			    zone->uz_size);
	ZONE_LOCK(zone);

	if (zone->uz_flags & UMA_ZFLAG_HASH)
		UMA_HASH_INSERT(&zone->uz_hash, slab, mem);

	zone->uz_pages += zone->uz_ppera;
	zone->uz_free += zone->uz_ipers;

	return (slab);
}

/*
 * Allocates a number of pages from the system
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	void *p;	/* Returned page */

	*pflag = UMA_SLAB_KMEM;
	p = (void *) kmem_malloc(kmem_map, bytes, wait);

	return (p);
}

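/*
 * Example (a sketch, not part of the allocator): a zone can substitute its
 * own back end with uma_zone_set_allocf()/uma_zone_set_freef().  A custom
 * allocator only has to match the uma_alloc signature that page_alloc()
 * above uses and report where the pages came from via the flag byte, e.g.
 * the hypothetical:
 *
 *	static void *
 *	my_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
 *	{
 *		*pflag = UMA_SLAB_KMEM;
 *		return ((void *)kmem_malloc(kmem_map, bytes, wait));
 *	}
 *
 *	uma_zone_set_allocf(zone, my_alloc);
 */
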
/*
 * Allocates a number of pages from within an object
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
 *
 * TODO: If we fail during a multi-page allocation release the pages that have
 * already been allocated.
 */
static void *
obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	vm_offset_t zkva;
	vm_offset_t retkva;
	vm_page_t p;
	int pages;

	retkva = 0;
	pages = zone->uz_pages;

	/*
	 * This looks a little weird since we're getting one page at a time.
	 */
	while (bytes > 0) {
		p = vm_page_alloc(zone->uz_obj, pages,
		    VM_ALLOC_INTERRUPT);
		if (p == NULL)
			return (NULL);

		zkva = zone->uz_kva + pages * PAGE_SIZE;
		if (retkva == 0)
			retkva = zkva;
		pmap_qenter(zkva, &p, 1);
		bytes -= PAGE_SIZE;
		pages += 1;
	}

	*flags = UMA_SLAB_PRIV;

	return ((void *)retkva);
}

/*
 * Frees a number of pages to the system
 *
 * Arguments:
 *	mem    A pointer to the memory to be freed
 *	size   The size of the memory being freed
 *	flags  The original p->us_flags field
 *
 * Returns:
 *	Nothing
 */
static void
page_free(void *mem, int size, u_int8_t flags)
{
	vm_map_t map;

	if (flags & UMA_SLAB_KMEM)
		map = kmem_map;
	else
		panic("UMA: page_free used with invalid flags %d\n", flags);

	kmem_free(map, (vm_offset_t)mem, size);
}

/*
 * Zero fill initializer
 *
 * Arguments/Returns follow uma_init specifications
 */
static void
zero_init(void *mem, int size)
{
	bzero(mem, size);
}

/*
 * Finish creating a small uma zone.  This calculates ipers and the zone size.
 *
 * Arguments:
 *	zone  The zone we should initialize
 *
 * Returns:
 *	Nothing
 */
static void
zone_small_init(uma_zone_t zone)
{
	int rsize;
	int memused;
	int ipers;

	rsize = zone->uz_size;

	if (rsize < UMA_SMALLEST_UNIT)
		rsize = UMA_SMALLEST_UNIT;

	if (rsize & zone->uz_align)
		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);

	zone->uz_rsize = rsize;

	rsize += 1;	/* Account for the byte of linkage */
	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
	zone->uz_ppera = 1;

	memused = zone->uz_ipers * zone->uz_rsize;

	/* Can we do any better? */
	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
			return;
		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
		if (ipers > zone->uz_ipers) {
			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
			if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
				zone->uz_flags |= UMA_ZFLAG_HASH;
			zone->uz_ipers = ipers;
		}
	}
}

/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments:
 *	zone  The zone we should initialize
 *
 * Returns:
 *	Nothing
 */
static void
zone_large_init(uma_zone_t zone)
{
	int pages;

	pages = zone->uz_size / UMA_SLAB_SIZE;

	/* Account for remainder */
	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
		pages++;

	zone->uz_ppera = pages;
	zone->uz_ipers = 1;

	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
		zone->uz_flags |= UMA_ZFLAG_HASH;

	zone->uz_rsize = zone->uz_size;
}

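/*
 * Worked example (a sketch; assumes UMA_SLAB_SIZE == PAGE_SIZE == 4096 and a
 * slab header of roughly 64 bytes): for a 256 byte, pointer aligned item,
 * zone_small_init() keeps the header inside the page and gets
 * (4096 - 64) / (256 + 1) == 15 items per slab, leaving 256 bytes unused.
 * If that waste is at least UMA_MAX_WASTE, moving the header off page would
 * yield 4096 / 256 == 16 items, so the zone is marked OFFPAGE (and HASH,
 * unless it is a malloc zone).  An 8 KB item instead takes the
 * zone_large_init() path: two pages per slab, one item per slab, always
 * OFFPAGE.
 */
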
/*
 * Zone header ctor.  This initializes all fields, locks, etc., and inserts
 * the zone onto the global zone list.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_zctor_args
 */
static void
zone_ctor(void *mem, int size, void *udata)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	int privlc;
	int cplen;
	int cpu;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_size = arg->size;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_init = arg->uminit;
	zone->uz_fini = arg->fini;
	zone->uz_align = arg->align;
	zone->uz_free = 0;
	zone->uz_pages = 0;
	zone->uz_flags = 0;
	zone->uz_allocf = page_alloc;
	zone->uz_freef = page_free;

	if (arg->flags & UMA_ZONE_ZINIT)
		zone->uz_init = zero_init;

	if (arg->flags & UMA_ZONE_INTERNAL)
		zone->uz_flags |= UMA_ZFLAG_INTERNAL;

	if (arg->flags & UMA_ZONE_MALLOC)
		zone->uz_flags |= UMA_ZFLAG_MALLOC;

	if (arg->flags & UMA_ZONE_NOFREE)
		zone->uz_flags |= UMA_ZFLAG_NOFREE;

	if (arg->flags & UMA_ZONE_VM)
		zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;

	if (zone->uz_size > UMA_SLAB_SIZE)
		zone_large_init(zone);
	else
		zone_small_init(zone);

	if (arg->flags & UMA_ZONE_MTXCLASS)
		privlc = 1;
	else
		privlc = 0;

	/* We do this so that the per cpu lock name is unique for each zone */
	memcpy(zone->uz_lname, "PCPU ", 5);
	cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
	memcpy(zone->uz_lname+5, zone->uz_name, cplen);
	zone->uz_lname[LOCKNAME_LEN - 1] = '\0';

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
		int totsize;
		int waste;

		/* Size of the slab struct and free list */
		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;

		waste = zone->uz_pgoff;
		waste -= (zone->uz_ipers * zone->uz_rsize);

		/*
		 * This calculates how much space we have for cache line size
		 * optimizations.  It works by offsetting each slab slightly.
		 * Currently it breaks on x86, and so it is disabled.
		 */
		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
			zone->uz_cachemax = waste - UMA_CACHE_INC;
			zone->uz_cacheoff = 0;
		}

		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
		    + zone->uz_ipers;
		/* I don't think it's possible, but I'll make sure anyway */
		if (totsize > UMA_SLAB_SIZE) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
			    zone->uz_size);
			panic("UMA slab won't fit.\n");
		}
	}

	if (zone->uz_flags & UMA_ZFLAG_HASH)
		hash_alloc(&zone->uz_hash);

#ifdef UMA_DEBUG
	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
	    zone->uz_name, zone,
	    zone->uz_size, zone->uz_ipers,
	    zone->uz_ppera, zone->uz_pgoff);
#endif
	ZONE_LOCK_INIT(zone, privlc);

	mtx_lock(&uma_mtx);
	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
	mtx_unlock(&uma_mtx);

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */
	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
		return;

	if (zone->uz_ipers < UMA_BUCKET_SIZE)
		zone->uz_count = zone->uz_ipers - 1;
	else
		zone->uz_count = UMA_BUCKET_SIZE - 1;

	for (cpu = 0; cpu < maxcpu; cpu++)
		CPU_LOCK_INIT(zone, cpu, privlc);
}

/*
 * Zone header dtor.  This frees all data, destroys locks, frees the hash table
 * and removes the zone from the global list.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */
static void
zone_dtor(void *arg, int size, void *udata)
{
	uma_zone_t zone;
	int cpu;

	zone = (uma_zone_t)arg;

	ZONE_LOCK(zone);
	zone->uz_wssize = 0;
	ZONE_UNLOCK(zone);

	mtx_lock(&uma_mtx);
	LIST_REMOVE(zone, uz_link);
	zone_drain(zone);
	mtx_unlock(&uma_mtx);

	ZONE_LOCK(zone);
	if (zone->uz_free != 0)
		printf("Zone %s was not empty.  Lost %d pages of memory.\n",
		    zone->uz_name, zone->uz_pages);

	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
		for (cpu = 0; cpu < maxcpu; cpu++)
			CPU_LOCK_FINI(zone, cpu);

	ZONE_UNLOCK(zone);
	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
		hash_free(&zone->uz_hash);

	ZONE_LOCK_FINI(zone);
}

/*
 * Traverses every zone in the system and calls a callback
 *
 * Arguments:
 *	zfunc  A pointer to a function which accepts a zone
 *	       as an argument.
 *
 * Returns:
 *	Nothing
 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_zone_t zone;

	mtx_lock(&uma_mtx);
	LIST_FOREACH(zone, &uma_zones, uz_link) {
		zfunc(zone);
	}
	mtx_unlock(&uma_mtx);
}

/* Public functions */
/* See uma.h */
void
uma_startup(void *bootmem)
{
	struct uma_zctor_args args;
	uma_slab_t slab;
	int slabsize;
	int i;

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone.\n");
#endif
#ifdef SMP
	maxcpu = mp_maxid + 1;
#else
	maxcpu = 1;
#endif
#ifdef UMA_DEBUG
	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
	Debugger("stop");
#endif
	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
	/* "Manually" create the initial zone */
	args.name = "UMA Zones";
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (maxcpu - 1));
	args.ctor = zone_ctor;
	args.dtor = zone_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZONE_INTERNAL;
	/* The initial zone has no per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	for (i = 0; i < UMA_BOOT_PAGES; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (u_int8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
		uma_boot_free++;
	}

#ifdef UMA_DEBUG
	printf("Creating slab zone.\n");
#endif

	/*
	 * This is the max number of free list items we'll have with
	 * offpage slabs.
	 */
	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
	slabsize /= UMA_MAX_WASTE;
	slabsize++;			/* In case it was rounded down */
	slabsize += sizeof(struct uma_slab);

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
	    slabsize,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);

#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}

/* See uma.h */
void
uma_startup2(void)
{
	booted = 1;
	bucket_enable();
#ifdef UMA_DEBUG
	printf("UMA startup2 complete.\n");
#endif
}

/*
 * Initialize our callout handle
 */
static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
	printf("Starting callout.\n");
#endif
	callout_init(&uma_callout, 0);
	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
	printf("UMA startup3 complete.\n");
#endif
}

/* See uma.h */
uma_zone_t
uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
    uma_init uminit, uma_fini fini, int align, u_int16_t flags)
{
	struct uma_zctor_args args;

	/* This stuff is essential for the zone ctor */
	args.name = name;
	args.size = size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;

	return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL));
}

/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
{
	uma_zfree_internal(zones, zone, NULL, 0);
}

/* See uma.h */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
	void *item;
	uma_cache_t cache;
	uma_bucket_t bucket;
	int cpu;

	/* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif

	if (!(flags & M_NOWAIT)) {
		KASSERT(curthread->td_intr_nesting_level == 0,
		    ("malloc(M_WAITOK) in interrupt context"));
		WITNESS_SLEEP(1, NULL);
	}

zalloc_restart:
	cpu = PCPU_GET(cpuid);
	CPU_LOCK(zone, cpu);
	cache = &zone->uz_cpu[cpu];

zalloc_start:
	bucket = cache->uc_allocbucket;

	if (bucket) {
		if (bucket->ub_ptr > -1) {
			item = bucket->ub_bucket[bucket->ub_ptr];
#ifdef INVARIANTS
			bucket->ub_bucket[bucket->ub_ptr] = NULL;
#endif
			bucket->ub_ptr--;
			KASSERT(item != NULL,
			    ("uma_zalloc: Bucket pointer mangled."));
			cache->uc_allocs++;
#ifdef INVARIANTS
			uma_dbg_alloc(zone, NULL, item);
#endif
			CPU_UNLOCK(zone, cpu);
			if (zone->uz_ctor)
				zone->uz_ctor(item, zone->uz_size, udata);
			if (flags & M_ZERO)
				bzero(item, zone->uz_size);
			return (item);
		} else if (cache->uc_freebucket) {
			/*
			 * We have run out of items in our allocbucket.
			 * See if we can switch with our free bucket.
			 */
			if (cache->uc_freebucket->ub_ptr > -1) {
				uma_bucket_t swap;

#ifdef UMA_DEBUG_ALLOC
				printf("uma_zalloc: Swapping empty with alloc.\n");
#endif
				swap = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = swap;

				goto zalloc_start;
			}
		}
	}
	ZONE_LOCK(zone);
	/* Since we have locked the zone we may as well send back our stats */
	zone->uz_allocs += cache->uc_allocs;
	cache->uc_allocs = 0;

	/* Our old one is now a free bucket */
	if (cache->uc_allocbucket) {
		KASSERT(cache->uc_allocbucket->ub_ptr == -1,
		    ("uma_zalloc_arg: Freeing a non free bucket."));
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    cache->uc_allocbucket, ub_link);
		cache->uc_allocbucket = NULL;
	}

	/* Check the free list for a new alloc bucket */
	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		KASSERT(bucket->ub_ptr != -1,
		    ("uma_zalloc_arg: Returning an empty bucket."));

		LIST_REMOVE(bucket, ub_link);
		cache->uc_allocbucket = bucket;
		ZONE_UNLOCK(zone);
		goto zalloc_start;
	}
	/* Bump up our uz_count so we get here less often */
	if (zone->uz_count < UMA_BUCKET_SIZE - 1)
		zone->uz_count++;

	/* We are no longer associated with this cpu!!! */
	CPU_UNLOCK(zone, cpu);

	/*
	 * Now lets just fill a bucket and put it on the free list.  If that
	 * works we'll restart the allocation from the beginning.
	 *
	 * Try this zone's free list first so we don't allocate extra buckets.
	 */
	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL)
		LIST_REMOVE(bucket, ub_link);

	/* Now we no longer need the zone lock. */
	ZONE_UNLOCK(zone);

	if (bucket == NULL) {
		int bflags;

		bflags = flags;
		if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
			bflags |= M_NOVM;

		bucket = uma_zalloc_internal(bucketzone,
		    NULL, bflags, NULL);
	}

	if (bucket != NULL) {
#ifdef INVARIANTS
		bzero(bucket, bucketzone->uz_size);
#endif
		bucket->ub_ptr = -1;

		if (uma_zalloc_internal(zone, udata, flags, bucket))
			goto zalloc_restart;
		else
			uma_zfree_internal(bucketzone, bucket, NULL, 0);
	}
	/*
	 * We may not get a bucket if we recurse, so
	 * return an actual item.
	 */
#ifdef UMA_DEBUG
	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

	return (uma_zalloc_internal(zone, udata, flags, NULL));
}

/*
 * Allocates an item for an internal zone OR fills a bucket
 *
 * Arguments:
 *	zone    The zone to alloc for.
 *	udata   The data to be passed to the constructor.
 *	flags   M_WAITOK, M_NOWAIT, M_ZERO.
 *	bucket  The bucket to fill or NULL
 *
 * Returns:
 *	NULL if there is no memory and M_NOWAIT is set
 *	An item if called on an internal zone
 *	Non NULL if called to fill a bucket and it was successful.
 *
 * Discussion:
 *	This was much cleaner before it had to do per cpu caches.  It is
 *	complicated now because it has to handle the simple internal case, and
 *	the more involved bucket filling and allocation.
 */
static void *
uma_zalloc_internal(uma_zone_t zone, void *udata, int flags, uma_bucket_t bucket)
{
	uma_slab_t slab;
	u_int8_t freei;
	void *item;

	item = NULL;

	/*
	 * This is to stop us from allocating per cpu buckets while we're
	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
	 * boot pages.
	 */
	if (bucketdisable && zone == bucketzone)
		return (NULL);

#ifdef UMA_DEBUG_ALLOC
	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
	ZONE_LOCK(zone);

	/*
	 * This code is here to limit the number of simultaneous bucket fills
	 * for any given zone to the number of per cpu caches in this zone.  This
	 * is done so that we don't allocate more memory than we really need.
	 */
	if (bucket) {
#ifdef SMP
		if (zone->uz_fills >= mp_ncpus) {
#else
		if (zone->uz_fills > 1) {
#endif
			ZONE_UNLOCK(zone);
			return (NULL);
		}

		zone->uz_fills++;
	}

new_slab:

	/* Find a slab with some space */
	if (zone->uz_free) {
		if (!LIST_EMPTY(&zone->uz_part_slab)) {
			slab = LIST_FIRST(&zone->uz_part_slab);
		} else {
			slab = LIST_FIRST(&zone->uz_free_slab);
			LIST_REMOVE(slab, us_link);
			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
		}
	} else {
		/*
		 * This is to prevent us from recursively trying to allocate
		 * buckets.  The problem is that if an allocation forces us to
		 * grab a new bucket we will call page_alloc, which will go off
		 * and cause the vm to allocate vm_map_entries.  If we need new
		 * buckets there too we will recurse in kmem_alloc and bad
		 * things happen.  So instead we return a NULL bucket, and make
		 * the code that allocates buckets smart enough to deal with it.
		 */
		if (zone == bucketzone && zone->uz_recurse != 0) {
			ZONE_UNLOCK(zone);
			return (NULL);
		}
		while (zone->uz_maxpages &&
		    zone->uz_pages >= zone->uz_maxpages) {
			zone->uz_flags |= UMA_ZFLAG_FULL;

			if (flags & M_WAITOK)
				msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
			else
				goto alloc_fail;

			goto new_slab;
		}

		if (flags & M_NOVM)
			goto alloc_fail;

		zone->uz_recurse++;
		slab = slab_zalloc(zone, flags);
		zone->uz_recurse--;
		/*
		 * We might not have been able to get a slab but another cpu
		 * could have while we were unlocked.  If we did get a slab put
		 * it on the partially used slab list.  If not check the free
		 * count and restart or fail accordingly.
		 */
		if (slab)
			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
		else if (zone->uz_free == 0)
			goto alloc_fail;
		else
			goto new_slab;
	}
	/*
	 * If this is our first time through, put this bucket on the list.
	 */
	if (bucket != NULL && bucket->ub_ptr == -1)
		LIST_INSERT_HEAD(&zone->uz_full_bucket,
		    bucket, ub_link);

	while (slab->us_freecount) {
		freei = slab->us_firstfree;
		slab->us_firstfree = slab->us_freelist[freei];

		item = slab->us_data + (zone->uz_rsize * freei);

		slab->us_freecount--;
		zone->uz_free--;
#ifdef INVARIANTS
		uma_dbg_alloc(zone, slab, item);
#endif
		if (bucket == NULL) {
			zone->uz_allocs++;
			break;
		}
		bucket->ub_bucket[++bucket->ub_ptr] = item;

		/* Don't overfill the bucket! */
		if (bucket->ub_ptr == zone->uz_count)
			break;
	}

	/* Move this slab to the full list */
	if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
	}

	if (bucket != NULL) {
		/* Try to keep the buckets totally full, but don't block */
		if (bucket->ub_ptr < zone->uz_count) {
			flags |= M_NOWAIT;
			flags &= ~M_WAITOK;
			goto new_slab;
		} else
			zone->uz_fills--;
	}

	ZONE_UNLOCK(zone);

	/* Only construct at this time if we're not filling a bucket */
	if (bucket == NULL) {
		if (zone->uz_ctor != NULL)
			zone->uz_ctor(item, zone->uz_size, udata);
		if (flags & M_ZERO)
			bzero(item, zone->uz_size);
	}

	return (item);

alloc_fail:
	if (bucket != NULL)
		zone->uz_fills--;
	ZONE_UNLOCK(zone);

	if (bucket != NULL && bucket->ub_ptr != -1)
		return (bucket);

	return (NULL);
}

/* See uma.h */
void
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
	uma_cache_t cache;
	uma_bucket_t bucket;
	int bflags;
	int cpu;

	/* This is the fast path free */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
	/*
	 * The race here is acceptable.  If we miss it we'll just have to wait
	 * a little longer for the limits to be reset.
	 */
	if (zone->uz_flags & UMA_ZFLAG_FULL)
		goto zfree_internal;

zfree_restart:
	cpu = PCPU_GET(cpuid);
	CPU_LOCK(zone, cpu);
	cache = &zone->uz_cpu[cpu];

zfree_start:
	bucket = cache->uc_freebucket;

	if (bucket) {
		/*
		 * Do we have room in our bucket?  It is OK for this uz count
		 * check to be slightly out of sync.
		 */
		if (bucket->ub_ptr < zone->uz_count) {
			bucket->ub_ptr++;
			KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
			    ("uma_zfree: Freeing to non free bucket index."));
			bucket->ub_bucket[bucket->ub_ptr] = item;
			if (zone->uz_dtor)
				zone->uz_dtor(item, zone->uz_size, udata);
#ifdef INVARIANTS
			if (zone->uz_flags & UMA_ZFLAG_MALLOC)
				uma_dbg_free(zone, udata, item);
			else
				uma_dbg_free(zone, NULL, item);
#endif
			CPU_UNLOCK(zone, cpu);
			return;
		} else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
			printf("uma_zfree: Swapping buckets.\n");
#endif
			/*
			 * We have run out of space in our freebucket.
			 * See if we can switch with our alloc bucket.
			 */
			if (cache->uc_allocbucket->ub_ptr <
			    cache->uc_freebucket->ub_ptr) {
				uma_bucket_t swap;

				swap = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = swap;

				goto zfree_start;
			}
		}
	}

	/*
	 * We can get here for two reasons:
	 *
	 * 1) The buckets are NULL
	 * 2) The alloc and free buckets are both somewhat full.
	 */
	ZONE_LOCK(zone);

	bucket = cache->uc_freebucket;
	cache->uc_freebucket = NULL;

	/* Can we throw this on the zone full list? */
	if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
		printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
		/* ub_ptr is pointing to the last free item */
		KASSERT(bucket->ub_ptr != -1,
		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
		LIST_INSERT_HEAD(&zone->uz_full_bucket,
		    bucket, ub_link);
	}
	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		cache->uc_freebucket = bucket;
		goto zfree_start;
	}
	/* We're done with this CPU now */
	CPU_UNLOCK(zone, cpu);

	/* And the zone.. */
	ZONE_UNLOCK(zone);

#ifdef UMA_DEBUG_ALLOC
	printf("uma_zfree: Allocating new free bucket.\n");
#endif
	bflags = M_NOWAIT;

	if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
		bflags |= M_NOVM;
#ifdef INVARIANTS
	bflags |= M_ZERO;
#endif
	bucket = uma_zalloc_internal(bucketzone,
	    NULL, bflags, NULL);
	if (bucket) {
		bucket->ub_ptr = -1;
		ZONE_LOCK(zone);
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    bucket, ub_link);
		ZONE_UNLOCK(zone);
		goto zfree_restart;
	}

	/*
	 * If nothing else caught this, we'll just do an internal free.
	 */
zfree_internal:

	uma_zfree_internal(zone, item, udata, 0);

	return;
}

/*
 * Frees an item to an INTERNAL zone or allocates a free bucket
 *
 * Arguments:
 *	zone   The zone to free to
 *	item   The item we're freeing
 *	udata  User supplied data for the dtor
 *	skip   Skip the dtor, it was done in uma_zfree_arg
 */
static void
uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
{
	uma_slab_t slab;
	u_int8_t *mem;
	u_int8_t freei;

	ZONE_LOCK(zone);

	if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
		if (zone->uz_flags & UMA_ZFLAG_HASH)
			slab = hash_sfind(&zone->uz_hash, mem);
		else {
			mem += zone->uz_pgoff;
			slab = (uma_slab_t)mem;
		}
	} else {
		slab = (uma_slab_t)udata;
	}

	/* Do we need to remove from any lists? */
	if (slab->us_freecount+1 == zone->uz_ipers) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
	} else if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
	}

	/* Slab management stuff */
	freei = ((unsigned long)item - (unsigned long)slab->us_data)
	    / zone->uz_rsize;

#ifdef INVARIANTS
	if (!skip)
		uma_dbg_free(zone, slab, item);
#endif

	slab->us_freelist[freei] = slab->us_firstfree;
	slab->us_firstfree = freei;
	slab->us_freecount++;

	/* Zone statistics */
	zone->uz_free++;

	if (!skip && zone->uz_dtor)
		zone->uz_dtor(item, zone->uz_size, udata);

	if (zone->uz_flags & UMA_ZFLAG_FULL) {
		if (zone->uz_pages < zone->uz_maxpages)
			zone->uz_flags &= ~UMA_ZFLAG_FULL;

		/* We can handle one more allocation */
		wakeup_one(&zone);
	}

	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_max(uma_zone_t zone, int nitems)
{
	ZONE_LOCK(zone);
	if (zone->uz_ppera > 1)
		zone->uz_maxpages = nitems * zone->uz_ppera;
	else
		zone->uz_maxpages = nitems / zone->uz_ipers;

	if (zone->uz_maxpages * zone->uz_ipers < nitems)
		zone->uz_maxpages++;

	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
	ZONE_LOCK(zone);

	zone->uz_freef = freef;

	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
{
	ZONE_LOCK(zone);

	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
	zone->uz_allocf = allocf;

	ZONE_UNLOCK(zone);
}

/* See uma.h */
int
uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
{
	int pages;
	vm_offset_t kva;

	mtx_lock(&Giant);

	pages = count / zone->uz_ipers;

	if (pages * zone->uz_ipers < count)
		pages++;

	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);

	if (kva == 0) {
		mtx_unlock(&Giant);
		return (0);
	}

	if (obj == NULL)
		obj = vm_object_allocate(OBJT_DEFAULT,
		    pages);
	else
		_vm_object_allocate(OBJT_DEFAULT,
		    pages, obj);

	ZONE_LOCK(zone);
	zone->uz_kva = kva;
	zone->uz_obj = obj;
	zone->uz_maxpages = pages;

	zone->uz_allocf = obj_alloc;
	zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;

	ZONE_UNLOCK(zone);
	mtx_unlock(&Giant);

	return (1);
}

/* See uma.h */
void
uma_prealloc(uma_zone_t zone, int items)
{
	int slabs;
	uma_slab_t slab;

	ZONE_LOCK(zone);
	slabs = items / zone->uz_ipers;
	if (slabs * zone->uz_ipers < items)
		slabs++;

	while (slabs > 0) {
		slab = slab_zalloc(zone, M_WAITOK);
		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
		slabs--;
	}
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_reclaim(void)
{
	/*
	 * You might think that the delay below would improve performance since
	 * the allocator will give away memory that it may ask for immediately.
	 * Really, it makes things worse, since cpu cycles are so much cheaper
	 * than disk activity.
	 */
#if 0
	static struct timeval tv = {0};
	struct timeval now;
	getmicrouptime(&now);
	if (now.tv_sec > tv.tv_sec + 30)
		tv = now;
	else
		return;
#endif
#ifdef UMA_DEBUG
	printf("UMA: vm asked us to release pages!\n");
#endif
	bucket_enable();
	zone_foreach(zone_drain);

	/*
	 * Some slabs may have been freed while these zones were drained, but
	 * the slab and bucket zones are visited early in the list, so visit
	 * them again here in order to free pages that became empty once the
	 * other zones were drained.
	 */
	zone_drain(slabzone);
	zone_drain(bucketzone);
}

void *
uma_large_malloc(int size, int wait)
{
	void *mem;
	uma_slab_t slab;
	u_int8_t flags;

	slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
	if (slab == NULL)
		return (NULL);

	mem = page_alloc(NULL, size, &flags, wait);
	if (mem) {
		vsetslab((vm_offset_t)mem, slab);
		slab->us_data = mem;
		slab->us_flags = flags | UMA_SLAB_MALLOC;
		slab->us_size = size;
	} else {
		uma_zfree_internal(slabzone, slab, NULL, 0);
	}

	return (mem);
}

void
uma_large_free(uma_slab_t slab)
{
	vsetobj((vm_offset_t)slab->us_data, kmem_object);
	page_free(slab->us_data, slab->us_size, slab->us_flags);
	uma_zfree_internal(slabzone, slab, NULL, 0);
}

void
uma_print_stats(void)
{
	zone_foreach(uma_print_zone);
}

void
uma_print_zone(uma_zone_t zone)
{
	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
	    zone->uz_ipers, zone->uz_ppera,
	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
}

/*
 * Sysctl handler for vm.zone
 *
 * stolen from vm_zone.c
 */
static int
sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
{
	int error, len, cnt;
	const int linesize = 128;	/* conservative */
	int totalfree;
	char *tmpbuf, *offset;
	uma_zone_t z;
	char *p;

	cnt = 0;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(z, &uma_zones, uz_link)
		cnt++;
	mtx_unlock(&uma_mtx);
	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
	    M_TEMP, M_WAITOK);
	len = snprintf(tmpbuf, linesize,
	    "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
	if (cnt == 0)
		tmpbuf[len - 1] = '\0';
	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
	if (error || cnt == 0)
		goto out;
	offset = tmpbuf;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(z, &uma_zones, uz_link) {
		if (cnt == 0)	/* list may have changed size */
			break;
		ZONE_LOCK(z);
		totalfree = z->uz_free + z->uz_cachefree;
		len = snprintf(offset, linesize,
		    "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
		    z->uz_name, z->uz_size,
		    z->uz_maxpages * z->uz_ipers,
		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
		    totalfree,
		    (unsigned long long)z->uz_allocs);
		ZONE_UNLOCK(z);
		for (p = offset + 12; p > offset && *p == ' '; --p)
			/* nothing */ ;
		p[1] = ':';
		cnt--;
		offset += len;
	}
	mtx_unlock(&uma_mtx);
	*offset++ = '\0';
	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
out:
	FREE(tmpbuf, M_TEMP);
	return (error);
}
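
/*
 * Example (a sketch, for illustration only): typical consumer usage of the
 * public interface declared in uma.h, using a hypothetical "foo" zone:
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, 0);
 *	uma_zone_set_max(foo_zone, 1000);	(optional item limit)
 *
 *	fp = uma_zalloc_arg(foo_zone, NULL, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree_arg(foo_zone, fp, NULL);
 *	uma_zdestroy(foo_zone);
 */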