1 /* 2 * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 * 28 */ 29 30 /* 31 * uma_core.c Implementation of the Universal Memory allocator 32 * 33 * This allocator is intended to replace the multitude of similar object caches 34 * in the standard FreeBSD kernel. The intent is to be flexible as well as 35 * effecient. A primary design goal is to return unused memory to the rest of 36 * the system. This will make the system as a whole more flexible due to the 37 * ability to move memory to subsystems which most need it instead of leaving 38 * pools of reserved memory unused. 39 * 40 * The basic ideas stem from similar slab/zone based allocators whose algorithms 41 * are well known. 42 * 43 */ 44 45 /* 46 * TODO: 47 * - Improve memory usage for large allocations 48 * - Improve INVARIANTS (0xdeadc0de write out) 49 * - Investigate cache size adjustments 50 */ 51 52 /* I should really use ktr.. */ 53 /* 54 #define UMA_DEBUG 1 55 #define UMA_DEBUG_ALLOC 1 56 #define UMA_DEBUG_ALLOC_1 1 57 */ 58 59 60 #include "opt_param.h" 61 #include <sys/param.h> 62 #include <sys/systm.h> 63 #include <sys/kernel.h> 64 #include <sys/types.h> 65 #include <sys/queue.h> 66 #include <sys/malloc.h> 67 #include <sys/lock.h> 68 #include <sys/sysctl.h> 69 #include <machine/types.h> 70 #include <sys/mutex.h> 71 #include <sys/smp.h> 72 73 #include <vm/vm.h> 74 #include <vm/vm_object.h> 75 #include <vm/vm_page.h> 76 #include <vm/vm_param.h> 77 #include <vm/vm_map.h> 78 #include <vm/vm_kern.h> 79 #include <vm/vm_extern.h> 80 #include <vm/uma.h> 81 #include <vm/uma_int.h> 82 83 /* 84 * This is the zone from which all zones are spawned. The idea is that even 85 * the zone heads are allocated from the allocator, so we use the bss section 86 * to bootstrap us. 87 */ 88 static struct uma_zone master_zone; 89 static uma_zone_t zones = &master_zone; 90 91 /* This is the zone from which all of uma_slab_t's are allocated. */ 92 static uma_zone_t slabzone; 93 94 /* 95 * The initial hash tables come out of this zone so they can be allocated 96 * prior to malloc coming up. 97 */ 98 static uma_zone_t hashzone; 99 100 /* 101 * Zone that buckets come from. 
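 *
 * For reference, a consumer-side sketch of the public interface implemented
 * in this file (illustrative only: "struct foo", "foo_zone" and "foo_ctor"
 * are made-up names used for hypothetical per-item setup; the entry points,
 * flags and alignment macro are the ones declared in <vm/uma.h> and
 * implemented below):
 *
 *	static uma_zone_t foo_zone;
 *
 *	static void
 *	foo_ctor(void *mem, int size, void *udata)
 *	{
 *		bzero(mem, size);
 *	}
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo), foo_ctor, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, 0);
 *	struct foo *item = uma_zalloc_arg(foo_zone, NULL, M_WAITOK);
 *	...
 *	uma_zfree_arg(foo_zone, item, NULL);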
102 */ 103 static uma_zone_t bucketzone; 104 105 /* Linked list of all zones in the system */ 106 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones); 107 108 /* This mutex protects the zone list */ 109 static struct mtx uma_mtx; 110 111 /* Linked list of boot time pages */ 112 static LIST_HEAD(,uma_slab) uma_boot_pages = 113 LIST_HEAD_INITIALIZER(&uma_boot_pages); 114 115 /* Count of free boottime pages */ 116 static int uma_boot_free = 0; 117 118 /* Is the VM done starting up? */ 119 static int booted = 0; 120 121 /* This is the handle used to schedule our working set calculator */ 122 static struct callout uma_callout; 123 124 /* This is mp_maxid + 1, for use while looping over each cpu */ 125 static int maxcpu; 126 127 /* 128 * This structure is passed as the zone ctor arg so that I don't have to create 129 * a special allocation function just for zones. 130 */ 131 struct uma_zctor_args { 132 char *name; 133 int size; 134 uma_ctor ctor; 135 uma_dtor dtor; 136 uma_init uminit; 137 uma_fini fini; 138 int align; 139 u_int16_t flags; 140 }; 141 142 /* 143 * This is the malloc hash table which is used to find the zone that a 144 * malloc allocation came from. It is not currently resizeable. The 145 * memory for the actual hash bucket is allocated in kmeminit. 146 */ 147 struct uma_hash mhash; 148 struct uma_hash *mallochash = &mhash; 149 150 /* Prototypes.. */ 151 152 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int); 153 static void *page_alloc(uma_zone_t, int, u_int8_t *, int); 154 static void page_free(void *, int, u_int8_t); 155 static uma_slab_t slab_zalloc(uma_zone_t, int); 156 static void cache_drain(uma_zone_t); 157 static void bucket_drain(uma_zone_t, uma_bucket_t); 158 static void zone_drain(uma_zone_t); 159 static void zone_ctor(void *, int, void *); 160 static void zero_init(void *, int); 161 static void zone_small_init(uma_zone_t zone); 162 static void zone_large_init(uma_zone_t zone); 163 static void zone_foreach(void (*zfunc)(uma_zone_t)); 164 static void zone_timeout(uma_zone_t zone); 165 static void hash_expand(struct uma_hash *); 166 static void uma_timeout(void *); 167 static void uma_startup3(void); 168 static void *uma_zalloc_internal(uma_zone_t, void *, int, int *, int); 169 static void uma_zfree_internal(uma_zone_t, 170 void *, void *, int); 171 void uma_print_zone(uma_zone_t); 172 void uma_print_stats(void); 173 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); 174 175 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, 176 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 177 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); 178 179 180 /* 181 * Routine called by timeout which is used to fire off some time interval 182 * based calculations. (working set, stats, etc.) 183 * 184 * Arguments: 185 * arg Unused 186 * 187 * Returns: 188 * Nothing 189 */ 190 static void 191 uma_timeout(void *unused) 192 { 193 zone_foreach(zone_timeout); 194 195 /* Reschedule this event */ 196 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); 197 } 198 199 /* 200 * Routine to perform timeout driven calculations. This does the working set 201 * as well as hash expanding, and per cpu statistics aggregation. 
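 *
 * Working set example (numbers are illustrative): if uz_allocs was 10000 at
 * the previous tick and has grown to 10300 by this one, uz_wssize becomes
 * 300 for the next interval.  zone_drain() then treats up to 300 free items
 * as part of the recent working set and only returns slabs beyond that to
 * the system.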
202 * 203 * Arguments: 204 * zone The zone to operate on 205 * 206 * Returns: 207 * Nothing 208 */ 209 static void 210 zone_timeout(uma_zone_t zone) 211 { 212 uma_cache_t cache; 213 u_int64_t alloc; 214 int free; 215 int cpu; 216 217 alloc = 0; 218 free = 0; 219 220 /* 221 * Aggregate per cpu cache statistics back to the zone. 222 * 223 * I may rewrite this to set a flag in the per cpu cache instead of 224 * locking. If the flag is not cleared on the next round I will have 225 * to lock and do it here instead so that the statistics don't get too 226 * far out of sync. 227 */ 228 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) { 229 for (cpu = 0; cpu < maxcpu; cpu++) { 230 if (CPU_ABSENT(cpu)) 231 continue; 232 CPU_LOCK(zone, cpu); 233 cache = &zone->uz_cpu[cpu]; 234 /* Add them up, and reset */ 235 alloc += cache->uc_allocs; 236 cache->uc_allocs = 0; 237 if (cache->uc_allocbucket) 238 free += cache->uc_allocbucket->ub_ptr + 1; 239 if (cache->uc_freebucket) 240 free += cache->uc_freebucket->ub_ptr + 1; 241 CPU_UNLOCK(zone, cpu); 242 } 243 } 244 245 /* Now push these stats back into the zone.. */ 246 ZONE_LOCK(zone); 247 zone->uz_allocs += alloc; 248 249 /* 250 * cachefree is an instantanious snapshot of what is in the per cpu 251 * caches, not an accurate counter 252 */ 253 zone->uz_cachefree = free; 254 255 /* 256 * Expand the zone hash table. 257 * 258 * This is done if the number of slabs is larger than the hash size. 259 * What I'm trying to do here is completely reduce collisions. This 260 * may be a little aggressive. Should I allow for two collisions max? 261 */ 262 263 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) && 264 !(zone->uz_flags & UMA_ZFLAG_MALLOC)) { 265 if (zone->uz_pages / zone->uz_ppera 266 >= zone->uz_hash.uh_hashsize) 267 hash_expand(&zone->uz_hash); 268 } 269 270 /* 271 * Here we compute the working set size as the total number of items 272 * left outstanding since the last time interval. This is slightly 273 * suboptimal. What we really want is the highest number of outstanding 274 * items during the last time quantum. This should be close enough. 275 * 276 * The working set size is used to throttle the zone_drain function. 277 * We don't want to return memory that we may need again immediately. 278 */ 279 alloc = zone->uz_allocs - zone->uz_oallocs; 280 zone->uz_oallocs = zone->uz_allocs; 281 zone->uz_wssize = alloc; 282 283 ZONE_UNLOCK(zone); 284 } 285 286 /* 287 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout 288 * to reduce collisions. This must not be done in the regular allocation path, 289 * otherwise, we can recurse on the vm while allocating pages. 290 * 291 * Arguments: 292 * hash The hash you want to expand by a factor of two. 293 * 294 * Returns: 295 * Nothing 296 * 297 * Discussion: 298 */ 299 static void 300 hash_expand(struct uma_hash *hash) 301 { 302 struct slabhead *newhash; 303 struct slabhead *oldhash; 304 uma_slab_t slab; 305 int hzonefree; 306 int hashsize; 307 int alloc; 308 int hval; 309 int i; 310 311 312 /* 313 * Remember the old hash size and see if it has to go back to the 314 * hash zone, or malloc. 
The hash zone is used for the initial hash 315 */ 316 317 hashsize = hash->uh_hashsize; 318 oldhash = hash->uh_slab_hash; 319 320 if (hashsize == UMA_HASH_SIZE_INIT) 321 hzonefree = 1; 322 else 323 hzonefree = 0; 324 325 326 /* We're just going to go to a power of two greater */ 327 if (hash->uh_hashsize) { 328 alloc = sizeof(hash->uh_slab_hash[0]) * (hash->uh_hashsize * 2); 329 /* XXX Shouldn't be abusing DEVBUF here */ 330 newhash = (struct slabhead *)malloc(alloc, M_DEVBUF, M_NOWAIT); 331 if (newhash == NULL) { 332 return; 333 } 334 hash->uh_hashsize *= 2; 335 } else { 336 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; 337 newhash = uma_zalloc_internal(hashzone, NULL, M_WAITOK, NULL, -1); 338 hash->uh_hashsize = UMA_HASH_SIZE_INIT; 339 } 340 341 bzero(newhash, alloc); 342 343 hash->uh_hashmask = hash->uh_hashsize - 1; 344 345 /* 346 * I need to investigate hash algorithms for resizing without a 347 * full rehash. 348 */ 349 350 for (i = 0; i < hashsize; i++) 351 while (!SLIST_EMPTY(&hash->uh_slab_hash[i])) { 352 slab = SLIST_FIRST(&hash->uh_slab_hash[i]); 353 SLIST_REMOVE_HEAD(&hash->uh_slab_hash[i], us_hlink); 354 hval = UMA_HASH(hash, slab->us_data); 355 SLIST_INSERT_HEAD(&newhash[hval], slab, us_hlink); 356 } 357 358 if (hash->uh_slab_hash) { 359 if (hzonefree) 360 uma_zfree_internal(hashzone, 361 hash->uh_slab_hash, NULL, 0); 362 else 363 free(hash->uh_slab_hash, M_DEVBUF); 364 } 365 hash->uh_slab_hash = newhash; 366 367 return; 368 } 369 370 /* 371 * Frees all outstanding items in a bucket 372 * 373 * Arguments: 374 * zone The zone to free to, must be unlocked. 375 * bucket The free/alloc bucket with items, cpu queue must be locked. 376 * 377 * Returns: 378 * Nothing 379 */ 380 381 static void 382 bucket_drain(uma_zone_t zone, uma_bucket_t bucket) 383 { 384 uma_slab_t slab; 385 int mzone; 386 void *item; 387 388 if (bucket == NULL) 389 return; 390 391 slab = NULL; 392 mzone = 0; 393 394 /* We have to lookup the slab again for malloc.. */ 395 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 396 mzone = 1; 397 398 while (bucket->ub_ptr > -1) { 399 item = bucket->ub_bucket[bucket->ub_ptr]; 400 #ifdef INVARIANTS 401 bucket->ub_bucket[bucket->ub_ptr] = NULL; 402 KASSERT(item != NULL, 403 ("bucket_drain: botched ptr, item is NULL")); 404 #endif 405 bucket->ub_ptr--; 406 /* 407 * This is extremely inefficient. The slab pointer was passed 408 * to uma_zfree_arg, but we lost it because the buckets don't 409 * hold them. This will go away when free() gets a size passed 410 * to it. 411 */ 412 if (mzone) 413 slab = hash_sfind(mallochash, 414 (u_int8_t *)((unsigned long)item & 415 (~UMA_SLAB_MASK))); 416 uma_zfree_internal(zone, item, slab, 1); 417 } 418 } 419 420 /* 421 * Drains the per cpu caches for a zone. 422 * 423 * Arguments: 424 * zone The zone to drain, must be unlocked. 425 * 426 * Returns: 427 * Nothing 428 * 429 * This function returns with the zone locked so that the per cpu queues can 430 * not be filled until zone_drain is finished. 431 * 432 */ 433 static void 434 cache_drain(uma_zone_t zone) 435 { 436 uma_bucket_t bucket; 437 uma_cache_t cache; 438 int cpu; 439 440 /* 441 * Flush out the per cpu queues. 442 * 443 * XXX This causes unneccisary thrashing due to immediately having 444 * empty per cpu queues. I need to improve this. 
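 *
 * (An aside on bucket_drain() above: for UMA_ZFLAG_MALLOC zones the slab is
 * recovered from the item address itself.  Assuming UMA_SLAB_MASK covers the
 * offset bits within a slab, e.g. 0xfff for single page slabs, an item at
 * address 0xc2345678 masks down to the slab base 0xc2345000, which is then
 * looked up in mallochash with hash_sfind().)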
445 */ 446 447 /* 448 * We have to lock each cpu cache before locking the zone 449 */ 450 ZONE_UNLOCK(zone); 451 452 for (cpu = 0; cpu < maxcpu; cpu++) { 453 if (CPU_ABSENT(cpu)) 454 continue; 455 CPU_LOCK(zone, cpu); 456 cache = &zone->uz_cpu[cpu]; 457 bucket_drain(zone, cache->uc_allocbucket); 458 bucket_drain(zone, cache->uc_freebucket); 459 } 460 461 /* 462 * Drain the bucket queues and free the buckets, we just keep two per 463 * cpu (alloc/free). 464 */ 465 ZONE_LOCK(zone); 466 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 467 LIST_REMOVE(bucket, ub_link); 468 ZONE_UNLOCK(zone); 469 bucket_drain(zone, bucket); 470 uma_zfree_internal(bucketzone, bucket, NULL, 0); 471 ZONE_LOCK(zone); 472 } 473 474 /* Now we do the free queue.. */ 475 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 476 LIST_REMOVE(bucket, ub_link); 477 uma_zfree_internal(bucketzone, bucket, NULL, 0); 478 } 479 480 /* We unlock here, but they will all block until the zone is unlocked */ 481 for (cpu = 0; cpu < maxcpu; cpu++) { 482 if (CPU_ABSENT(cpu)) 483 continue; 484 CPU_UNLOCK(zone, cpu); 485 } 486 487 zone->uz_cachefree = 0; 488 } 489 490 /* 491 * Frees pages from a zone back to the system. This is done on demand from 492 * the pageout daemon. 493 * 494 * Arguments: 495 * zone The zone to free pages from 496 * 497 * Returns: 498 * Nothing. 499 */ 500 static void 501 zone_drain(uma_zone_t zone) 502 { 503 uma_slab_t slab; 504 uma_slab_t n; 505 u_int64_t extra; 506 u_int8_t flags; 507 u_int8_t *mem; 508 int i; 509 510 /* 511 * We don't want to take pages from staticly allocated zones at this 512 * time 513 */ 514 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL) 515 return; 516 517 ZONE_LOCK(zone); 518 519 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) 520 cache_drain(zone); 521 522 if (zone->uz_free < zone->uz_wssize) 523 goto finished; 524 #ifdef UMA_DEBUG 525 printf("%s working set size: %llu free items: %u\n", 526 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free); 527 #endif 528 extra = zone->uz_wssize - zone->uz_free; 529 extra /= zone->uz_ipers; 530 531 /* extra is now the number of extra slabs that we can free */ 532 533 if (extra == 0) 534 goto finished; 535 536 slab = LIST_FIRST(&zone->uz_free_slab); 537 while (slab && extra) { 538 n = LIST_NEXT(slab, us_link); 539 540 /* We have no where to free these to */ 541 if (slab->us_flags & UMA_SLAB_BOOT) { 542 slab = n; 543 continue; 544 } 545 546 LIST_REMOVE(slab, us_link); 547 zone->uz_pages -= zone->uz_ppera; 548 zone->uz_free -= zone->uz_ipers; 549 if (zone->uz_fini) 550 for (i = 0; i < zone->uz_ipers; i++) 551 zone->uz_fini( 552 slab->us_data + (zone->uz_rsize * i), 553 zone->uz_size); 554 flags = slab->us_flags; 555 mem = slab->us_data; 556 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) { 557 if (zone->uz_flags & UMA_ZFLAG_MALLOC) { 558 UMA_HASH_REMOVE(mallochash, 559 slab, slab->us_data); 560 } else { 561 UMA_HASH_REMOVE(&zone->uz_hash, 562 slab, slab->us_data); 563 } 564 uma_zfree_internal(slabzone, slab, NULL, 0); 565 } else if (zone->uz_flags & UMA_ZFLAG_MALLOC) 566 UMA_HASH_REMOVE(mallochash, slab, slab->us_data); 567 #ifdef UMA_DEBUG 568 printf("%s: Returning %d bytes.\n", 569 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera); 570 #endif 571 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags); 572 573 slab = n; 574 extra--; 575 } 576 577 finished: 578 ZONE_UNLOCK(zone); 579 } 580 581 /* 582 * Allocate a new slab for a zone. This does not insert the slab onto a list. 
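 *
 * Free list example (illustrative): with uz_ipers == 4, a slab returned by
 * this function has us_firstfree == 0 and us_freelist[] == { 1, 2, 3, 4 },
 * so successive allocations hand out item indices 0, 1, 2 and 3;
 * us_freecount reaches zero before the trailing link is ever followed.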
583 * 584 * Arguments: 585 * zone The zone to allocate slabs for 586 * wait Shall we wait? 587 * 588 * Returns: 589 * The slab that was allocated or NULL if there is no memory and the 590 * caller specified M_NOWAIT. 591 * 592 */ 593 static uma_slab_t 594 slab_zalloc(uma_zone_t zone, int wait) 595 { 596 uma_slab_t slab; /* Starting slab */ 597 u_int8_t *mem; 598 u_int8_t flags; 599 int i; 600 601 #ifdef UMA_DEBUG 602 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); 603 #endif 604 if (zone->uz_maxpages && 605 zone->uz_pages + zone->uz_ppera > zone->uz_maxpages) 606 return (NULL); 607 608 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) { 609 ZONE_UNLOCK(zone); 610 mtx_lock(&Giant); 611 slab = (uma_slab_t )zone->uz_allocf(zone, 612 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait); 613 mtx_unlock(&Giant); 614 ZONE_LOCK(zone); 615 if (slab != NULL) 616 slab->us_data = (u_int8_t *)slab; 617 else 618 return (NULL); 619 } else { 620 621 if (zone->uz_ppera > 1) 622 panic("UMA: Attemping to allocate multiple pages before vm has started.\n"); 623 if (zone->uz_flags & UMA_ZFLAG_MALLOC) 624 panic("Mallocing before uma_startup2 has been called.\n"); 625 if (uma_boot_free == 0) 626 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n"); 627 slab = LIST_FIRST(&uma_boot_pages); 628 LIST_REMOVE(slab, us_link); 629 uma_boot_free--; 630 } 631 632 mem = slab->us_data; 633 634 /* Alloc slab structure for offpage, otherwise adjust it's position */ 635 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) { 636 slab = (uma_slab_t )(mem + zone->uz_pgoff); 637 } else { 638 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1); 639 if (slab == NULL) /* XXX This should go away */ 640 panic("UMA: No free slab structures"); 641 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) 642 UMA_HASH_INSERT(&zone->uz_hash, slab, mem); 643 } 644 if (zone->uz_flags & UMA_ZFLAG_MALLOC) { 645 #ifdef UMA_DEBUG 646 printf("Inserting %p into malloc hash from slab %p\n", 647 mem, slab); 648 #endif 649 UMA_HASH_INSERT(mallochash, slab, mem); 650 } 651 652 slab->us_zone = zone; 653 slab->us_data = mem; 654 655 /* 656 * This is intended to spread data out across cache lines. 657 * 658 * This code doesn't seem to work properly on x86, and on alpha 659 * it makes absolutely no performance difference. I'm sure it could 660 * use some tuning, but sun makes outrageous claims about it's 661 * performance. 662 */ 663 #if 0 664 if (zone->uz_cachemax) { 665 slab->us_data += zone->uz_cacheoff; 666 zone->uz_cacheoff += UMA_CACHE_INC; 667 if (zone->uz_cacheoff > zone->uz_cachemax) 668 zone->uz_cacheoff = 0; 669 } 670 #endif 671 672 slab->us_freecount = zone->uz_ipers; 673 slab->us_firstfree = 0; 674 slab->us_flags = flags; 675 for (i = 0; i < zone->uz_ipers; i++) 676 slab->us_freelist[i] = i+1; 677 678 if (zone->uz_init) 679 for (i = 0; i < zone->uz_ipers; i++) 680 zone->uz_init(slab->us_data + (zone->uz_rsize * i), 681 zone->uz_size); 682 683 zone->uz_pages += zone->uz_ppera; 684 zone->uz_free += zone->uz_ipers; 685 686 return (slab); 687 } 688 689 /* 690 * Allocates a number of pages from the system 691 * 692 * Arguments: 693 * zone Unused 694 * bytes The number of bytes requested 695 * wait Shall we wait? 696 * 697 * Returns: 698 * A pointer to the alloced memory or possibly 699 * NULL if M_NOWAIT is set. 
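 *
 * Zones can override this default back end with uma_zone_set_allocf() and
 * uma_zone_set_freef().  A minimal sketch of a replacement, using a made-up
 * name but the same prototype as page_alloc() below:
 *
 *	static void *
 *	foo_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
 *	{
 *		*pflag = UMA_SLAB_KMEM;
 *		return ((void *)kmem_malloc(kmem_map, bytes, wait));
 *	}
 *
 *	uma_zone_set_allocf(zone, foo_alloc);
 *
 * uma_zone_set_allocf() also sets UMA_ZFLAG_PRIVALLOC, so slab_zalloc()
 * will use the override even before the VM is fully up.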
700 */ 701 static void * 702 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 703 { 704 void *p; /* Returned page */ 705 706 /* 707 * XXX The original zone allocator did this, but I don't think it's 708 * neccisary in current. 709 */ 710 711 if (lockstatus(&kernel_map->lock, NULL)) { 712 *pflag = UMA_SLAB_KMEM; 713 p = (void *) kmem_malloc(kmem_map, bytes, wait); 714 } else { 715 *pflag = UMA_SLAB_KMAP; 716 p = (void *) kmem_alloc(kernel_map, bytes); 717 } 718 719 return (p); 720 } 721 722 /* 723 * Allocates a number of pages from within an object 724 * 725 * Arguments: 726 * zone Unused 727 * bytes The number of bytes requested 728 * wait Shall we wait? 729 * 730 * Returns: 731 * A pointer to the alloced memory or possibly 732 * NULL if M_NOWAIT is set. 733 */ 734 static void * 735 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 736 { 737 vm_offset_t zkva; 738 vm_offset_t retkva; 739 vm_page_t p; 740 int pages; 741 742 retkva = NULL; 743 pages = zone->uz_pages; 744 745 /* 746 * This looks a little weird since we're getting one page at a time 747 */ 748 while (bytes > 0) { 749 p = vm_page_alloc(zone->uz_obj, pages, 750 VM_ALLOC_INTERRUPT); 751 if (p == NULL) 752 return (NULL); 753 754 zkva = zone->uz_kva + pages * PAGE_SIZE; 755 if (retkva == NULL) 756 retkva = zkva; 757 pmap_qenter(zkva, &p, 1); 758 bytes -= PAGE_SIZE; 759 pages += 1; 760 } 761 762 *flags = UMA_SLAB_PRIV; 763 764 return ((void *)retkva); 765 } 766 767 /* 768 * Frees a number of pages to the system 769 * 770 * Arguments: 771 * mem A pointer to the memory to be freed 772 * size The size of the memory being freed 773 * flags The original p->us_flags field 774 * 775 * Returns: 776 * Nothing 777 * 778 */ 779 static void 780 page_free(void *mem, int size, u_int8_t flags) 781 { 782 vm_map_t map; 783 if (flags & UMA_SLAB_KMEM) 784 map = kmem_map; 785 else if (flags & UMA_SLAB_KMAP) 786 map = kernel_map; 787 else 788 panic("UMA: page_free used with invalid flags %d\n", flags); 789 790 kmem_free(map, (vm_offset_t)mem, size); 791 } 792 793 /* 794 * Zero fill initializer 795 * 796 * Arguments/Returns follow uma_init specifications 797 * 798 */ 799 static void 800 zero_init(void *mem, int size) 801 { 802 bzero(mem, size); 803 } 804 805 /* 806 * Finish creating a small uma zone. This calculates ipers, and the zone size. 807 * 808 * Arguments 809 * zone The zone we should initialize 810 * 811 * Returns 812 * Nothing 813 */ 814 static void 815 zone_small_init(uma_zone_t zone) 816 { 817 int rsize; 818 int memused; 819 int ipers; 820 821 rsize = zone->uz_size; 822 823 if (rsize < UMA_SMALLEST_UNIT) 824 rsize = UMA_SMALLEST_UNIT; 825 826 if (rsize & zone->uz_align) 827 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1); 828 829 zone->uz_rsize = rsize; 830 831 rsize += 1; /* Account for the byte of linkage */ 832 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize; 833 zone->uz_ppera = 1; 834 835 memused = zone->uz_ipers * zone->uz_rsize; 836 837 /* Can we do any better? */ 838 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) { 839 if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 840 return; 841 ipers = UMA_SLAB_SIZE / zone->uz_rsize; 842 if (ipers > zone->uz_ipers) { 843 zone->uz_flags |= UMA_ZFLAG_OFFPAGE; 844 zone->uz_ipers = ipers; 845 } 846 } 847 848 } 849 850 /* 851 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do 852 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be 853 * more complicated. 
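 *
 * Sizing example (illustrative, assuming UMA_SLAB_SIZE is a single 4K page):
 * a 9000 byte item gets uz_ppera == 3 and uz_ipers == 1, i.e. each slab is a
 * 12K contiguous allocation holding one item with its header kept off page,
 * whereas zone_small_init() above packs many items into one page and may
 * keep the header inside the page itself.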
854 * 855 * Arguments 856 * zone The zone we should initialize 857 * 858 * Returns 859 * Nothing 860 */ 861 static void 862 zone_large_init(uma_zone_t zone) 863 { 864 int pages; 865 866 pages = zone->uz_size / UMA_SLAB_SIZE; 867 868 /* Account for remainder */ 869 if ((pages * UMA_SLAB_SIZE) < zone->uz_size) 870 pages++; 871 872 zone->uz_ppera = pages; 873 zone->uz_ipers = 1; 874 875 zone->uz_flags |= UMA_ZFLAG_OFFPAGE; 876 zone->uz_rsize = zone->uz_size; 877 } 878 879 /* 880 * Zone header ctor. This initializes all fields, locks, etc. And inserts 881 * the zone onto the global zone list. 882 * 883 * Arguments/Returns follow uma_ctor specifications 884 * udata Actually uma_zcreat_args 885 * 886 */ 887 888 static void 889 zone_ctor(void *mem, int size, void *udata) 890 { 891 struct uma_zctor_args *arg = udata; 892 uma_zone_t zone = mem; 893 int cplen; 894 int cpu; 895 896 bzero(zone, size); 897 zone->uz_name = arg->name; 898 zone->uz_size = arg->size; 899 zone->uz_ctor = arg->ctor; 900 zone->uz_dtor = arg->dtor; 901 zone->uz_init = arg->uminit; 902 zone->uz_align = arg->align; 903 zone->uz_free = 0; 904 zone->uz_pages = 0; 905 zone->uz_flags = 0; 906 zone->uz_allocf = page_alloc; 907 zone->uz_freef = page_free; 908 909 if (arg->flags & UMA_ZONE_ZINIT) 910 zone->uz_init = zero_init; 911 912 if (arg->flags & UMA_ZONE_INTERNAL) 913 zone->uz_flags |= UMA_ZFLAG_INTERNAL; 914 915 if (arg->flags & UMA_ZONE_MALLOC) 916 zone->uz_flags |= UMA_ZFLAG_MALLOC; 917 918 if (arg->flags & UMA_ZONE_NOFREE) 919 zone->uz_flags |= UMA_ZFLAG_NOFREE; 920 921 if (zone->uz_size > UMA_SLAB_SIZE) 922 zone_large_init(zone); 923 else 924 zone_small_init(zone); 925 926 /* We do this so that the per cpu lock name is unique for each zone */ 927 memcpy(zone->uz_lname, "PCPU ", 5); 928 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6); 929 memcpy(zone->uz_lname+5, zone->uz_name, cplen); 930 zone->uz_lname[LOCKNAME_LEN - 1] = '\0'; 931 932 /* 933 * If we're putting the slab header in the actual page we need to 934 * figure out where in each page it goes. This calculates a right 935 * justified offset into the memory on a ALIGN_PTR boundary. 936 */ 937 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) { 938 int totsize; 939 int waste; 940 941 /* Size of the slab struct and free list */ 942 totsize = sizeof(struct uma_slab) + zone->uz_ipers; 943 if (totsize & UMA_ALIGN_PTR) 944 totsize = (totsize & ~UMA_ALIGN_PTR) + 945 (UMA_ALIGN_PTR + 1); 946 zone->uz_pgoff = UMA_SLAB_SIZE - totsize; 947 948 waste = zone->uz_pgoff; 949 waste -= (zone->uz_ipers * zone->uz_rsize); 950 951 /* 952 * This calculates how much space we have for cache line size 953 * optimizations. It works by offseting each slab slightly. 954 * Currently it breaks on x86, and so it is disabled. 
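 *
 * (Offset example for the calculation above, with illustrative numbers: a
 * 32 byte slab header and uz_ipers == 100 give totsize == 132, which rounds
 * up to 136 assuming an 8 byte UMA_ALIGN_PTR boundary, so uz_pgoff becomes
 * UMA_SLAB_SIZE - 136 and the header sits right-justified at the end of
 * the page.)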
955 */ 956 957 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) { 958 zone->uz_cachemax = waste - UMA_CACHE_INC; 959 zone->uz_cacheoff = 0; 960 } 961 962 totsize = zone->uz_pgoff + sizeof(struct uma_slab) 963 + zone->uz_ipers; 964 /* I don't think it's possible, but I'll make sure anyway */ 965 if (totsize > UMA_SLAB_SIZE) { 966 printf("zone %s ipers %d rsize %d size %d\n", 967 zone->uz_name, zone->uz_ipers, zone->uz_rsize, 968 zone->uz_size); 969 panic("UMA slab won't fit.\n"); 970 } 971 } else { 972 /* hash_expand here to allocate the initial hash table */ 973 hash_expand(&zone->uz_hash); 974 zone->uz_pgoff = 0; 975 } 976 977 #ifdef UMA_DEBUG 978 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n", 979 zone->uz_name, zone, 980 zone->uz_size, zone->uz_ipers, 981 zone->uz_ppera, zone->uz_pgoff); 982 #endif 983 ZONE_LOCK_INIT(zone); 984 985 mtx_lock(&uma_mtx); 986 LIST_INSERT_HEAD(&uma_zones, zone, uz_link); 987 mtx_unlock(&uma_mtx); 988 989 /* 990 * Some internal zones don't have room allocated for the per cpu 991 * caches. If we're internal, bail out here. 992 */ 993 994 if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 995 return; 996 997 for (cpu = 0; cpu < maxcpu; cpu++) { 998 if (zone->uz_ipers < UMA_BUCKET_SIZE) 999 zone->uz_cpu[cpu].uc_count = zone->uz_ipers - 1; 1000 else 1001 zone->uz_cpu[cpu].uc_count = UMA_BUCKET_SIZE - 1; 1002 CPU_LOCK_INIT(zone, cpu); 1003 } 1004 } 1005 1006 /* 1007 * Traverses every zone in the system and calls a callback 1008 * 1009 * Arguments: 1010 * zfunc A pointer to a function which accepts a zone 1011 * as an argument. 1012 * 1013 * Returns: 1014 * Nothing 1015 */ 1016 static void 1017 zone_foreach(void (*zfunc)(uma_zone_t)) 1018 { 1019 uma_zone_t zone; 1020 1021 mtx_lock(&uma_mtx); 1022 LIST_FOREACH(zone, &uma_zones, uz_link) { 1023 zfunc(zone); 1024 } 1025 mtx_unlock(&uma_mtx); 1026 } 1027 1028 /* Public functions */ 1029 /* See uma.h */ 1030 void 1031 uma_startup(void *bootmem) 1032 { 1033 struct uma_zctor_args args; 1034 uma_slab_t slab; 1035 int slabsize; 1036 int i; 1037 1038 #ifdef UMA_DEBUG 1039 printf("Creating uma zone headers zone.\n"); 1040 #endif 1041 #ifdef SMP 1042 maxcpu = mp_maxid + 1; 1043 #else 1044 maxcpu = 1; 1045 #endif 1046 #ifdef UMA_DEBUG 1047 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid); 1048 Debugger("stop"); 1049 #endif 1050 mtx_init(&uma_mtx, "UMA lock", MTX_DEF); 1051 /* "manually" Create the initial zone */ 1052 args.name = "UMA Zones"; 1053 args.size = sizeof(struct uma_zone) + 1054 (sizeof(struct uma_cache) * (maxcpu - 1)); 1055 args.ctor = zone_ctor; 1056 args.dtor = NULL; 1057 args.uminit = zero_init; 1058 args.fini = NULL; 1059 args.align = 32 - 1; 1060 args.flags = UMA_ZONE_INTERNAL; 1061 /* The initial zone has no Per cpu queues so it's smaller */ 1062 zone_ctor(zones, sizeof(struct uma_zone), &args); 1063 1064 #ifdef UMA_DEBUG 1065 printf("Filling boot free list.\n"); 1066 #endif 1067 for (i = 0; i < UMA_BOOT_PAGES; i++) { 1068 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE)); 1069 slab->us_data = (u_int8_t *)slab; 1070 slab->us_flags = UMA_SLAB_BOOT; 1071 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link); 1072 uma_boot_free++; 1073 } 1074 1075 #ifdef UMA_DEBUG 1076 printf("Creating slab zone.\n"); 1077 #endif 1078 1079 /* 1080 * This is the max number of free list items we'll have with 1081 * offpage slabs. 
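 *
 * For example (illustrative figures: a 4K UMA_SLAB_SIZE, a 64 byte struct
 * uma_slab and a UMA_MAX_WASTE of 10): (4096 - 64) / 10 + 1 == 404 free
 * list entries, so the "UMA Slabs" zone below is created with roughly
 * 404 + 64 == 468 bytes per slab header.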
1082 */ 1083 1084 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab); 1085 slabsize /= UMA_MAX_WASTE; 1086 slabsize++; /* In case there it's rounded */ 1087 slabsize += sizeof(struct uma_slab); 1088 1089 /* Now make a zone for slab headers */ 1090 slabzone = uma_zcreate("UMA Slabs", 1091 slabsize, 1092 NULL, NULL, NULL, NULL, 1093 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); 1094 1095 hashzone = uma_zcreate("UMA Hash", 1096 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, 1097 NULL, NULL, NULL, NULL, 1098 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); 1099 1100 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket), 1101 NULL, NULL, NULL, NULL, 1102 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); 1103 1104 1105 #ifdef UMA_DEBUG 1106 printf("UMA startup complete.\n"); 1107 #endif 1108 } 1109 1110 /* see uma.h */ 1111 void 1112 uma_startup2(void *hashmem, u_long elems) 1113 { 1114 bzero(hashmem, elems * sizeof(void *)); 1115 mallochash->uh_slab_hash = hashmem; 1116 mallochash->uh_hashsize = elems; 1117 mallochash->uh_hashmask = elems - 1; 1118 booted = 1; 1119 #ifdef UMA_DEBUG 1120 printf("UMA startup2 complete.\n"); 1121 #endif 1122 } 1123 1124 /* 1125 * Initialize our callout handle 1126 * 1127 */ 1128 1129 static void 1130 uma_startup3(void) 1131 { 1132 #ifdef UMA_DEBUG 1133 printf("Starting callout.\n"); 1134 #endif 1135 /* We'll be mpsafe once the vm is locked. */ 1136 callout_init(&uma_callout, 0); 1137 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); 1138 #ifdef UMA_DEBUG 1139 printf("UMA startup3 complete.\n"); 1140 #endif 1141 } 1142 1143 /* See uma.h */ 1144 uma_zone_t 1145 uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init uminit, 1146 uma_fini fini, int align, u_int16_t flags) 1147 1148 { 1149 struct uma_zctor_args args; 1150 1151 /* This stuff is essential for the zone ctor */ 1152 args.name = name; 1153 args.size = size; 1154 args.ctor = ctor; 1155 args.dtor = dtor; 1156 args.uminit = uminit; 1157 args.fini = fini; 1158 args.align = align; 1159 args.flags = flags; 1160 1161 return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL, -1)); 1162 } 1163 1164 /* See uma.h */ 1165 void * 1166 uma_zalloc_arg(uma_zone_t zone, void *udata, int wait) 1167 { 1168 void *item; 1169 uma_cache_t cache; 1170 uma_bucket_t bucket; 1171 int isitem; 1172 int cpu; 1173 1174 /* This is the fast path allocation */ 1175 #ifdef UMA_DEBUG_ALLOC_1 1176 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); 1177 #endif 1178 cpu = PCPU_GET(cpuid); 1179 CPU_LOCK(zone, cpu); 1180 cache = &zone->uz_cpu[cpu]; 1181 cache->uc_allocs++; 1182 1183 zalloc_start: 1184 bucket = cache->uc_allocbucket; 1185 1186 if (bucket) { 1187 if (bucket->ub_ptr > -1) { 1188 item = bucket->ub_bucket[bucket->ub_ptr]; 1189 #ifdef INVARIANTS 1190 bucket->ub_bucket[bucket->ub_ptr] = NULL; 1191 #endif 1192 bucket->ub_ptr--; 1193 KASSERT(item != NULL, 1194 ("uma_zalloc: Bucket pointer mangled.")); 1195 cache->uc_allocs++; 1196 CPU_UNLOCK(zone, cpu); 1197 if (zone->uz_ctor) 1198 zone->uz_ctor(item, zone->uz_size, udata); 1199 return (item); 1200 } else if (cache->uc_freebucket) { 1201 /* 1202 * We have run out of items in our allocbucket. 1203 * See if we can switch with our free bucket. 
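 *
 * (For example: an exhausted alloc bucket has ub_ptr == -1 while a free
 * bucket that has had items returned to it has ub_ptr >= 0; exchanging the
 * two lets the fast path keep going without touching the zone lock.)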
1204 */ 1205 if (cache->uc_freebucket->ub_ptr > -1) { 1206 uma_bucket_t swap; 1207 1208 #ifdef UMA_DEBUG_ALLOC 1209 printf("uma_zalloc: Swapping empty with alloc.\n"); 1210 #endif 1211 swap = cache->uc_freebucket; 1212 cache->uc_freebucket = cache->uc_allocbucket; 1213 cache->uc_allocbucket = swap; 1214 1215 goto zalloc_start; 1216 } 1217 } 1218 } 1219 /* 1220 * We can get here for three reasons: 1221 * 1222 * 1) The buckets are NULL 1223 * 2) The zone is INTERNAL, and so it has no buckets. 1224 * 3) The alloc and free buckets are both empty. 1225 * 1226 * Just handoff to uma_zalloc_internal to do the hard stuff 1227 * 1228 */ 1229 #ifdef UMA_DEBUG_ALLOC 1230 printf("uma_zalloc: Falling back to zalloc_internal.\n"); 1231 #endif 1232 1233 item = uma_zalloc_internal(zone, udata, wait, &isitem, cpu); 1234 1235 #ifdef UMA_DEBUG 1236 printf("uma_zalloc: zalloc_internal completed.\n"); 1237 #endif 1238 1239 if (item && isitem == 0) 1240 goto zalloc_start; 1241 1242 /* 1243 * If isitem is set then we should just return it. The cpu lock 1244 * was unlocked when we couldn't get a bucket. 1245 */ 1246 return item; 1247 } 1248 1249 /* 1250 * Allocates an item for an internal zone OR fills a bucket 1251 * 1252 * Arguments 1253 * zone The zone to alloc for. 1254 * udata The data to be passed to the constructor. 1255 * wait M_WAITOK or M_NOWAIT. 1256 * isitem The returned value is an item if this is true. 1257 * cpu The cpu # of the cache that we should use, or -1. 1258 * 1259 * Returns 1260 * NULL if there is no memory and M_NOWAIT is set 1261 * An item if called on an interal zone 1262 * Non NULL if called to fill a bucket and it was successful. 1263 * 1264 * Discussion: 1265 * This was much cleaner before it had to do per cpu caches. It is 1266 * complicated now because it has to handle the simple internal case, and 1267 * the more involved bucket filling and allocation. The isitem is there 1268 * to remove a failure case. You shouldn't fail on allocating from a zone 1269 * because there were no buckets. This allows the exported zalloc to just 1270 * return the item. 1271 * 1272 */ 1273 1274 static void * 1275 uma_zalloc_internal(uma_zone_t zone, void *udata, int wait, int *isitem, int cpu) 1276 { 1277 uma_bucket_t bucket; 1278 uma_cache_t cache; 1279 uma_slab_t slab; 1280 u_int8_t freei; 1281 void *item; 1282 1283 bucket = NULL; 1284 cache = NULL; 1285 item = NULL; 1286 1287 /* 1288 * This is to stop us from allocating per cpu buckets while we're running 1289 * out of UMA_BOOT_PAGES. Otherwise, we would exhaust the boot pages. 
1290 */ 1291 1292 if (!booted && zone == bucketzone) 1293 return (NULL); 1294 1295 #ifdef UMA_DEBUG_ALLOC 1296 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); 1297 #endif 1298 if (isitem != NULL) 1299 *isitem = 0; 1300 1301 ZONE_LOCK(zone); 1302 1303 /* We got here because we need to fill some buckets */ 1304 if (cpu != -1) { 1305 cache = &zone->uz_cpu[cpu]; 1306 1307 zone->uz_allocs += cache->uc_allocs; 1308 /* Check the free list */ 1309 bucket = LIST_FIRST(&zone->uz_full_bucket); 1310 if (bucket) { 1311 LIST_REMOVE(bucket, ub_link); 1312 /* Our old one is now a free bucket */ 1313 if (cache->uc_allocbucket) { 1314 KASSERT(cache->uc_allocbucket->ub_ptr == -1, 1315 ("uma_zalloc_internal: Freeing a non free bucket.")); 1316 LIST_INSERT_HEAD(&zone->uz_free_bucket, 1317 cache->uc_allocbucket, ub_link); 1318 } 1319 KASSERT(bucket->ub_ptr != -1, 1320 ("uma_zalloc_internal: Returning an empty bucket.")); 1321 /*zone->uz_free -= bucket->ub_ptr + 1;*/ 1322 cache->uc_allocbucket = bucket; 1323 ZONE_UNLOCK(zone); 1324 return (bucket); 1325 } 1326 /* Bump up our uc_count so we get here less */ 1327 if (cache->uc_count < UMA_BUCKET_SIZE - 1) 1328 cache->uc_count++; 1329 /* Nothing on the free list, try to re-use the old one */ 1330 bucket = cache->uc_allocbucket; 1331 if (bucket == NULL) { 1332 /* Nope, we need a new one */ 1333 CPU_UNLOCK(zone, cpu); 1334 ZONE_UNLOCK(zone); 1335 bucket = uma_zalloc_internal(bucketzone, 1336 NULL, wait, NULL, -1); 1337 CPU_LOCK(zone, cpu); 1338 ZONE_LOCK(zone); 1339 /* Did we lose the race? */ 1340 if (cache->uc_allocbucket) { 1341 #ifdef UMA_DEBUG 1342 printf("uma_zalloc_internal: Lost race with another CPU.\n"); 1343 #endif 1344 if (bucket) 1345 uma_zfree_internal(bucketzone, 1346 bucket, NULL, 0); 1347 ZONE_UNLOCK(zone); 1348 return (cache->uc_allocbucket); 1349 } 1350 cache->uc_allocbucket = bucket; 1351 1352 if (bucket) { 1353 #ifdef INVARIANTS 1354 bzero(bucket, bucketzone->uz_size); 1355 #endif 1356 bucket->ub_ptr = -1; 1357 } else { 1358 /* 1359 * We may not get a bucket if we recurse, so 1360 * return an actual item. The rest of this code 1361 * does the right thing if the cache is NULL. 1362 */ 1363 #ifdef UMA_DEBUG 1364 printf("uma_zalloc_internal: Bucketzone returned NULL\n"); 1365 #endif 1366 CPU_UNLOCK(zone, cpu); 1367 cache = NULL; 1368 cpu = -1; 1369 } 1370 } 1371 } 1372 1373 new_slab: 1374 1375 /* Find a slab with some space */ 1376 if (zone->uz_free) { 1377 if (!LIST_EMPTY(&zone->uz_part_slab)) { 1378 slab = LIST_FIRST(&zone->uz_part_slab); 1379 } else { 1380 slab = LIST_FIRST(&zone->uz_free_slab); 1381 LIST_REMOVE(slab, us_link); 1382 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); 1383 } 1384 } else { 1385 /* 1386 * This is to prevent us from recursively trying to allocate 1387 * buckets. The problem is that if an allocation forces us to 1388 * grab a new bucket we will call page_alloc, which will go off 1389 * and cause the vm to allocate vm_map_entries. If we need new 1390 * buckets there too we will recurse in kmem_alloc and bad 1391 * things happen. 
So instead we return a NULL bucket, and make 1392 * the code that allocates buckets smart enough to deal with it */ 1393 if (zone == bucketzone && zone->uz_recurse != 0) { 1394 ZONE_UNLOCK(zone); 1395 return (NULL); 1396 } 1397 zone->uz_recurse++; 1398 slab = slab_zalloc(zone, wait); 1399 zone->uz_recurse--; 1400 if (slab) { 1401 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); 1402 /* 1403 * We might not have been able to get a page, but another cpu 1404 * could have while we were unlocked. 1405 */ 1406 } else if (zone->uz_free == 0) { 1407 ZONE_UNLOCK(zone); 1408 /* If we're filling a bucket return what we have */ 1409 if (bucket != NULL && bucket->ub_ptr != -1) { 1410 return (bucket); 1411 } else 1412 return (NULL); 1413 } else { 1414 /* Another cpu must have succeeded */ 1415 if ((slab = LIST_FIRST(&zone->uz_part_slab)) == NULL) { 1416 slab = LIST_FIRST(&zone->uz_free_slab); 1417 LIST_REMOVE(slab, us_link); 1418 LIST_INSERT_HEAD(&zone->uz_part_slab, 1419 slab, us_link); 1420 } 1421 } 1422 } 1423 1424 while (slab->us_freecount) { 1425 freei = slab->us_firstfree; 1426 slab->us_firstfree = slab->us_freelist[freei]; 1427 #ifdef INVARIANTS 1428 slab->us_freelist[freei] = 255; 1429 #endif 1430 slab->us_freecount--; 1431 zone->uz_free--; 1432 item = slab->us_data + (zone->uz_rsize * freei); 1433 1434 if (cache == NULL) { 1435 zone->uz_allocs++; 1436 break; 1437 } 1438 1439 bucket->ub_bucket[++bucket->ub_ptr] = item; 1440 1441 /* Don't overfill the bucket! */ 1442 if (bucket->ub_ptr == cache->uc_count) 1443 break; 1444 } 1445 1446 /* Move this slab to the full list */ 1447 if (slab->us_freecount == 0) { 1448 LIST_REMOVE(slab, us_link); 1449 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link); 1450 } 1451 1452 if (cache != NULL) { 1453 /* Try to keep the buckets totally full, but don't block */ 1454 if (bucket->ub_ptr < cache->uc_count) { 1455 wait = M_NOWAIT; 1456 goto new_slab; 1457 } 1458 } 1459 1460 ZONE_UNLOCK(zone); 1461 1462 /* Only construct at this time if we're not filling a bucket */ 1463 if (cache == NULL) { 1464 if (zone->uz_ctor) 1465 zone->uz_ctor(item, zone->uz_size, udata); 1466 1467 if (isitem != NULL) 1468 *isitem = 1; 1469 } 1470 1471 return (item); 1472 } 1473 1474 /* See uma.h */ 1475 void 1476 uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 1477 { 1478 uma_cache_t cache; 1479 uma_bucket_t bucket; 1480 int cpu; 1481 1482 /* This is the fast path free */ 1483 #ifdef UMA_DEBUG_ALLOC_1 1484 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); 1485 #endif 1486 cpu = PCPU_GET(cpuid); 1487 CPU_LOCK(zone, cpu); 1488 cache = &zone->uz_cpu[cpu]; 1489 1490 zfree_start: 1491 bucket = cache->uc_freebucket; 1492 1493 if (bucket) { 1494 /* Do we have room in our bucket? */ 1495 if (bucket->ub_ptr < cache->uc_count) { 1496 bucket->ub_ptr++; 1497 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL, 1498 ("uma_zfree: Freeing to non free bucket index.")); 1499 bucket->ub_bucket[bucket->ub_ptr] = item; 1500 CPU_UNLOCK(zone, cpu); 1501 if (zone->uz_dtor) 1502 zone->uz_dtor(item, zone->uz_size, udata); 1503 return; 1504 } else if (cache->uc_allocbucket) { 1505 #ifdef UMA_DEBUG_ALLOC 1506 printf("uma_zfree: Swapping buckets.\n"); 1507 #endif 1508 /* 1509 * We have run out of space in our freebucket. 1510 * See if we can switch with our alloc bucket. 
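 *
 * (The swap only pays off when the alloc bucket has more room left than
 * the free bucket, e.g. a drained alloc bucket (ub_ptr == -1) next to a
 * full free bucket; if both are nearly full we fall through to the slow
 * path below and push a bucket back onto the zone lists.)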
1511 */ 1512 if (cache->uc_allocbucket->ub_ptr < 1513 cache->uc_freebucket->ub_ptr) { 1514 uma_bucket_t swap; 1515 1516 swap = cache->uc_freebucket; 1517 cache->uc_freebucket = cache->uc_allocbucket; 1518 cache->uc_allocbucket = swap; 1519 1520 goto zfree_start; 1521 } 1522 } 1523 } 1524 1525 /* 1526 * We can get here for three reasons: 1527 * 1528 * 1) The buckets are NULL 1529 * 2) The zone is INTERNAL, and so it has no buckets. 1530 * 3) The alloc and free buckets are both somewhat full. 1531 * 1532 */ 1533 1534 ZONE_LOCK(zone); 1535 1536 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) { 1537 bucket = cache->uc_freebucket; 1538 cache->uc_freebucket = NULL; 1539 1540 /* Can we throw this on the zone full list? */ 1541 if (bucket != NULL) { 1542 #ifdef UMA_DEBUG_ALLOC 1543 printf("uma_zfree: Putting old bucket on the free list.\n"); 1544 #endif 1545 /* ub_ptr is pointing to the last free item */ 1546 KASSERT(bucket->ub_ptr != -1, 1547 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); 1548 /*zone->uz_free += bucket->ub_ptr + 1;*/ 1549 LIST_INSERT_HEAD(&zone->uz_full_bucket, 1550 bucket, ub_link); 1551 bucket = LIST_FIRST(&zone->uz_free_bucket); 1552 if (bucket) 1553 LIST_REMOVE(bucket, ub_link); 1554 } 1555 /* 1556 * Do we need to alloc one? Either the freebucket was NULL 1557 * or the free_bucket list was empty. 1558 */ 1559 if (bucket == NULL) { 1560 #ifdef UMA_DEBUG_ALLOC 1561 printf("uma_zfree: Allocating new free bucket.\n"); 1562 #endif 1563 /* This has to be done so we don't recurse on a lock */ 1564 ZONE_UNLOCK(zone); 1565 CPU_UNLOCK(zone, cpu); 1566 bucket = uma_zalloc_internal(bucketzone, 1567 NULL, M_NOWAIT, NULL, -1); 1568 CPU_LOCK(zone, cpu); 1569 ZONE_LOCK(zone); 1570 if (bucket) { 1571 #ifdef INVARIANTS 1572 bzero(bucket, bucketzone->uz_size); 1573 #endif 1574 bucket->ub_ptr = -1; 1575 } 1576 /* Did we lose the race? */ 1577 if (cache->uc_freebucket != NULL) { 1578 if (bucket) 1579 uma_zfree_internal(bucketzone, 1580 bucket, NULL, 0); 1581 ZONE_UNLOCK(zone); 1582 goto zfree_start; 1583 } 1584 /* If we couldn't get one just free directly */ 1585 if (bucket == NULL) 1586 goto zfree_internal; 1587 } 1588 cache->uc_freebucket = bucket; 1589 ZONE_UNLOCK(zone); 1590 goto zfree_start; 1591 } 1592 1593 zfree_internal: 1594 1595 CPU_UNLOCK(zone, cpu); 1596 ZONE_UNLOCK(zone); 1597 uma_zfree_internal(zone, item, udata, 0); 1598 1599 return; 1600 1601 } 1602 1603 /* 1604 * Frees an item to an INTERNAL zone or allocates a free bucket 1605 * 1606 * Arguments: 1607 * zone The zone to free to 1608 * item The item we're freeing 1609 * udata User supplied data for the dtor 1610 * skip Skip the dtor, it was done in uma_zfree_arg 1611 */ 1612 1613 static void 1614 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip) 1615 { 1616 uma_slab_t slab; 1617 u_int8_t *mem; 1618 u_int8_t freei; 1619 1620 ZONE_LOCK(zone); 1621 1622 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) { 1623 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); 1624 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) 1625 slab = hash_sfind(&zone->uz_hash, mem); 1626 else { 1627 mem += zone->uz_pgoff; 1628 slab = (uma_slab_t)mem; 1629 } 1630 } else { 1631 slab = (uma_slab_t)udata; 1632 } 1633 1634 /* Do we need to remove from any lists? 
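 *
 * (Index example for the computation below, with illustrative numbers:
 * uz_rsize == 256 and an item starting 768 bytes into us_data frees back
 * as freei == 768 / 256 == 3; the INVARIANTS checks catch misaligned
 * addresses and duplicate frees of the same index.)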
*/ 1635 if (slab->us_freecount+1 == zone->uz_ipers) { 1636 LIST_REMOVE(slab, us_link); 1637 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); 1638 } else if (slab->us_freecount == 0) { 1639 LIST_REMOVE(slab, us_link); 1640 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); 1641 } 1642 1643 /* Slab management stuff */ 1644 freei = ((unsigned long)item - (unsigned long)slab->us_data) 1645 / zone->uz_rsize; 1646 #ifdef INVARIANTS 1647 if (((freei * zone->uz_rsize) + slab->us_data) != item) 1648 panic("zone: %s(%p) slab %p freed address %p unaligned.\n", 1649 zone->uz_name, zone, slab, item); 1650 if (freei >= zone->uz_ipers) 1651 panic("zone: %s(%p) slab %p freelist %i out of range 0-%d\n", 1652 zone->uz_name, zone, slab, freei, zone->uz_ipers-1); 1653 1654 if (slab->us_freelist[freei] != 255) { 1655 printf("Slab at %p, freei %d = %d.\n", 1656 slab, freei, slab->us_freelist[freei]); 1657 panic("Duplicate free of item %p from zone %p(%s)\n", 1658 item, zone, zone->uz_name); 1659 } 1660 #endif 1661 slab->us_freelist[freei] = slab->us_firstfree; 1662 slab->us_firstfree = freei; 1663 slab->us_freecount++; 1664 1665 /* Zone statistics */ 1666 zone->uz_free++; 1667 1668 ZONE_UNLOCK(zone); 1669 1670 if (!skip && zone->uz_dtor) 1671 zone->uz_dtor(item, zone->uz_size, udata); 1672 } 1673 1674 /* See uma.h */ 1675 void 1676 uma_zone_set_max(uma_zone_t zone, int nitems) 1677 { 1678 ZONE_LOCK(zone); 1679 if (zone->uz_ppera > 1) 1680 zone->uz_maxpages = nitems / zone->uz_ppera; 1681 else 1682 zone->uz_maxpages = nitems / zone->uz_ipers; 1683 ZONE_UNLOCK(zone); 1684 } 1685 1686 /* See uma.h */ 1687 void 1688 uma_zone_set_freef(uma_zone_t zone, uma_free freef) 1689 { 1690 ZONE_LOCK(zone); 1691 1692 zone->uz_freef = freef; 1693 1694 ZONE_UNLOCK(zone); 1695 } 1696 1697 /* See uma.h */ 1698 void 1699 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) 1700 { 1701 ZONE_LOCK(zone); 1702 1703 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC; 1704 zone->uz_allocf = allocf; 1705 1706 ZONE_UNLOCK(zone); 1707 } 1708 1709 /* See uma.h */ 1710 int 1711 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) 1712 { 1713 int pages; 1714 vm_offset_t kva; 1715 1716 ZONE_LOCK(zone); 1717 mtx_lock(&Giant); 1718 1719 zone->uz_obj = obj; 1720 pages = count / zone->uz_ipers; 1721 1722 if (pages * zone->uz_ipers < count) 1723 pages++; 1724 zone->uz_kva = NULL; 1725 ZONE_UNLOCK(zone); 1726 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE); 1727 ZONE_LOCK(zone); 1728 1729 zone->uz_kva = kva; 1730 1731 if (zone->uz_kva == 0) { 1732 ZONE_UNLOCK(zone); 1733 return (0); 1734 } 1735 1736 zone->uz_maxpages = pages; 1737 1738 if (zone->uz_obj == NULL) 1739 zone->uz_obj = vm_object_allocate(OBJT_DEFAULT, 1740 zone->uz_maxpages); 1741 else 1742 _vm_object_allocate(OBJT_DEFAULT, 1743 zone->uz_maxpages, zone->uz_obj); 1744 1745 zone->uz_allocf = obj_alloc; 1746 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC; 1747 1748 mtx_unlock(&Giant); 1749 ZONE_UNLOCK(zone); 1750 1751 return (1); 1752 } 1753 1754 /* See uma.h */ 1755 void 1756 uma_prealloc(uma_zone_t zone, int items) 1757 { 1758 int slabs; 1759 uma_slab_t slab; 1760 1761 ZONE_LOCK(zone); 1762 slabs = items / zone->uz_ipers; 1763 if (slabs * zone->uz_ipers < items) 1764 slabs++; 1765 1766 while (slabs > 0) { 1767 slab = slab_zalloc(zone, M_WAITOK); 1768 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); 1769 slabs--; 1770 } 1771 ZONE_UNLOCK(zone); 1772 } 1773 1774 /* See uma.h */ 1775 void 1776 uma_reclaim(void) 1777 { 1778 /* 1779 * You might think that 
the delay below would improve performance since 1780 * the allocator will give away memory that it may ask for immediately. 1781 * Really, it makes things worse, since cpu cycles are so much cheaper 1782 * than disk activity. 1783 */ 1784 #if 0 1785 static struct timeval tv = {0}; 1786 struct timeval now; 1787 getmicrouptime(&now); 1788 if (now.tv_sec > tv.tv_sec + 30) 1789 tv = now; 1790 else 1791 return; 1792 #endif 1793 #ifdef UMA_DEBUG 1794 printf("UMA: vm asked us to release pages!\n"); 1795 #endif 1796 zone_foreach(zone_drain); 1797 1798 /* 1799 * Some slabs may have been freed but this zone will be visited early 1800 * we visit again so that we can free pages that are empty once other 1801 * zones are drained. We have to do the same for buckets. 1802 */ 1803 zone_drain(slabzone); 1804 zone_drain(bucketzone); 1805 } 1806 1807 void * 1808 uma_large_malloc(int size, int wait) 1809 { 1810 void *mem; 1811 uma_slab_t slab; 1812 u_int8_t flags; 1813 1814 slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1); 1815 if (slab == NULL) 1816 return (NULL); 1817 1818 mem = page_alloc(NULL, size, &flags, wait); 1819 if (mem) { 1820 slab->us_data = mem; 1821 slab->us_flags = flags | UMA_SLAB_MALLOC; 1822 slab->us_size = size; 1823 UMA_HASH_INSERT(mallochash, slab, mem); 1824 } else { 1825 uma_zfree_internal(slabzone, slab, NULL, 0); 1826 } 1827 1828 1829 return (mem); 1830 } 1831 1832 void 1833 uma_large_free(uma_slab_t slab) 1834 { 1835 UMA_HASH_REMOVE(mallochash, slab, slab->us_data); 1836 page_free(slab->us_data, slab->us_size, slab->us_flags); 1837 uma_zfree_internal(slabzone, slab, NULL, 0); 1838 } 1839 1840 void 1841 uma_print_stats(void) 1842 { 1843 zone_foreach(uma_print_zone); 1844 } 1845 1846 void 1847 uma_print_zone(uma_zone_t zone) 1848 { 1849 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", 1850 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags, 1851 zone->uz_ipers, zone->uz_ppera, 1852 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free); 1853 } 1854 1855 /* 1856 * Sysctl handler for vm.zone 1857 * 1858 * stolen from vm_zone.c 1859 */ 1860 static int 1861 sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 1862 { 1863 int error, len, cnt; 1864 const int linesize = 128; /* conservative */ 1865 int totalfree; 1866 char *tmpbuf, *offset; 1867 uma_zone_t z; 1868 char *p; 1869 1870 cnt = 0; 1871 LIST_FOREACH(z, &uma_zones, uz_link) 1872 cnt++; 1873 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize, 1874 M_TEMP, M_WAITOK); 1875 len = snprintf(tmpbuf, linesize, 1876 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n"); 1877 if (cnt == 0) 1878 tmpbuf[len - 1] = '\0'; 1879 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? 
len-1 : len); 1880 if (error || cnt == 0) 1881 goto out; 1882 offset = tmpbuf; 1883 mtx_lock(&uma_mtx); 1884 LIST_FOREACH(z, &uma_zones, uz_link) { 1885 if (cnt == 0) /* list may have changed size */ 1886 break; 1887 ZONE_LOCK(z); 1888 totalfree = z->uz_free + z->uz_cachefree; 1889 len = snprintf(offset, linesize, 1890 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", 1891 z->uz_name, z->uz_size, 1892 z->uz_maxpages * z->uz_ipers, 1893 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree, 1894 totalfree, 1895 (unsigned long long)z->uz_allocs); 1896 ZONE_UNLOCK(z); 1897 for (p = offset + 12; p > offset && *p == ' '; --p) 1898 /* nothing */ ; 1899 p[1] = ':'; 1900 cnt--; 1901 offset += len; 1902 } 1903 mtx_unlock(&uma_mtx); 1904 *offset++ = '\0'; 1905 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); 1906 out: 1907 FREE(tmpbuf, M_TEMP); 1908 return (error); 1909 } 1910