/*
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uma_core.c Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel. The intent is to be flexible as well as
 * efficient. A primary design goal is to return unused memory to the rest of
 * the system. This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose algorithms
 * are well known.
 *
 */

/*
 * TODO:
 *	- Improve memory usage for large allocations
 *	- Investigate cache size adjustments
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/* I should really use ktr.. */
/*
#define UMA_DEBUG 1
#define UMA_DEBUG_ALLOC 1
#define UMA_DEBUG_ALLOC_1 1
*/

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

#include <machine/vmparam.h>

/*
 * This is the zone and keg from which all zones are spawned. The idea is that
 * even the zone & keg heads are allocated from the allocator, so we use the
 * bss section to bootstrap us.
 */
static struct uma_keg masterkeg;
static struct uma_zone masterzone_k;
static struct uma_zone masterzone_z;
static uma_zone_t kegs = &masterzone_k;
static uma_zone_t zones = &masterzone_z;

/* This is the zone from which all of uma_slab_t's are allocated.
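 *
 * Off-page kegs (UMA_ZONE_OFFPAGE) take their slab headers from here;
 * keg_ctor() points UMA_ZONE_REFCNT kegs at slabrefzone instead, whose
 * larger headers also carry the per-item reference counts.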
*/ 97 static uma_zone_t slabzone; 98 static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */ 99 100 /* 101 * The initial hash tables come out of this zone so they can be allocated 102 * prior to malloc coming up. 103 */ 104 static uma_zone_t hashzone; 105 106 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets"); 107 108 /* 109 * Are we allowed to allocate buckets? 110 */ 111 static int bucketdisable = 1; 112 113 /* Linked list of all kegs in the system */ 114 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs); 115 116 /* This mutex protects the keg list */ 117 static struct mtx uma_mtx; 118 119 /* These are the pcpu cache locks */ 120 static struct mtx uma_pcpu_mtx[MAXCPU]; 121 122 /* Linked list of boot time pages */ 123 static LIST_HEAD(,uma_slab) uma_boot_pages = 124 LIST_HEAD_INITIALIZER(&uma_boot_pages); 125 126 /* Count of free boottime pages */ 127 static int uma_boot_free = 0; 128 129 /* Is the VM done starting up? */ 130 static int booted = 0; 131 132 /* 133 * This is the handle used to schedule events that need to happen 134 * outside of the allocation fast path. 135 */ 136 static struct callout uma_callout; 137 #define UMA_TIMEOUT 20 /* Seconds for callout interval. */ 138 139 /* 140 * This structure is passed as the zone ctor arg so that I don't have to create 141 * a special allocation function just for zones. 142 */ 143 struct uma_zctor_args { 144 char *name; 145 size_t size; 146 uma_ctor ctor; 147 uma_dtor dtor; 148 uma_init uminit; 149 uma_fini fini; 150 uma_keg_t keg; 151 int align; 152 u_int16_t flags; 153 }; 154 155 struct uma_kctor_args { 156 uma_zone_t zone; 157 size_t size; 158 uma_init uminit; 159 uma_fini fini; 160 int align; 161 u_int16_t flags; 162 }; 163 164 struct uma_bucket_zone { 165 uma_zone_t ubz_zone; 166 char *ubz_name; 167 int ubz_entries; 168 }; 169 170 #define BUCKET_MAX 128 171 172 struct uma_bucket_zone bucket_zones[] = { 173 { NULL, "16 Bucket", 16 }, 174 { NULL, "32 Bucket", 32 }, 175 { NULL, "64 Bucket", 64 }, 176 { NULL, "128 Bucket", 128 }, 177 { NULL, NULL, 0} 178 }; 179 180 #define BUCKET_SHIFT 4 181 #define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1) 182 183 uint8_t bucket_size[BUCKET_ZONES]; 184 185 /* Prototypes.. 
*/ 186 187 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int); 188 static void *page_alloc(uma_zone_t, int, u_int8_t *, int); 189 static void *startup_alloc(uma_zone_t, int, u_int8_t *, int); 190 static void page_free(void *, int, u_int8_t); 191 static uma_slab_t slab_zalloc(uma_zone_t, int); 192 static void cache_drain(uma_zone_t); 193 static void bucket_drain(uma_zone_t, uma_bucket_t); 194 static void bucket_cache_drain(uma_zone_t zone); 195 static void keg_ctor(void *, int, void *); 196 static void keg_dtor(void *, int, void *); 197 static void zone_ctor(void *, int, void *); 198 static void zone_dtor(void *, int, void *); 199 static void zero_init(void *, int); 200 static void zone_small_init(uma_zone_t zone); 201 static void zone_large_init(uma_zone_t zone); 202 static void zone_foreach(void (*zfunc)(uma_zone_t)); 203 static void zone_timeout(uma_zone_t zone); 204 static int hash_alloc(struct uma_hash *); 205 static int hash_expand(struct uma_hash *, struct uma_hash *); 206 static void hash_free(struct uma_hash *hash); 207 static void uma_timeout(void *); 208 static void uma_startup3(void); 209 static void *uma_zalloc_internal(uma_zone_t, void *, int); 210 static void uma_zfree_internal(uma_zone_t, void *, void *, int); 211 static void bucket_enable(void); 212 static void bucket_init(void); 213 static uma_bucket_t bucket_alloc(int, int); 214 static void bucket_free(uma_bucket_t); 215 static void bucket_zone_drain(void); 216 static int uma_zalloc_bucket(uma_zone_t zone, int flags); 217 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags); 218 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab); 219 static void zone_drain(uma_zone_t); 220 static void uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, 221 uma_fini fini, int align, u_int16_t flags); 222 223 void uma_print_zone(uma_zone_t); 224 void uma_print_stats(void); 225 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); 226 227 static int nosleepwithlocks = 0; 228 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks, 229 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths"); 230 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, 231 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 232 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); 233 234 /* 235 * This routine checks to see whether or not it's safe to enable buckets. 236 */ 237 238 static void 239 bucket_enable(void) 240 { 241 if (cnt.v_free_count < cnt.v_free_min) 242 bucketdisable = 1; 243 else 244 bucketdisable = 0; 245 } 246 247 static void 248 bucket_init(void) 249 { 250 struct uma_bucket_zone *ubz; 251 int i; 252 int j; 253 254 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) { 255 int size; 256 257 ubz = &bucket_zones[j]; 258 size = roundup(sizeof(struct uma_bucket), sizeof(void *)); 259 size += sizeof(void *) * ubz->ubz_entries; 260 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, 261 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 262 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT)) 263 bucket_size[i >> BUCKET_SHIFT] = j; 264 } 265 } 266 267 static uma_bucket_t 268 bucket_alloc(int entries, int bflags) 269 { 270 struct uma_bucket_zone *ubz; 271 uma_bucket_t bucket; 272 int idx; 273 274 /* 275 * This is to stop us from allocating per cpu buckets while we're 276 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the 277 * boot pages. This also prevents us from allocating buckets in 278 * low memory situations. 
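 *
 * Illustrative walk-through of the sizing below: with BUCKET_SHIFT == 4,
 * a request for 20 entries yields idx = howmany(20, 16) == 2, and
 * bucket_size[2] (filled in by bucket_init()) selects the "32 Bucket"
 * zone, the smallest bucket zone that can hold 20 items.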
279 */ 280 281 if (bucketdisable) 282 return (NULL); 283 idx = howmany(entries, 1 << BUCKET_SHIFT); 284 ubz = &bucket_zones[bucket_size[idx]]; 285 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags); 286 if (bucket) { 287 #ifdef INVARIANTS 288 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries); 289 #endif 290 bucket->ub_cnt = 0; 291 bucket->ub_entries = ubz->ubz_entries; 292 } 293 294 return (bucket); 295 } 296 297 static void 298 bucket_free(uma_bucket_t bucket) 299 { 300 struct uma_bucket_zone *ubz; 301 int idx; 302 303 idx = howmany(bucket->ub_entries, 1 << BUCKET_SHIFT); 304 ubz = &bucket_zones[bucket_size[idx]]; 305 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, 0); 306 } 307 308 static void 309 bucket_zone_drain(void) 310 { 311 struct uma_bucket_zone *ubz; 312 313 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) 314 zone_drain(ubz->ubz_zone); 315 } 316 317 318 /* 319 * Routine called by timeout which is used to fire off some time interval 320 * based calculations. (stats, hash size, etc.) 321 * 322 * Arguments: 323 * arg Unused 324 * 325 * Returns: 326 * Nothing 327 */ 328 static void 329 uma_timeout(void *unused) 330 { 331 bucket_enable(); 332 zone_foreach(zone_timeout); 333 334 /* Reschedule this event */ 335 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 336 } 337 338 /* 339 * Routine to perform timeout driven calculations. This expands the 340 * hashes and does per cpu statistics aggregation. 341 * 342 * Arguments: 343 * zone The zone to operate on 344 * 345 * Returns: 346 * Nothing 347 */ 348 static void 349 zone_timeout(uma_zone_t zone) 350 { 351 uma_keg_t keg; 352 uma_cache_t cache; 353 u_int64_t alloc; 354 int cpu; 355 356 keg = zone->uz_keg; 357 alloc = 0; 358 359 /* 360 * Aggregate per cpu cache statistics back to the zone. 361 * 362 * XXX This should be done in the sysctl handler. 363 * 364 * I may rewrite this to set a flag in the per cpu cache instead of 365 * locking. If the flag is not cleared on the next round I will have 366 * to lock and do it here instead so that the statistics don't get too 367 * far out of sync. 368 */ 369 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) { 370 for (cpu = 0; cpu <= mp_maxid; cpu++) { 371 if (CPU_ABSENT(cpu)) 372 continue; 373 CPU_LOCK(cpu); 374 cache = &zone->uz_cpu[cpu]; 375 /* Add them up, and reset */ 376 alloc += cache->uc_allocs; 377 cache->uc_allocs = 0; 378 CPU_UNLOCK(cpu); 379 } 380 } 381 382 /* Now push these stats back into the zone.. */ 383 ZONE_LOCK(zone); 384 zone->uz_allocs += alloc; 385 386 /* 387 * Expand the zone hash table. 388 * 389 * This is done if the number of slabs is larger than the hash size. 390 * What I'm trying to do here is completely reduce collisions. This 391 * may be a little aggressive. Should I allow for two collisions max? 392 */ 393 394 if (keg->uk_flags & UMA_ZONE_HASH && 395 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) { 396 struct uma_hash newhash; 397 struct uma_hash oldhash; 398 int ret; 399 400 /* 401 * This is so involved because allocating and freeing 402 * while the zone lock is held will lead to deadlock. 403 * I have to do everything in stages and check for 404 * races. 
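 *
 * Roughly: copy the current hash (hash_alloc() keys off its old size),
 * drop the zone lock, hash_alloc() a table twice as large, relock and
 * hash_expand() into it. Whichever table loses that race is then freed
 * with the lock dropped again, since hash_free() itself goes back into
 * UMA or free(9).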
405 */ 406 newhash = keg->uk_hash; 407 ZONE_UNLOCK(zone); 408 ret = hash_alloc(&newhash); 409 ZONE_LOCK(zone); 410 if (ret) { 411 if (hash_expand(&keg->uk_hash, &newhash)) { 412 oldhash = keg->uk_hash; 413 keg->uk_hash = newhash; 414 } else 415 oldhash = newhash; 416 417 ZONE_UNLOCK(zone); 418 hash_free(&oldhash); 419 ZONE_LOCK(zone); 420 } 421 } 422 ZONE_UNLOCK(zone); 423 } 424 425 /* 426 * Allocate and zero fill the next sized hash table from the appropriate 427 * backing store. 428 * 429 * Arguments: 430 * hash A new hash structure with the old hash size in uh_hashsize 431 * 432 * Returns: 433 * 1 on sucess and 0 on failure. 434 */ 435 static int 436 hash_alloc(struct uma_hash *hash) 437 { 438 int oldsize; 439 int alloc; 440 441 oldsize = hash->uh_hashsize; 442 443 /* We're just going to go to a power of two greater */ 444 if (oldsize) { 445 hash->uh_hashsize = oldsize * 2; 446 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize; 447 hash->uh_slab_hash = (struct slabhead *)malloc(alloc, 448 M_UMAHASH, M_NOWAIT); 449 } else { 450 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; 451 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL, 452 M_WAITOK); 453 hash->uh_hashsize = UMA_HASH_SIZE_INIT; 454 } 455 if (hash->uh_slab_hash) { 456 bzero(hash->uh_slab_hash, alloc); 457 hash->uh_hashmask = hash->uh_hashsize - 1; 458 return (1); 459 } 460 461 return (0); 462 } 463 464 /* 465 * Expands the hash table for HASH zones. This is done from zone_timeout 466 * to reduce collisions. This must not be done in the regular allocation 467 * path, otherwise, we can recurse on the vm while allocating pages. 468 * 469 * Arguments: 470 * oldhash The hash you want to expand 471 * newhash The hash structure for the new table 472 * 473 * Returns: 474 * Nothing 475 * 476 * Discussion: 477 */ 478 static int 479 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash) 480 { 481 uma_slab_t slab; 482 int hval; 483 int i; 484 485 if (!newhash->uh_slab_hash) 486 return (0); 487 488 if (oldhash->uh_hashsize >= newhash->uh_hashsize) 489 return (0); 490 491 /* 492 * I need to investigate hash algorithms for resizing without a 493 * full rehash. 494 */ 495 496 for (i = 0; i < oldhash->uh_hashsize; i++) 497 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) { 498 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]); 499 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink); 500 hval = UMA_HASH(newhash, slab->us_data); 501 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval], 502 slab, us_hlink); 503 } 504 505 return (1); 506 } 507 508 /* 509 * Free the hash bucket to the appropriate backing store. 510 * 511 * Arguments: 512 * slab_hash The hash bucket we're freeing 513 * hashsize The number of entries in that hash bucket 514 * 515 * Returns: 516 * Nothing 517 */ 518 static void 519 hash_free(struct uma_hash *hash) 520 { 521 if (hash->uh_slab_hash == NULL) 522 return; 523 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT) 524 uma_zfree_internal(hashzone, 525 hash->uh_slab_hash, NULL, 0); 526 else 527 free(hash->uh_slab_hash, M_UMAHASH); 528 } 529 530 /* 531 * Frees all outstanding items in a bucket 532 * 533 * Arguments: 534 * zone The zone to free to, must be unlocked. 535 * bucket The free/alloc bucket with items, cpu queue must be locked. 
536 * 537 * Returns: 538 * Nothing 539 */ 540 541 static void 542 bucket_drain(uma_zone_t zone, uma_bucket_t bucket) 543 { 544 uma_slab_t slab; 545 int mzone; 546 void *item; 547 548 if (bucket == NULL) 549 return; 550 551 slab = NULL; 552 mzone = 0; 553 554 /* We have to lookup the slab again for malloc.. */ 555 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC) 556 mzone = 1; 557 558 while (bucket->ub_cnt > 0) { 559 bucket->ub_cnt--; 560 item = bucket->ub_bucket[bucket->ub_cnt]; 561 #ifdef INVARIANTS 562 bucket->ub_bucket[bucket->ub_cnt] = NULL; 563 KASSERT(item != NULL, 564 ("bucket_drain: botched ptr, item is NULL")); 565 #endif 566 /* 567 * This is extremely inefficient. The slab pointer was passed 568 * to uma_zfree_arg, but we lost it because the buckets don't 569 * hold them. This will go away when free() gets a size passed 570 * to it. 571 */ 572 if (mzone) 573 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK)); 574 uma_zfree_internal(zone, item, slab, 1); 575 } 576 } 577 578 /* 579 * Drains the per cpu caches for a zone. 580 * 581 * Arguments: 582 * zone The zone to drain, must be unlocked. 583 * 584 * Returns: 585 * Nothing 586 */ 587 static void 588 cache_drain(uma_zone_t zone) 589 { 590 uma_cache_t cache; 591 int cpu; 592 593 /* 594 * We have to lock each cpu cache before locking the zone 595 */ 596 for (cpu = 0; cpu <= mp_maxid; cpu++) { 597 if (CPU_ABSENT(cpu)) 598 continue; 599 CPU_LOCK(cpu); 600 cache = &zone->uz_cpu[cpu]; 601 bucket_drain(zone, cache->uc_allocbucket); 602 bucket_drain(zone, cache->uc_freebucket); 603 if (cache->uc_allocbucket != NULL) 604 bucket_free(cache->uc_allocbucket); 605 if (cache->uc_freebucket != NULL) 606 bucket_free(cache->uc_freebucket); 607 cache->uc_allocbucket = cache->uc_freebucket = NULL; 608 } 609 ZONE_LOCK(zone); 610 bucket_cache_drain(zone); 611 ZONE_UNLOCK(zone); 612 for (cpu = 0; cpu <= mp_maxid; cpu++) { 613 if (CPU_ABSENT(cpu)) 614 continue; 615 CPU_UNLOCK(cpu); 616 } 617 } 618 619 /* 620 * Drain the cached buckets from a zone. Expects a locked zone on entry. 621 */ 622 static void 623 bucket_cache_drain(uma_zone_t zone) 624 { 625 uma_bucket_t bucket; 626 627 /* 628 * Drain the bucket queues and free the buckets, we just keep two per 629 * cpu (alloc/free). 630 */ 631 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 632 LIST_REMOVE(bucket, ub_link); 633 ZONE_UNLOCK(zone); 634 bucket_drain(zone, bucket); 635 bucket_free(bucket); 636 ZONE_LOCK(zone); 637 } 638 639 /* Now we do the free queue.. */ 640 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 641 LIST_REMOVE(bucket, ub_link); 642 bucket_free(bucket); 643 } 644 } 645 646 /* 647 * Frees pages from a zone back to the system. This is done on demand from 648 * the pageout daemon. 649 * 650 * Arguments: 651 * zone The zone to free pages from 652 * all Should we drain all items? 653 * 654 * Returns: 655 * Nothing. 
656 */ 657 static void 658 zone_drain(uma_zone_t zone) 659 { 660 struct slabhead freeslabs = {}; 661 uma_keg_t keg; 662 uma_slab_t slab; 663 uma_slab_t n; 664 u_int8_t flags; 665 u_int8_t *mem; 666 int i; 667 668 keg = zone->uz_keg; 669 670 /* 671 * We don't want to take pages from statically allocated zones at this 672 * time 673 */ 674 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL) 675 return; 676 677 ZONE_LOCK(zone); 678 679 #ifdef UMA_DEBUG 680 printf("%s free items: %u\n", zone->uz_name, keg->uk_free); 681 #endif 682 bucket_cache_drain(zone); 683 if (keg->uk_free == 0) 684 goto finished; 685 686 slab = LIST_FIRST(&keg->uk_free_slab); 687 while (slab) { 688 n = LIST_NEXT(slab, us_link); 689 690 /* We have no where to free these to */ 691 if (slab->us_flags & UMA_SLAB_BOOT) { 692 slab = n; 693 continue; 694 } 695 696 LIST_REMOVE(slab, us_link); 697 keg->uk_pages -= keg->uk_ppera; 698 keg->uk_free -= keg->uk_ipers; 699 700 if (keg->uk_flags & UMA_ZONE_HASH) 701 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data); 702 703 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink); 704 705 slab = n; 706 } 707 finished: 708 ZONE_UNLOCK(zone); 709 710 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) { 711 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink); 712 if (keg->uk_fini) 713 for (i = 0; i < keg->uk_ipers; i++) 714 keg->uk_fini( 715 slab->us_data + (keg->uk_rsize * i), 716 keg->uk_size); 717 flags = slab->us_flags; 718 mem = slab->us_data; 719 720 if ((keg->uk_flags & UMA_ZONE_MALLOC) || 721 (keg->uk_flags & UMA_ZONE_REFCNT)) { 722 vm_object_t obj; 723 724 if (flags & UMA_SLAB_KMEM) 725 obj = kmem_object; 726 else 727 obj = NULL; 728 for (i = 0; i < keg->uk_ppera; i++) 729 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE), 730 obj); 731 } 732 if (keg->uk_flags & UMA_ZONE_OFFPAGE) 733 uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0); 734 #ifdef UMA_DEBUG 735 printf("%s: Returning %d bytes.\n", 736 zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera); 737 #endif 738 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags); 739 } 740 } 741 742 /* 743 * Allocate a new slab for a zone. This does not insert the slab onto a list. 744 * 745 * Arguments: 746 * zone The zone to allocate slabs for 747 * wait Shall we wait? 748 * 749 * Returns: 750 * The slab that was allocated or NULL if there is no memory and the 751 * caller specified M_NOWAIT. 752 */ 753 static uma_slab_t 754 slab_zalloc(uma_zone_t zone, int wait) 755 { 756 uma_slabrefcnt_t slabref; 757 uma_slab_t slab; 758 uma_keg_t keg; 759 u_int8_t *mem; 760 u_int8_t flags; 761 int i; 762 763 slab = NULL; 764 keg = zone->uz_keg; 765 766 #ifdef UMA_DEBUG 767 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); 768 #endif 769 ZONE_UNLOCK(zone); 770 771 if (keg->uk_flags & UMA_ZONE_OFFPAGE) { 772 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait); 773 if (slab == NULL) { 774 ZONE_LOCK(zone); 775 return NULL; 776 } 777 } 778 779 /* 780 * This reproduces the old vm_zone behavior of zero filling pages the 781 * first time they are added to a zone. 782 * 783 * Malloced items are zeroed in uma_zalloc. 
784 */ 785 786 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 787 wait |= M_ZERO; 788 else 789 wait &= ~M_ZERO; 790 791 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, 792 &flags, wait); 793 if (mem == NULL) { 794 ZONE_LOCK(zone); 795 return (NULL); 796 } 797 798 /* Point the slab into the allocated memory */ 799 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) 800 slab = (uma_slab_t )(mem + keg->uk_pgoff); 801 802 if ((keg->uk_flags & UMA_ZONE_MALLOC) || 803 (keg->uk_flags & UMA_ZONE_REFCNT)) 804 for (i = 0; i < keg->uk_ppera; i++) 805 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab); 806 807 slab->us_keg = keg; 808 slab->us_data = mem; 809 slab->us_freecount = keg->uk_ipers; 810 slab->us_firstfree = 0; 811 slab->us_flags = flags; 812 for (i = 0; i < keg->uk_ipers; i++) 813 slab->us_freelist[i].us_item = i+1; 814 815 if (keg->uk_flags & UMA_ZONE_REFCNT) { 816 slabref = (uma_slabrefcnt_t)slab; 817 for (i = 0; i < keg->uk_ipers; i++) 818 slabref->us_freelist[i].us_refcnt = 0; 819 } 820 821 if (keg->uk_init) 822 for (i = 0; i < keg->uk_ipers; i++) 823 keg->uk_init(slab->us_data + (keg->uk_rsize * i), 824 keg->uk_size); 825 ZONE_LOCK(zone); 826 827 if (keg->uk_flags & UMA_ZONE_HASH) 828 UMA_HASH_INSERT(&keg->uk_hash, slab, mem); 829 830 keg->uk_pages += keg->uk_ppera; 831 keg->uk_free += keg->uk_ipers; 832 833 return (slab); 834 } 835 836 /* 837 * This function is intended to be used early on in place of page_alloc() so 838 * that we may use the boot time page cache to satisfy allocations before 839 * the VM is ready. 840 */ 841 static void * 842 startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 843 { 844 uma_keg_t keg; 845 846 keg = zone->uz_keg; 847 848 /* 849 * Check our small startup cache to see if it has pages remaining. 850 */ 851 mtx_lock(&uma_mtx); 852 if (uma_boot_free != 0) { 853 uma_slab_t tmps; 854 855 tmps = LIST_FIRST(&uma_boot_pages); 856 LIST_REMOVE(tmps, us_link); 857 uma_boot_free--; 858 mtx_unlock(&uma_mtx); 859 *pflag = tmps->us_flags; 860 return (tmps->us_data); 861 } 862 mtx_unlock(&uma_mtx); 863 if (booted == 0) 864 panic("UMA: Increase UMA_BOOT_PAGES"); 865 /* 866 * Now that we've booted reset these users to their real allocator. 867 */ 868 #ifdef UMA_MD_SMALL_ALLOC 869 keg->uk_allocf = uma_small_alloc; 870 #else 871 keg->uk_allocf = page_alloc; 872 #endif 873 return keg->uk_allocf(zone, bytes, pflag, wait); 874 } 875 876 /* 877 * Allocates a number of pages from the system 878 * 879 * Arguments: 880 * zone Unused 881 * bytes The number of bytes requested 882 * wait Shall we wait? 883 * 884 * Returns: 885 * A pointer to the alloced memory or possibly 886 * NULL if M_NOWAIT is set. 887 */ 888 static void * 889 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 890 { 891 void *p; /* Returned page */ 892 893 *pflag = UMA_SLAB_KMEM; 894 p = (void *) kmem_malloc(kmem_map, bytes, wait); 895 896 return (p); 897 } 898 899 /* 900 * Allocates a number of pages from within an object 901 * 902 * Arguments: 903 * zone Unused 904 * bytes The number of bytes requested 905 * wait Shall we wait? 906 * 907 * Returns: 908 * A pointer to the alloced memory or possibly 909 * NULL if M_NOWAIT is set. 910 */ 911 static void * 912 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 913 { 914 vm_object_t object; 915 vm_offset_t retkva, zkva; 916 vm_page_t p; 917 int pages, startpages; 918 919 object = zone->uz_keg->uk_obj; 920 retkva = 0; 921 922 /* 923 * This looks a little weird since we're getting one page at a time. 
924 */ 925 VM_OBJECT_LOCK(object); 926 p = TAILQ_LAST(&object->memq, pglist); 927 pages = p != NULL ? p->pindex + 1 : 0; 928 startpages = pages; 929 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE; 930 for (; bytes > 0; bytes -= PAGE_SIZE) { 931 p = vm_page_alloc(object, pages, 932 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED); 933 if (p == NULL) { 934 if (pages != startpages) 935 pmap_qremove(retkva, pages - startpages); 936 while (pages != startpages) { 937 pages--; 938 p = TAILQ_LAST(&object->memq, pglist); 939 vm_page_lock_queues(); 940 vm_page_unwire(p, 0); 941 vm_page_free(p); 942 vm_page_unlock_queues(); 943 } 944 retkva = 0; 945 goto done; 946 } 947 pmap_qenter(zkva, &p, 1); 948 if (retkva == 0) 949 retkva = zkva; 950 zkva += PAGE_SIZE; 951 pages += 1; 952 } 953 done: 954 VM_OBJECT_UNLOCK(object); 955 *flags = UMA_SLAB_PRIV; 956 957 return ((void *)retkva); 958 } 959 960 /* 961 * Frees a number of pages to the system 962 * 963 * Arguments: 964 * mem A pointer to the memory to be freed 965 * size The size of the memory being freed 966 * flags The original p->us_flags field 967 * 968 * Returns: 969 * Nothing 970 */ 971 static void 972 page_free(void *mem, int size, u_int8_t flags) 973 { 974 vm_map_t map; 975 976 if (flags & UMA_SLAB_KMEM) 977 map = kmem_map; 978 else 979 panic("UMA: page_free used with invalid flags %d\n", flags); 980 981 kmem_free(map, (vm_offset_t)mem, size); 982 } 983 984 /* 985 * Zero fill initializer 986 * 987 * Arguments/Returns follow uma_init specifications 988 */ 989 static void 990 zero_init(void *mem, int size) 991 { 992 bzero(mem, size); 993 } 994 995 /* 996 * Finish creating a small uma zone. This calculates ipers, and the zone size. 997 * 998 * Arguments 999 * zone The zone we should initialize 1000 * 1001 * Returns 1002 * Nothing 1003 */ 1004 static void 1005 zone_small_init(uma_zone_t zone) 1006 { 1007 uma_keg_t keg; 1008 int rsize; 1009 int memused; 1010 int ipers; 1011 1012 keg = zone->uz_keg; 1013 KASSERT(keg != NULL, ("Keg is null in zone_small_init")); 1014 rsize = keg->uk_size; 1015 1016 if (rsize < UMA_SMALLEST_UNIT) 1017 rsize = UMA_SMALLEST_UNIT; 1018 1019 if (rsize & keg->uk_align) 1020 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); 1021 1022 keg->uk_rsize = rsize; 1023 1024 rsize += 1; /* Account for the byte of linkage */ 1025 keg->uk_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize; 1026 keg->uk_ppera = 1; 1027 1028 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0, uh-oh!")); 1029 memused = keg->uk_ipers * keg->uk_rsize; 1030 1031 /* Can we do any better? */ 1032 if ((keg->uk_flags & UMA_ZONE_REFCNT) || 1033 ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE)) { 1034 /* 1035 * We can't do this if we're internal or if we've been 1036 * asked to not go to the VM for buckets. If we do this we 1037 * may end up going to the VM (kmem_map) for slabs which we 1038 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a 1039 * result of UMA_ZONE_VM, which clearly forbids it. 1040 */ 1041 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) || 1042 (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) 1043 return; 1044 ipers = UMA_SLAB_SIZE / keg->uk_rsize; 1045 if ((keg->uk_flags & UMA_ZONE_REFCNT) || 1046 (ipers > keg->uk_ipers)) { 1047 keg->uk_flags |= UMA_ZONE_OFFPAGE; 1048 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 1049 keg->uk_flags |= UMA_ZONE_HASH; 1050 keg->uk_ipers = ipers; 1051 } 1052 } 1053 } 1054 1055 /* 1056 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do 1057 * OFFPAGE for now. 
When I can allow for more dynamic slab sizes this will be 1058 * more complicated. 1059 * 1060 * Arguments 1061 * zone The zone we should initialize 1062 * 1063 * Returns 1064 * Nothing 1065 */ 1066 static void 1067 zone_large_init(uma_zone_t zone) 1068 { 1069 uma_keg_t keg; 1070 int pages; 1071 1072 keg = zone->uz_keg; 1073 1074 KASSERT(keg != NULL, ("Keg is null in zone_large_init")); 1075 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0, 1076 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone")); 1077 1078 pages = keg->uk_size / UMA_SLAB_SIZE; 1079 1080 /* Account for remainder */ 1081 if ((pages * UMA_SLAB_SIZE) < keg->uk_size) 1082 pages++; 1083 1084 keg->uk_ppera = pages; 1085 keg->uk_ipers = 1; 1086 1087 keg->uk_flags |= UMA_ZONE_OFFPAGE; 1088 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 1089 keg->uk_flags |= UMA_ZONE_HASH; 1090 1091 keg->uk_rsize = keg->uk_size; 1092 } 1093 1094 /* 1095 * Keg header ctor. This initializes all fields, locks, etc. And inserts 1096 * the keg onto the global keg list. 1097 * 1098 * Arguments/Returns follow uma_ctor specifications 1099 * udata Actually uma_kctor_args 1100 */ 1101 static void 1102 keg_ctor(void *mem, int size, void *udata) 1103 { 1104 struct uma_kctor_args *arg = udata; 1105 uma_keg_t keg = mem; 1106 uma_zone_t zone; 1107 1108 bzero(keg, size); 1109 keg->uk_size = arg->size; 1110 keg->uk_init = arg->uminit; 1111 keg->uk_fini = arg->fini; 1112 keg->uk_align = arg->align; 1113 keg->uk_free = 0; 1114 keg->uk_pages = 0; 1115 keg->uk_flags = arg->flags; 1116 keg->uk_allocf = page_alloc; 1117 keg->uk_freef = page_free; 1118 keg->uk_recurse = 0; 1119 keg->uk_slabzone = NULL; 1120 1121 /* 1122 * The master zone is passed to us at keg-creation time. 1123 */ 1124 zone = arg->zone; 1125 zone->uz_keg = keg; 1126 1127 if (arg->flags & UMA_ZONE_VM) 1128 keg->uk_flags |= UMA_ZFLAG_CACHEONLY; 1129 1130 if (arg->flags & UMA_ZONE_ZINIT) 1131 keg->uk_init = zero_init; 1132 1133 /* 1134 * The +1 byte added to uk_size is to account for the byte of 1135 * linkage that is added to the size in zone_small_init(). If 1136 * we don't account for this here then we may end up in 1137 * zone_small_init() with a calculated 'ipers' of 0. 1138 */ 1139 if ((keg->uk_size+1) > (UMA_SLAB_SIZE - sizeof(struct uma_slab))) 1140 zone_large_init(zone); 1141 else 1142 zone_small_init(zone); 1143 1144 if (keg->uk_flags & UMA_ZONE_REFCNT) 1145 keg->uk_slabzone = slabrefzone; 1146 else if (keg->uk_flags & UMA_ZONE_OFFPAGE) 1147 keg->uk_slabzone = slabzone; 1148 1149 /* 1150 * If we haven't booted yet we need allocations to go through the 1151 * startup cache until the vm is ready. 1152 */ 1153 if (keg->uk_ppera == 1) { 1154 #ifdef UMA_MD_SMALL_ALLOC 1155 keg->uk_allocf = uma_small_alloc; 1156 keg->uk_freef = uma_small_free; 1157 #endif 1158 if (booted == 0) 1159 keg->uk_allocf = startup_alloc; 1160 } 1161 1162 /* 1163 * Initialize keg's lock (shared among zones) through 1164 * Master zone 1165 */ 1166 zone->uz_lock = &keg->uk_lock; 1167 if (arg->flags & UMA_ZONE_MTXCLASS) 1168 ZONE_LOCK_INIT(zone, 1); 1169 else 1170 ZONE_LOCK_INIT(zone, 0); 1171 1172 /* 1173 * If we're putting the slab header in the actual page we need to 1174 * figure out where in each page it goes. This calculates a right 1175 * justified offset into the memory on an ALIGN_PTR boundary. 
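 *
 * Purely illustrative numbers (the real sizes are machine dependent):
 * if sizeof(struct uma_slab) were 48 and uk_ipers 100, totsize would
 * start at 148, round up to 152 on an 8-byte pointer boundary, and
 * uk_pgoff would become UMA_SLAB_SIZE - 152, leaving the front of the
 * slab to the items and the header right-justified at the end.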
1176 */ 1177 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) { 1178 int totsize; 1179 1180 /* Size of the slab struct and free list */ 1181 totsize = sizeof(struct uma_slab) + keg->uk_ipers; 1182 if (totsize & UMA_ALIGN_PTR) 1183 totsize = (totsize & ~UMA_ALIGN_PTR) + 1184 (UMA_ALIGN_PTR + 1); 1185 keg->uk_pgoff = UMA_SLAB_SIZE - totsize; 1186 totsize = keg->uk_pgoff + sizeof(struct uma_slab) 1187 + keg->uk_ipers; 1188 /* I don't think it's possible, but I'll make sure anyway */ 1189 if (totsize > UMA_SLAB_SIZE) { 1190 printf("zone %s ipers %d rsize %d size %d\n", 1191 zone->uz_name, keg->uk_ipers, keg->uk_rsize, 1192 keg->uk_size); 1193 panic("UMA slab won't fit.\n"); 1194 } 1195 } 1196 1197 if (keg->uk_flags & UMA_ZONE_HASH) 1198 hash_alloc(&keg->uk_hash); 1199 1200 #ifdef UMA_DEBUG 1201 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n", 1202 zone->uz_name, zone, 1203 keg->uk_size, keg->uk_ipers, 1204 keg->uk_ppera, keg->uk_pgoff); 1205 #endif 1206 1207 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link); 1208 1209 mtx_lock(&uma_mtx); 1210 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link); 1211 mtx_unlock(&uma_mtx); 1212 } 1213 1214 /* 1215 * Zone header ctor. This initializes all fields, locks, etc. 1216 * 1217 * Arguments/Returns follow uma_ctor specifications 1218 * udata Actually uma_zctor_args 1219 */ 1220 1221 static void 1222 zone_ctor(void *mem, int size, void *udata) 1223 { 1224 struct uma_zctor_args *arg = udata; 1225 uma_zone_t zone = mem; 1226 uma_zone_t z; 1227 uma_keg_t keg; 1228 1229 bzero(zone, size); 1230 zone->uz_name = arg->name; 1231 zone->uz_ctor = arg->ctor; 1232 zone->uz_dtor = arg->dtor; 1233 zone->uz_init = NULL; 1234 zone->uz_fini = NULL; 1235 zone->uz_allocs = 0; 1236 zone->uz_fills = zone->uz_count = 0; 1237 1238 if (arg->flags & UMA_ZONE_SECONDARY) { 1239 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg")); 1240 keg = arg->keg; 1241 zone->uz_keg = keg; 1242 zone->uz_init = arg->uminit; 1243 zone->uz_fini = arg->fini; 1244 zone->uz_lock = &keg->uk_lock; 1245 mtx_lock(&uma_mtx); 1246 ZONE_LOCK(zone); 1247 keg->uk_flags |= UMA_ZONE_SECONDARY; 1248 LIST_FOREACH(z, &keg->uk_zones, uz_link) { 1249 if (LIST_NEXT(z, uz_link) == NULL) { 1250 LIST_INSERT_AFTER(z, zone, uz_link); 1251 break; 1252 } 1253 } 1254 ZONE_UNLOCK(zone); 1255 mtx_unlock(&uma_mtx); 1256 } else if (arg->keg == NULL) { 1257 uma_kcreate(zone, arg->size, arg->uminit, arg->fini, 1258 arg->align, arg->flags); 1259 } else { 1260 struct uma_kctor_args karg; 1261 1262 /* We should only be here from uma_startup() */ 1263 karg.size = arg->size; 1264 karg.uminit = arg->uminit; 1265 karg.fini = arg->fini; 1266 karg.align = arg->align; 1267 karg.flags = arg->flags; 1268 karg.zone = zone; 1269 keg_ctor(arg->keg, sizeof(struct uma_keg), &karg); 1270 } 1271 keg = zone->uz_keg; 1272 zone->uz_lock = &keg->uk_lock; 1273 1274 /* 1275 * Some internal zones don't have room allocated for the per cpu 1276 * caches. If we're internal, bail out here. 1277 */ 1278 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) { 1279 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0, 1280 ("Secondary zone requested UMA_ZFLAG_INTERNAL")); 1281 return; 1282 } 1283 1284 if (keg->uk_flags & UMA_ZONE_MAXBUCKET) 1285 zone->uz_count = BUCKET_MAX; 1286 else if (keg->uk_ipers <= BUCKET_MAX) 1287 zone->uz_count = keg->uk_ipers; 1288 else 1289 zone->uz_count = BUCKET_MAX; 1290 } 1291 1292 /* 1293 * Keg header dtor. This frees all data, destroys locks, frees the hash 1294 * table and removes the keg from the global list. 
1295 * 1296 * Arguments/Returns follow uma_dtor specifications 1297 * udata unused 1298 */ 1299 static void 1300 keg_dtor(void *arg, int size, void *udata) 1301 { 1302 uma_keg_t keg; 1303 1304 keg = (uma_keg_t)arg; 1305 mtx_lock(&keg->uk_lock); 1306 if (keg->uk_free != 0) { 1307 printf("Freed UMA keg was not empty (%d items). " 1308 " Lost %d pages of memory.\n", 1309 keg->uk_free, keg->uk_pages); 1310 } 1311 mtx_unlock(&keg->uk_lock); 1312 1313 if (keg->uk_flags & UMA_ZONE_HASH) 1314 hash_free(&keg->uk_hash); 1315 1316 mtx_destroy(&keg->uk_lock); 1317 } 1318 1319 /* 1320 * Zone header dtor. 1321 * 1322 * Arguments/Returns follow uma_dtor specifications 1323 * udata unused 1324 */ 1325 static void 1326 zone_dtor(void *arg, int size, void *udata) 1327 { 1328 uma_zone_t zone; 1329 uma_keg_t keg; 1330 1331 zone = (uma_zone_t)arg; 1332 keg = zone->uz_keg; 1333 1334 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) 1335 cache_drain(zone); 1336 1337 mtx_lock(&uma_mtx); 1338 zone_drain(zone); 1339 if (keg->uk_flags & UMA_ZONE_SECONDARY) { 1340 LIST_REMOVE(zone, uz_link); 1341 /* 1342 * XXX there are some races here where 1343 * the zone can be drained but zone lock 1344 * released and then refilled before we 1345 * remove it... we dont care for now 1346 */ 1347 ZONE_LOCK(zone); 1348 if (LIST_EMPTY(&keg->uk_zones)) 1349 keg->uk_flags &= ~UMA_ZONE_SECONDARY; 1350 ZONE_UNLOCK(zone); 1351 mtx_unlock(&uma_mtx); 1352 } else { 1353 LIST_REMOVE(keg, uk_link); 1354 LIST_REMOVE(zone, uz_link); 1355 mtx_unlock(&uma_mtx); 1356 uma_zfree_internal(kegs, keg, NULL, 0); 1357 } 1358 zone->uz_keg = NULL; 1359 } 1360 1361 /* 1362 * Traverses every zone in the system and calls a callback 1363 * 1364 * Arguments: 1365 * zfunc A pointer to a function which accepts a zone 1366 * as an argument. 1367 * 1368 * Returns: 1369 * Nothing 1370 */ 1371 static void 1372 zone_foreach(void (*zfunc)(uma_zone_t)) 1373 { 1374 uma_keg_t keg; 1375 uma_zone_t zone; 1376 1377 mtx_lock(&uma_mtx); 1378 LIST_FOREACH(keg, &uma_kegs, uk_link) { 1379 LIST_FOREACH(zone, &keg->uk_zones, uz_link) 1380 zfunc(zone); 1381 } 1382 mtx_unlock(&uma_mtx); 1383 } 1384 1385 /* Public functions */ 1386 /* See uma.h */ 1387 void 1388 uma_startup(void *bootmem) 1389 { 1390 struct uma_zctor_args args; 1391 uma_slab_t slab; 1392 int slabsize; 1393 int i; 1394 1395 #ifdef UMA_DEBUG 1396 printf("Creating uma keg headers zone and keg.\n"); 1397 #endif 1398 /* 1399 * The general UMA lock is a recursion-allowed lock because 1400 * there is a code path where, while we're still configured 1401 * to use startup_alloc() for backend page allocations, we 1402 * may end up in uma_reclaim() which calls zone_foreach(zone_drain), 1403 * which grabs uma_mtx, only to later call into startup_alloc() 1404 * because while freeing we needed to allocate a bucket. Since 1405 * startup_alloc() also takes uma_mtx, we need to be able to 1406 * recurse on it. 
1407 */ 1408 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE); 1409 1410 /* "manually" create the initial zone */ 1411 args.name = "UMA Kegs"; 1412 args.size = sizeof(struct uma_keg); 1413 args.ctor = keg_ctor; 1414 args.dtor = keg_dtor; 1415 args.uminit = zero_init; 1416 args.fini = NULL; 1417 args.keg = &masterkeg; 1418 args.align = 32 - 1; 1419 args.flags = UMA_ZFLAG_INTERNAL; 1420 /* The initial zone has no Per cpu queues so it's smaller */ 1421 zone_ctor(kegs, sizeof(struct uma_zone), &args); 1422 1423 #ifdef UMA_DEBUG 1424 printf("Filling boot free list.\n"); 1425 #endif 1426 for (i = 0; i < UMA_BOOT_PAGES; i++) { 1427 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE)); 1428 slab->us_data = (u_int8_t *)slab; 1429 slab->us_flags = UMA_SLAB_BOOT; 1430 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link); 1431 uma_boot_free++; 1432 } 1433 1434 #ifdef UMA_DEBUG 1435 printf("Creating uma zone headers zone and keg.\n"); 1436 #endif 1437 args.name = "UMA Zones"; 1438 args.size = sizeof(struct uma_zone) + 1439 (sizeof(struct uma_cache) * (mp_maxid + 1)); 1440 args.ctor = zone_ctor; 1441 args.dtor = zone_dtor; 1442 args.uminit = zero_init; 1443 args.fini = NULL; 1444 args.keg = NULL; 1445 args.align = 32 - 1; 1446 args.flags = UMA_ZFLAG_INTERNAL; 1447 /* The initial zone has no Per cpu queues so it's smaller */ 1448 zone_ctor(zones, sizeof(struct uma_zone), &args); 1449 1450 #ifdef UMA_DEBUG 1451 printf("Initializing pcpu cache locks.\n"); 1452 #endif 1453 /* Initialize the pcpu cache lock set once and for all */ 1454 for (i = 0; i <= mp_maxid; i++) 1455 CPU_LOCK_INIT(i); 1456 1457 #ifdef UMA_DEBUG 1458 printf("Creating slab and hash zones.\n"); 1459 #endif 1460 1461 /* 1462 * This is the max number of free list items we'll have with 1463 * offpage slabs. 1464 */ 1465 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab); 1466 slabsize /= UMA_MAX_WASTE; 1467 slabsize++; /* In case there it's rounded */ 1468 slabsize += sizeof(struct uma_slab); 1469 1470 /* Now make a zone for slab headers */ 1471 slabzone = uma_zcreate("UMA Slabs", 1472 slabsize, 1473 NULL, NULL, NULL, NULL, 1474 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 1475 1476 /* 1477 * We also create a zone for the bigger slabs with reference 1478 * counts in them, to accomodate UMA_ZONE_REFCNT zones. 
1479 */ 1480 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt); 1481 slabsize /= UMA_MAX_WASTE; 1482 slabsize++; 1483 slabsize += 4 * slabsize; 1484 slabsize += sizeof(struct uma_slab_refcnt); 1485 slabrefzone = uma_zcreate("UMA RCntSlabs", 1486 slabsize, 1487 NULL, NULL, NULL, NULL, 1488 UMA_ALIGN_PTR, 1489 UMA_ZFLAG_INTERNAL); 1490 1491 hashzone = uma_zcreate("UMA Hash", 1492 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, 1493 NULL, NULL, NULL, NULL, 1494 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 1495 1496 bucket_init(); 1497 1498 #ifdef UMA_MD_SMALL_ALLOC 1499 booted = 1; 1500 #endif 1501 1502 #ifdef UMA_DEBUG 1503 printf("UMA startup complete.\n"); 1504 #endif 1505 } 1506 1507 /* see uma.h */ 1508 void 1509 uma_startup2(void) 1510 { 1511 booted = 1; 1512 bucket_enable(); 1513 #ifdef UMA_DEBUG 1514 printf("UMA startup2 complete.\n"); 1515 #endif 1516 } 1517 1518 /* 1519 * Initialize our callout handle 1520 * 1521 */ 1522 1523 static void 1524 uma_startup3(void) 1525 { 1526 #ifdef UMA_DEBUG 1527 printf("Starting callout.\n"); 1528 #endif 1529 callout_init(&uma_callout, CALLOUT_MPSAFE); 1530 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 1531 #ifdef UMA_DEBUG 1532 printf("UMA startup3 complete.\n"); 1533 #endif 1534 } 1535 1536 static void 1537 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, 1538 int align, u_int16_t flags) 1539 { 1540 struct uma_kctor_args args; 1541 1542 args.size = size; 1543 args.uminit = uminit; 1544 args.fini = fini; 1545 args.align = align; 1546 args.flags = flags; 1547 args.zone = zone; 1548 zone = uma_zalloc_internal(kegs, &args, M_WAITOK); 1549 } 1550 1551 /* See uma.h */ 1552 uma_zone_t 1553 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor, 1554 uma_init uminit, uma_fini fini, int align, u_int16_t flags) 1555 1556 { 1557 struct uma_zctor_args args; 1558 1559 /* This stuff is essential for the zone ctor */ 1560 args.name = name; 1561 args.size = size; 1562 args.ctor = ctor; 1563 args.dtor = dtor; 1564 args.uminit = uminit; 1565 args.fini = fini; 1566 args.align = align; 1567 args.flags = flags; 1568 args.keg = NULL; 1569 1570 return (uma_zalloc_internal(zones, &args, M_WAITOK)); 1571 } 1572 1573 /* See uma.h */ 1574 uma_zone_t 1575 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, 1576 uma_init zinit, uma_fini zfini, uma_zone_t master) 1577 { 1578 struct uma_zctor_args args; 1579 1580 args.name = name; 1581 args.size = master->uz_keg->uk_size; 1582 args.ctor = ctor; 1583 args.dtor = dtor; 1584 args.uminit = zinit; 1585 args.fini = zfini; 1586 args.align = master->uz_keg->uk_align; 1587 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY; 1588 args.keg = master->uz_keg; 1589 1590 return (uma_zalloc_internal(zones, &args, M_WAITOK)); 1591 } 1592 1593 /* See uma.h */ 1594 void 1595 uma_zdestroy(uma_zone_t zone) 1596 { 1597 uma_zfree_internal(zones, zone, NULL, 0); 1598 } 1599 1600 /* See uma.h */ 1601 void * 1602 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) 1603 { 1604 void *item; 1605 uma_cache_t cache; 1606 uma_bucket_t bucket; 1607 int cpu; 1608 int badness; 1609 1610 /* This is the fast path allocation */ 1611 #ifdef UMA_DEBUG_ALLOC_1 1612 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); 1613 #endif 1614 1615 if (!(flags & M_NOWAIT)) { 1616 KASSERT(curthread->td_intr_nesting_level == 0, 1617 ("malloc(M_WAITOK) in interrupt context")); 1618 badness = nosleepwithlocks; 1619 #ifdef WITNESS 1620 badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, 1621 
NULL, 1622 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT", 1623 zone->uz_name); 1624 #endif 1625 if (badness) { 1626 flags &= ~M_WAITOK; 1627 flags |= M_NOWAIT; 1628 } 1629 } 1630 1631 zalloc_restart: 1632 cpu = PCPU_GET(cpuid); 1633 CPU_LOCK(cpu); 1634 cache = &zone->uz_cpu[cpu]; 1635 1636 zalloc_start: 1637 bucket = cache->uc_allocbucket; 1638 1639 if (bucket) { 1640 if (bucket->ub_cnt > 0) { 1641 bucket->ub_cnt--; 1642 item = bucket->ub_bucket[bucket->ub_cnt]; 1643 #ifdef INVARIANTS 1644 bucket->ub_bucket[bucket->ub_cnt] = NULL; 1645 #endif 1646 KASSERT(item != NULL, 1647 ("uma_zalloc: Bucket pointer mangled.")); 1648 cache->uc_allocs++; 1649 #ifdef INVARIANTS 1650 ZONE_LOCK(zone); 1651 uma_dbg_alloc(zone, NULL, item); 1652 ZONE_UNLOCK(zone); 1653 #endif 1654 CPU_UNLOCK(cpu); 1655 if (zone->uz_ctor) 1656 zone->uz_ctor(item,zone->uz_keg->uk_size,udata); 1657 if (flags & M_ZERO) 1658 bzero(item, zone->uz_keg->uk_size); 1659 return (item); 1660 } else if (cache->uc_freebucket) { 1661 /* 1662 * We have run out of items in our allocbucket. 1663 * See if we can switch with our free bucket. 1664 */ 1665 if (cache->uc_freebucket->ub_cnt > 0) { 1666 #ifdef UMA_DEBUG_ALLOC 1667 printf("uma_zalloc: Swapping empty with" 1668 " alloc.\n"); 1669 #endif 1670 bucket = cache->uc_freebucket; 1671 cache->uc_freebucket = cache->uc_allocbucket; 1672 cache->uc_allocbucket = bucket; 1673 1674 goto zalloc_start; 1675 } 1676 } 1677 } 1678 ZONE_LOCK(zone); 1679 /* Since we have locked the zone we may as well send back our stats */ 1680 zone->uz_allocs += cache->uc_allocs; 1681 cache->uc_allocs = 0; 1682 1683 /* Our old one is now a free bucket */ 1684 if (cache->uc_allocbucket) { 1685 KASSERT(cache->uc_allocbucket->ub_cnt == 0, 1686 ("uma_zalloc_arg: Freeing a non free bucket.")); 1687 LIST_INSERT_HEAD(&zone->uz_free_bucket, 1688 cache->uc_allocbucket, ub_link); 1689 cache->uc_allocbucket = NULL; 1690 } 1691 1692 /* Check the free list for a new alloc bucket */ 1693 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 1694 KASSERT(bucket->ub_cnt != 0, 1695 ("uma_zalloc_arg: Returning an empty bucket.")); 1696 1697 LIST_REMOVE(bucket, ub_link); 1698 cache->uc_allocbucket = bucket; 1699 ZONE_UNLOCK(zone); 1700 goto zalloc_start; 1701 } 1702 /* We are no longer associated with this cpu!!! */ 1703 CPU_UNLOCK(cpu); 1704 1705 /* Bump up our uz_count so we get here less */ 1706 if (zone->uz_count < BUCKET_MAX) 1707 zone->uz_count++; 1708 1709 /* 1710 * Now lets just fill a bucket and put it on the free list. If that 1711 * works we'll restart the allocation from the begining. 1712 */ 1713 if (uma_zalloc_bucket(zone, flags)) { 1714 ZONE_UNLOCK(zone); 1715 goto zalloc_restart; 1716 } 1717 ZONE_UNLOCK(zone); 1718 /* 1719 * We may not be able to get a bucket so return an actual item. 1720 */ 1721 #ifdef UMA_DEBUG 1722 printf("uma_zalloc_arg: Bucketzone returned NULL\n"); 1723 #endif 1724 1725 return (uma_zalloc_internal(zone, udata, flags)); 1726 } 1727 1728 static uma_slab_t 1729 uma_zone_slab(uma_zone_t zone, int flags) 1730 { 1731 uma_slab_t slab; 1732 uma_keg_t keg; 1733 1734 keg = zone->uz_keg; 1735 1736 /* 1737 * This is to prevent us from recursively trying to allocate 1738 * buckets. The problem is that if an allocation forces us to 1739 * grab a new bucket we will call page_alloc, which will go off 1740 * and cause the vm to allocate vm_map_entries. If we need new 1741 * buckets there too we will recurse in kmem_alloc and bad 1742 * things happen. 
So instead we return a NULL bucket, and make 1743 * the code that allocates buckets smart enough to deal with it 1744 */ 1745 if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0) 1746 return (NULL); 1747 1748 slab = NULL; 1749 1750 for (;;) { 1751 /* 1752 * Find a slab with some space. Prefer slabs that are partially 1753 * used over those that are totally full. This helps to reduce 1754 * fragmentation. 1755 */ 1756 if (keg->uk_free != 0) { 1757 if (!LIST_EMPTY(&keg->uk_part_slab)) { 1758 slab = LIST_FIRST(&keg->uk_part_slab); 1759 } else { 1760 slab = LIST_FIRST(&keg->uk_free_slab); 1761 LIST_REMOVE(slab, us_link); 1762 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, 1763 us_link); 1764 } 1765 return (slab); 1766 } 1767 1768 /* 1769 * M_NOVM means don't ask at all! 1770 */ 1771 if (flags & M_NOVM) 1772 break; 1773 1774 if (keg->uk_maxpages && 1775 keg->uk_pages >= keg->uk_maxpages) { 1776 keg->uk_flags |= UMA_ZFLAG_FULL; 1777 1778 if (flags & M_NOWAIT) 1779 break; 1780 else 1781 msleep(keg, &keg->uk_lock, PVM, 1782 "zonelimit", 0); 1783 continue; 1784 } 1785 keg->uk_recurse++; 1786 slab = slab_zalloc(zone, flags); 1787 keg->uk_recurse--; 1788 1789 /* 1790 * If we got a slab here it's safe to mark it partially used 1791 * and return. We assume that the caller is going to remove 1792 * at least one item. 1793 */ 1794 if (slab) { 1795 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); 1796 return (slab); 1797 } 1798 /* 1799 * We might not have been able to get a slab but another cpu 1800 * could have while we were unlocked. Check again before we 1801 * fail. 1802 */ 1803 if (flags & M_NOWAIT) 1804 flags |= M_NOVM; 1805 } 1806 return (slab); 1807 } 1808 1809 static void * 1810 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab) 1811 { 1812 uma_keg_t keg; 1813 void *item; 1814 u_int8_t freei; 1815 1816 keg = zone->uz_keg; 1817 1818 freei = slab->us_firstfree; 1819 slab->us_firstfree = slab->us_freelist[freei].us_item; 1820 item = slab->us_data + (keg->uk_rsize * freei); 1821 1822 slab->us_freecount--; 1823 keg->uk_free--; 1824 #ifdef INVARIANTS 1825 uma_dbg_alloc(zone, slab, item); 1826 #endif 1827 /* Move this slab to the full list */ 1828 if (slab->us_freecount == 0) { 1829 LIST_REMOVE(slab, us_link); 1830 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link); 1831 } 1832 1833 return (item); 1834 } 1835 1836 static int 1837 uma_zalloc_bucket(uma_zone_t zone, int flags) 1838 { 1839 uma_bucket_t bucket; 1840 uma_slab_t slab; 1841 int16_t saved; 1842 int max; 1843 1844 /* 1845 * Try this zone's free list first so we don't allocate extra buckets. 1846 */ 1847 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 1848 KASSERT(bucket->ub_cnt == 0, 1849 ("uma_zalloc_bucket: Bucket on free list is not empty.")); 1850 LIST_REMOVE(bucket, ub_link); 1851 } else { 1852 int bflags; 1853 1854 bflags = (flags & ~M_ZERO); 1855 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY) 1856 bflags |= M_NOVM; 1857 1858 ZONE_UNLOCK(zone); 1859 bucket = bucket_alloc(zone->uz_count, bflags); 1860 ZONE_LOCK(zone); 1861 } 1862 1863 if (bucket == NULL) 1864 return (0); 1865 1866 #ifdef SMP 1867 /* 1868 * This code is here to limit the number of simultaneous bucket fills 1869 * for any given zone to the number of per cpu caches in this zone. This 1870 * is done so that we don't allocate more memory than we really need. 
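 *
 * If uz_fills has already reached mp_ncpus, this thread just frees the
 * bucket it allocated and returns 0, letting the caller fall back to
 * handing out a single item via uma_zalloc_internal() rather than
 * filling yet another bucket.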
1871 */ 1872 if (zone->uz_fills >= mp_ncpus) 1873 goto done; 1874 1875 #endif 1876 zone->uz_fills++; 1877 1878 max = MIN(bucket->ub_entries, zone->uz_count); 1879 /* Try to keep the buckets totally full */ 1880 saved = bucket->ub_cnt; 1881 while (bucket->ub_cnt < max && 1882 (slab = uma_zone_slab(zone, flags)) != NULL) { 1883 while (slab->us_freecount && bucket->ub_cnt < max) { 1884 bucket->ub_bucket[bucket->ub_cnt++] = 1885 uma_slab_alloc(zone, slab); 1886 } 1887 1888 /* Don't block on the next fill */ 1889 flags |= M_NOWAIT; 1890 } 1891 1892 /* 1893 * We unlock here because we need to call the zone's init. 1894 * It should be safe to unlock because the slab dealt with 1895 * above is already on the appropriate list within the keg 1896 * and the bucket we filled is not yet on any list, so we 1897 * own it. 1898 */ 1899 if (zone->uz_init != NULL) { 1900 int i; 1901 1902 ZONE_UNLOCK(zone); 1903 for (i = saved; i < bucket->ub_cnt; i++) 1904 zone->uz_init(bucket->ub_bucket[i], 1905 zone->uz_keg->uk_size); 1906 ZONE_LOCK(zone); 1907 } 1908 1909 zone->uz_fills--; 1910 if (bucket->ub_cnt != 0) { 1911 LIST_INSERT_HEAD(&zone->uz_full_bucket, 1912 bucket, ub_link); 1913 return (1); 1914 } 1915 #ifdef SMP 1916 done: 1917 #endif 1918 bucket_free(bucket); 1919 1920 return (0); 1921 } 1922 /* 1923 * Allocates an item for an internal zone 1924 * 1925 * Arguments 1926 * zone The zone to alloc for. 1927 * udata The data to be passed to the constructor. 1928 * flags M_WAITOK, M_NOWAIT, M_ZERO. 1929 * 1930 * Returns 1931 * NULL if there is no memory and M_NOWAIT is set 1932 * An item if successful 1933 */ 1934 1935 static void * 1936 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags) 1937 { 1938 uma_keg_t keg; 1939 uma_slab_t slab; 1940 void *item; 1941 1942 item = NULL; 1943 keg = zone->uz_keg; 1944 1945 #ifdef UMA_DEBUG_ALLOC 1946 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); 1947 #endif 1948 ZONE_LOCK(zone); 1949 1950 slab = uma_zone_slab(zone, flags); 1951 if (slab == NULL) { 1952 ZONE_UNLOCK(zone); 1953 return (NULL); 1954 } 1955 1956 item = uma_slab_alloc(zone, slab); 1957 1958 ZONE_UNLOCK(zone); 1959 1960 /* 1961 * We have to call both the zone's init (not the keg's init) 1962 * and the zone's ctor. This is because the item is going from 1963 * a keg slab directly to the user, and the user is expecting it 1964 * to be both zone-init'd as well as zone-ctor'd. 1965 */ 1966 if (zone->uz_init != NULL) 1967 zone->uz_init(item, keg->uk_size); 1968 if (zone->uz_ctor != NULL) 1969 zone->uz_ctor(item, keg->uk_size, udata); 1970 if (flags & M_ZERO) 1971 bzero(item, keg->uk_size); 1972 1973 return (item); 1974 } 1975 1976 /* See uma.h */ 1977 void 1978 uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 1979 { 1980 uma_keg_t keg; 1981 uma_cache_t cache; 1982 uma_bucket_t bucket; 1983 int bflags; 1984 int cpu; 1985 int skip; 1986 1987 /* This is the fast path free */ 1988 skip = 0; 1989 keg = zone->uz_keg; 1990 1991 #ifdef UMA_DEBUG_ALLOC_1 1992 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); 1993 #endif 1994 /* 1995 * The race here is acceptable. If we miss it we'll just have to wait 1996 * a little longer for the limits to be reset. 
1997 */ 1998 1999 if (keg->uk_flags & UMA_ZFLAG_FULL) 2000 goto zfree_internal; 2001 2002 if (zone->uz_dtor) { 2003 zone->uz_dtor(item, keg->uk_size, udata); 2004 skip = 1; 2005 } 2006 2007 zfree_restart: 2008 cpu = PCPU_GET(cpuid); 2009 CPU_LOCK(cpu); 2010 cache = &zone->uz_cpu[cpu]; 2011 2012 zfree_start: 2013 bucket = cache->uc_freebucket; 2014 2015 if (bucket) { 2016 /* 2017 * Do we have room in our bucket? It is OK for this uz count 2018 * check to be slightly out of sync. 2019 */ 2020 2021 if (bucket->ub_cnt < bucket->ub_entries) { 2022 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL, 2023 ("uma_zfree: Freeing to non free bucket index.")); 2024 bucket->ub_bucket[bucket->ub_cnt] = item; 2025 bucket->ub_cnt++; 2026 #ifdef INVARIANTS 2027 ZONE_LOCK(zone); 2028 if (keg->uk_flags & UMA_ZONE_MALLOC) 2029 uma_dbg_free(zone, udata, item); 2030 else 2031 uma_dbg_free(zone, NULL, item); 2032 ZONE_UNLOCK(zone); 2033 #endif 2034 CPU_UNLOCK(cpu); 2035 return; 2036 } else if (cache->uc_allocbucket) { 2037 #ifdef UMA_DEBUG_ALLOC 2038 printf("uma_zfree: Swapping buckets.\n"); 2039 #endif 2040 /* 2041 * We have run out of space in our freebucket. 2042 * See if we can switch with our alloc bucket. 2043 */ 2044 if (cache->uc_allocbucket->ub_cnt < 2045 cache->uc_freebucket->ub_cnt) { 2046 bucket = cache->uc_freebucket; 2047 cache->uc_freebucket = cache->uc_allocbucket; 2048 cache->uc_allocbucket = bucket; 2049 goto zfree_start; 2050 } 2051 } 2052 } 2053 /* 2054 * We can get here for two reasons: 2055 * 2056 * 1) The buckets are NULL 2057 * 2) The alloc and free buckets are both somewhat full. 2058 */ 2059 2060 ZONE_LOCK(zone); 2061 2062 bucket = cache->uc_freebucket; 2063 cache->uc_freebucket = NULL; 2064 2065 /* Can we throw this on the zone full list? */ 2066 if (bucket != NULL) { 2067 #ifdef UMA_DEBUG_ALLOC 2068 printf("uma_zfree: Putting old bucket on the free list.\n"); 2069 #endif 2070 /* ub_cnt is pointing to the last free item */ 2071 KASSERT(bucket->ub_cnt != 0, 2072 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); 2073 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2074 bucket, ub_link); 2075 } 2076 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 2077 LIST_REMOVE(bucket, ub_link); 2078 ZONE_UNLOCK(zone); 2079 cache->uc_freebucket = bucket; 2080 goto zfree_start; 2081 } 2082 /* We're done with this CPU now */ 2083 CPU_UNLOCK(cpu); 2084 2085 /* And the zone.. */ 2086 ZONE_UNLOCK(zone); 2087 2088 #ifdef UMA_DEBUG_ALLOC 2089 printf("uma_zfree: Allocating new free bucket.\n"); 2090 #endif 2091 bflags = M_NOWAIT; 2092 2093 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY) 2094 bflags |= M_NOVM; 2095 bucket = bucket_alloc(zone->uz_count, bflags); 2096 if (bucket) { 2097 ZONE_LOCK(zone); 2098 LIST_INSERT_HEAD(&zone->uz_free_bucket, 2099 bucket, ub_link); 2100 ZONE_UNLOCK(zone); 2101 goto zfree_restart; 2102 } 2103 2104 /* 2105 * If nothing else caught this, we'll just do an internal free. 2106 */ 2107 2108 zfree_internal: 2109 2110 #ifdef INVARIANTS 2111 /* 2112 * If we need to skip the dtor and the uma_dbg_free in 2113 * uma_zfree_internal because we've already called the dtor 2114 * above, but we ended up here, then we need to make sure 2115 * that we take care of the uma_dbg_free immediately. 
/*
 * Frees an item to an INTERNAL zone or allocates a free bucket
 *
 * Arguments:
 *	zone   The zone to free to
 *	item   The item we're freeing
 *	udata  User supplied data for the dtor
 *	skip   Skip the dtor, it was done in uma_zfree_arg
 */
static void
uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
{
	uma_slab_t slab;
	uma_keg_t keg;
	u_int8_t *mem;
	u_int8_t freei;

	keg = zone->uz_keg;

	if (!skip && zone->uz_dtor)
		zone->uz_dtor(item, keg->uk_size, udata);
	if (zone->uz_fini)
		zone->uz_fini(item, keg->uk_size);

	ZONE_LOCK(zone);

	if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
		if (keg->uk_flags & UMA_ZONE_HASH)
			slab = hash_sfind(&keg->uk_hash, mem);
		else {
			mem += keg->uk_pgoff;
			slab = (uma_slab_t)mem;
		}
	} else {
		slab = (uma_slab_t)udata;
	}

	/* Do we need to remove from any lists? */
	if (slab->us_freecount+1 == keg->uk_ipers) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
	} else if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
	}

	/* Slab management: return the item's slot to the slab's free list. */
	freei = ((unsigned long)item - (unsigned long)slab->us_data)
	    / keg->uk_rsize;

#ifdef INVARIANTS
	if (!skip)
		uma_dbg_free(zone, slab, item);
#endif

	slab->us_freelist[freei].us_item = slab->us_firstfree;
	slab->us_firstfree = freei;
	slab->us_freecount++;

	/* Zone statistics */
	keg->uk_free++;

	if (keg->uk_flags & UMA_ZFLAG_FULL) {
		if (keg->uk_pages < keg->uk_maxpages)
			keg->uk_flags &= ~UMA_ZFLAG_FULL;

		/* We can handle one more allocation */
		wakeup_one(keg);
	}

	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_max(uma_zone_t zone, int nitems)
{
	uma_keg_t keg;

	keg = zone->uz_keg;
	ZONE_LOCK(zone);
	if (keg->uk_ppera > 1)
		keg->uk_maxpages = nitems * keg->uk_ppera;
	else
		keg->uk_maxpages = nitems / keg->uk_ipers;

	if (keg->uk_maxpages * keg->uk_ipers < nitems)
		keg->uk_maxpages++;

	ZONE_UNLOCK(zone);
}
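/*
 * Illustrative sketch only (not compiled): capping the hypothetical
 * foo_zone from the earlier sketches.  The item count is converted to a
 * whole number of pages/slabs; once the keg hits uk_maxpages the keg is
 * marked UMA_ZFLAG_FULL, and allocations back off until the free path in
 * uma_zfree_internal() above clears the flag and does its wakeup_one().
 */
#if 0
static void
foo_limit_setup(void)
{

	/* Allow roughly 1024 foo items to be outstanding at once. */
	uma_zone_set_max(foo_zone, 1024);
}
#endif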
/* See uma.h */
void
uma_zone_set_init(uma_zone_t zone, uma_init uminit)
{
	ZONE_LOCK(zone);
	KASSERT(zone->uz_keg->uk_pages == 0,
	    ("uma_zone_set_init on non-empty keg"));
	zone->uz_keg->uk_init = uminit;
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
{
	ZONE_LOCK(zone);
	KASSERT(zone->uz_keg->uk_pages == 0,
	    ("uma_zone_set_fini on non-empty keg"));
	zone->uz_keg->uk_fini = fini;
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
{
	ZONE_LOCK(zone);
	KASSERT(zone->uz_keg->uk_pages == 0,
	    ("uma_zone_set_zinit on non-empty keg"));
	zone->uz_init = zinit;
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
{
	ZONE_LOCK(zone);
	KASSERT(zone->uz_keg->uk_pages == 0,
	    ("uma_zone_set_zfini on non-empty keg"));
	zone->uz_fini = zfini;
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
	ZONE_LOCK(zone);
	zone->uz_keg->uk_freef = freef;
	ZONE_UNLOCK(zone);
}

/* See uma.h */
void
uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
{
	ZONE_LOCK(zone);
	zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
	zone->uz_keg->uk_allocf = allocf;
	ZONE_UNLOCK(zone);
}

/* See uma.h */
int
uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
{
	uma_keg_t keg;
	vm_offset_t kva;
	int pages;

	keg = zone->uz_keg;
	pages = count / keg->uk_ipers;

	if (pages * keg->uk_ipers < count)
		pages++;

	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);

	if (kva == 0)
		return (0);
	if (obj == NULL) {
		obj = vm_object_allocate(OBJT_DEFAULT,
		    pages);
	} else {
		VM_OBJECT_LOCK_INIT(obj);
		_vm_object_allocate(OBJT_DEFAULT,
		    pages, obj);
	}
	ZONE_LOCK(zone);
	keg->uk_kva = kva;
	keg->uk_obj = obj;
	keg->uk_maxpages = pages;
	keg->uk_allocf = obj_alloc;
	keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
	ZONE_UNLOCK(zone);
	return (1);
}

/* See uma.h */
void
uma_prealloc(uma_zone_t zone, int items)
{
	int slabs;
	uma_slab_t slab;
	uma_keg_t keg;

	keg = zone->uz_keg;
	ZONE_LOCK(zone);
	slabs = items / keg->uk_ipers;
	if (slabs * keg->uk_ipers < items)
		slabs++;
	while (slabs > 0) {
		slab = slab_zalloc(zone, M_WAITOK);
		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
		slabs--;
	}
	ZONE_UNLOCK(zone);
}

/* See uma.h */
u_int32_t *
uma_find_refcnt(uma_zone_t zone, void *item)
{
	uma_slabrefcnt_t slab;
	uma_keg_t keg;
	u_int32_t *refcnt;
	int idx;

	keg = zone->uz_keg;
	slab = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
	KASSERT(slab != NULL,
	    ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
	idx = ((unsigned long)item - (unsigned long)slab->us_data)
	    / keg->uk_rsize;
	refcnt = &(slab->us_freelist[idx].us_refcnt);
	return refcnt;
}
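/*
 * Illustrative sketch only (not compiled): using the per-item reference
 * counters.  buf_zone is hypothetical; it must have been created with
 * UMA_ZONE_REFCNT so that its slabs carry the us_refcnt array that
 * uma_find_refcnt() returns a pointer into.
 */
#if 0
static uma_zone_t buf_zone;

static void *
buf_get_ref(u_int32_t **refp)
{
	void *buf;

	buf = uma_zalloc(buf_zone, M_NOWAIT);
	if (buf != NULL) {
		*refp = uma_find_refcnt(buf_zone, buf);
		**refp = 1;
	}
	return (buf);
}
#endif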
/* See uma.h */
void
uma_reclaim(void)
{
#ifdef UMA_DEBUG
	printf("UMA: vm asked us to release pages!\n");
#endif
	bucket_enable();
	zone_foreach(zone_drain);
	/*
	 * The slab zones are visited early in the loop above, so some of
	 * their slabs only become free once the other zones have given
	 * their slabs back.  Drain them again here so that those
	 * now-empty pages can be released.  The same applies to the
	 * bucket zones.
	 */
	zone_drain(slabzone);
	zone_drain(slabrefzone);
	bucket_zone_drain();
}

void *
uma_large_malloc(int size, int wait)
{
	void *mem;
	uma_slab_t slab;
	u_int8_t flags;

	slab = uma_zalloc_internal(slabzone, NULL, wait);
	if (slab == NULL)
		return (NULL);
	mem = page_alloc(NULL, size, &flags, wait);
	if (mem) {
		vsetslab((vm_offset_t)mem, slab);
		slab->us_data = mem;
		slab->us_flags = flags | UMA_SLAB_MALLOC;
		slab->us_size = size;
	} else {
		uma_zfree_internal(slabzone, slab, NULL, 0);
	}

	return (mem);
}

void
uma_large_free(uma_slab_t slab)
{
	vsetobj((vm_offset_t)slab->us_data, kmem_object);
	page_free(slab->us_data, slab->us_size, slab->us_flags);
	uma_zfree_internal(slabzone, slab, NULL, 0);
}

void
uma_print_stats(void)
{
	zone_foreach(uma_print_zone);
}

static void
slab_print(uma_slab_t slab)
{
	printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
	    slab->us_keg, slab->us_data, slab->us_freecount,
	    slab->us_firstfree);
}

static void
cache_print(uma_cache_t cache)
{
	printf("alloc: %p(%d), free: %p(%d)\n",
	    cache->uc_allocbucket,
	    cache->uc_allocbucket ? cache->uc_allocbucket->ub_cnt : 0,
	    cache->uc_freebucket,
	    cache->uc_freebucket ? cache->uc_freebucket->ub_cnt : 0);
}

void
uma_print_zone(uma_zone_t zone)
{
	uma_cache_t cache;
	uma_keg_t keg;
	uma_slab_t slab;
	int i;

	keg = zone->uz_keg;
	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
	    zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
	    keg->uk_ipers, keg->uk_ppera,
	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
	printf("Part slabs:\n");
	LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
		slab_print(slab);
	printf("Free slabs:\n");
	LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
		slab_print(slab);
	printf("Full slabs:\n");
	LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
		slab_print(slab);
	for (i = 0; i <= mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		cache = &zone->uz_cpu[i];
		printf("CPU %d Cache:\n", i);
		cache_print(cache);
	}
}
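/*
 * Illustrative sketch only (not compiled): the print routines above are
 * most useful from the kernel debugger.  This assumes the standard ddb(4)
 * DB_SHOW_COMMAND macro; the command name and function name are made up
 * for the example.
 */
#if 0
#include <ddb/ddb.h>

DB_SHOW_COMMAND(umastats, db_show_uma_stats)
{

	/* Dump every zone's keg, slab lists and per-CPU caches. */
	uma_print_stats();
}
#endif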
/*
 * Sysctl handler for vm.zone
 *
 * stolen from vm_zone.c
 */
static int
sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
{
	int error, len, cnt;
	const int linesize = 128;	/* conservative */
	int totalfree;
	char *tmpbuf, *offset;
	uma_zone_t z;
	uma_keg_t zk;
	char *p;
	int cpu;
	int cachefree;
	uma_bucket_t bucket;
	uma_cache_t cache;

	cnt = 0;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(zk, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &zk->uk_zones, uz_link)
			cnt++;
	}
	mtx_unlock(&uma_mtx);
	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
	    M_TEMP, M_WAITOK);
	len = snprintf(tmpbuf, linesize,
	    "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
	if (cnt == 0)
		tmpbuf[len - 1] = '\0';
	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len - 1 : len);
	if (error || cnt == 0)
		goto out;
	offset = tmpbuf;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(zk, &uma_kegs, uk_link) {
	    LIST_FOREACH(z, &zk->uk_zones, uz_link) {
		if (cnt == 0)	/* list may have changed size */
			break;
		if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
			for (cpu = 0; cpu <= mp_maxid; cpu++) {
				if (CPU_ABSENT(cpu))
					continue;
				CPU_LOCK(cpu);
			}
		}
		ZONE_LOCK(z);
		cachefree = 0;
		if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
			for (cpu = 0; cpu <= mp_maxid; cpu++) {
				if (CPU_ABSENT(cpu))
					continue;
				cache = &z->uz_cpu[cpu];
				if (cache->uc_allocbucket != NULL)
					cachefree += cache->uc_allocbucket->ub_cnt;
				if (cache->uc_freebucket != NULL)
					cachefree += cache->uc_freebucket->ub_cnt;
				CPU_UNLOCK(cpu);
			}
		}
		LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
			cachefree += bucket->ub_cnt;
		}
		totalfree = zk->uk_free + cachefree;
		len = snprintf(offset, linesize,
		    "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
		    z->uz_name, zk->uk_size,
		    zk->uk_maxpages * zk->uk_ipers,
		    (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
		    totalfree,
		    (unsigned long long)z->uz_allocs);
		ZONE_UNLOCK(z);
		for (p = offset + 12; p > offset && *p == ' '; --p)
			/* nothing */ ;
		p[1] = ':';
		cnt--;
		offset += len;
	    }
	}
	mtx_unlock(&uma_mtx);
	*offset++ = '\0';
	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
out:
	FREE(tmpbuf, M_TEMP);
	return (error);
}
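/*
 * Illustrative sketch only (userland code, not part of this file): reading
 * the handler above from a program, assuming it is registered as the
 * string-valued "vm.zone" sysctl.  Uses the usual two-call sysctlbyname(3)
 * pattern to size the buffer before fetching the text.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char *buf;
	size_t len;

	/* First call: ask only for the length of the report. */
	if (sysctlbyname("vm.zone", NULL, &len, NULL, 0) == -1)
		return (1);
	if ((buf = malloc(len)) == NULL)
		return (1);
	/* Second call: fetch the formatted zone table and print it. */
	if (sysctlbyname("vm.zone", buf, &len, NULL, 0) == -1)
		return (1);
	fwrite(buf, 1, len, stdout);
	free(buf);
	return (0);
}
#endif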