/*-
 * Copyright (c) 2004, 2005,
 *     Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 * Copyright (c) 2002, 2003, 2004, 2005,
 *     Jeffrey Roberson <jeff@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uma_core.c  Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.  This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose algorithms
 * are well known.
 *
 */

/*
 * TODO:
 *	- Improve memory usage for large allocations
 *	- Investigate cache size adjustments
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/* I should really use ktr.. */
/*
#define UMA_DEBUG 1
#define UMA_DEBUG_ALLOC 1
#define UMA_DEBUG_ALLOC_1 1
*/

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

#include <machine/vmparam.h>

/*
 * This is the zone and keg from which all zones are spawned.  The idea is that
 * even the zone & keg heads are allocated from the allocator, so we use the
 * bss section to bootstrap us.
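 *
 * Concretely, the "UMA Kegs" zone hands out uma_keg structures and the
 * "UMA Zones" zone hands out uma_zone structures, while the headers for
 * those two zones are the statically allocated masterkeg/masterzone_k/
 * masterzone_z objects below, so no dynamic allocation is needed to get
 * the first zones going.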
92 */ 93 static struct uma_keg masterkeg; 94 static struct uma_zone masterzone_k; 95 static struct uma_zone masterzone_z; 96 static uma_zone_t kegs = &masterzone_k; 97 static uma_zone_t zones = &masterzone_z; 98 99 /* This is the zone from which all of uma_slab_t's are allocated. */ 100 static uma_zone_t slabzone; 101 static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */ 102 103 /* 104 * The initial hash tables come out of this zone so they can be allocated 105 * prior to malloc coming up. 106 */ 107 static uma_zone_t hashzone; 108 109 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets"); 110 111 /* 112 * Are we allowed to allocate buckets? 113 */ 114 static int bucketdisable = 1; 115 116 /* Linked list of all kegs in the system */ 117 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs); 118 119 /* This mutex protects the keg list */ 120 static struct mtx uma_mtx; 121 122 /* These are the pcpu cache locks */ 123 static struct mtx uma_pcpu_mtx[MAXCPU]; 124 125 /* Linked list of boot time pages */ 126 static LIST_HEAD(,uma_slab) uma_boot_pages = 127 LIST_HEAD_INITIALIZER(&uma_boot_pages); 128 129 /* Count of free boottime pages */ 130 static int uma_boot_free = 0; 131 132 /* Is the VM done starting up? */ 133 static int booted = 0; 134 135 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */ 136 static u_int uma_max_ipers; 137 static u_int uma_max_ipers_ref; 138 139 /* 140 * This is the handle used to schedule events that need to happen 141 * outside of the allocation fast path. 142 */ 143 static struct callout uma_callout; 144 #define UMA_TIMEOUT 20 /* Seconds for callout interval. */ 145 146 /* 147 * This structure is passed as the zone ctor arg so that I don't have to create 148 * a special allocation function just for zones. 149 */ 150 struct uma_zctor_args { 151 char *name; 152 size_t size; 153 uma_ctor ctor; 154 uma_dtor dtor; 155 uma_init uminit; 156 uma_fini fini; 157 uma_keg_t keg; 158 int align; 159 u_int16_t flags; 160 }; 161 162 struct uma_kctor_args { 163 uma_zone_t zone; 164 size_t size; 165 uma_init uminit; 166 uma_fini fini; 167 int align; 168 u_int16_t flags; 169 }; 170 171 struct uma_bucket_zone { 172 uma_zone_t ubz_zone; 173 char *ubz_name; 174 int ubz_entries; 175 }; 176 177 #define BUCKET_MAX 128 178 179 struct uma_bucket_zone bucket_zones[] = { 180 { NULL, "16 Bucket", 16 }, 181 { NULL, "32 Bucket", 32 }, 182 { NULL, "64 Bucket", 64 }, 183 { NULL, "128 Bucket", 128 }, 184 { NULL, NULL, 0} 185 }; 186 187 #define BUCKET_SHIFT 4 188 #define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1) 189 190 /* 191 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket 192 * of approximately the right size. 193 */ 194 static uint8_t bucket_size[BUCKET_ZONES]; 195 196 enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI }; 197 198 /* Prototypes.. 
*/ 199 200 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int); 201 static void *page_alloc(uma_zone_t, int, u_int8_t *, int); 202 static void *startup_alloc(uma_zone_t, int, u_int8_t *, int); 203 static void page_free(void *, int, u_int8_t); 204 static uma_slab_t slab_zalloc(uma_zone_t, int); 205 static void cache_drain(uma_zone_t); 206 static void bucket_drain(uma_zone_t, uma_bucket_t); 207 static void bucket_cache_drain(uma_zone_t zone); 208 static int keg_ctor(void *, int, void *, int); 209 static void keg_dtor(void *, int, void *); 210 static int zone_ctor(void *, int, void *, int); 211 static void zone_dtor(void *, int, void *); 212 static int zero_init(void *, int, int); 213 static void zone_small_init(uma_zone_t zone); 214 static void zone_large_init(uma_zone_t zone); 215 static void zone_foreach(void (*zfunc)(uma_zone_t)); 216 static void zone_timeout(uma_zone_t zone); 217 static int hash_alloc(struct uma_hash *); 218 static int hash_expand(struct uma_hash *, struct uma_hash *); 219 static void hash_free(struct uma_hash *hash); 220 static void uma_timeout(void *); 221 static void uma_startup3(void); 222 static void *uma_zalloc_internal(uma_zone_t, void *, int); 223 static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip); 224 static void bucket_enable(void); 225 static void bucket_init(void); 226 static uma_bucket_t bucket_alloc(int, int); 227 static void bucket_free(uma_bucket_t); 228 static void bucket_zone_drain(void); 229 static int uma_zalloc_bucket(uma_zone_t zone, int flags); 230 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags); 231 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab); 232 static void zone_drain(uma_zone_t); 233 static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, 234 uma_fini fini, int align, u_int16_t flags); 235 236 void uma_print_zone(uma_zone_t); 237 void uma_print_stats(void); 238 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); 239 240 #ifdef WITNESS 241 static int nosleepwithlocks = 1; 242 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks, 243 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths"); 244 #else 245 static int nosleepwithlocks = 0; 246 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks, 247 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths"); 248 #endif 249 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, 250 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 251 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); 252 253 /* 254 * This routine checks to see whether or not it's safe to enable buckets. 255 */ 256 257 static void 258 bucket_enable(void) 259 { 260 if (cnt.v_free_count < cnt.v_free_min) 261 bucketdisable = 1; 262 else 263 bucketdisable = 0; 264 } 265 266 /* 267 * Initialize bucket_zones, the array of zones of buckets of various sizes. 268 * 269 * For each zone, calculate the memory required for each bucket, consisting 270 * of the header and an array of pointers. Initialize bucket_size[] to point 271 * the range of appropriate bucket sizes at the zone. 
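 *
 * For example, with BUCKET_SHIFT == 4 the table has one entry per 16
 * requested slots: requests for 1-16 entries map to the "16 Bucket" zone,
 * 17-32 to "32 Bucket", 33-64 to "64 Bucket", and anything up to BUCKET_MAX
 * to "128 Bucket".  bucket_zone_lookup(20), say, computes howmany(20, 16)
 * == 2 and bucket_size[2] selects the "32 Bucket" zone.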
272 */ 273 static void 274 bucket_init(void) 275 { 276 struct uma_bucket_zone *ubz; 277 int i; 278 int j; 279 280 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) { 281 int size; 282 283 ubz = &bucket_zones[j]; 284 size = roundup(sizeof(struct uma_bucket), sizeof(void *)); 285 size += sizeof(void *) * ubz->ubz_entries; 286 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, 287 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 288 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT)) 289 bucket_size[i >> BUCKET_SHIFT] = j; 290 } 291 } 292 293 /* 294 * Given a desired number of entries for a bucket, return the zone from which 295 * to allocate the bucket. 296 */ 297 static struct uma_bucket_zone * 298 bucket_zone_lookup(int entries) 299 { 300 int idx; 301 302 idx = howmany(entries, 1 << BUCKET_SHIFT); 303 return (&bucket_zones[bucket_size[idx]]); 304 } 305 306 static uma_bucket_t 307 bucket_alloc(int entries, int bflags) 308 { 309 struct uma_bucket_zone *ubz; 310 uma_bucket_t bucket; 311 312 /* 313 * This is to stop us from allocating per cpu buckets while we're 314 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the 315 * boot pages. This also prevents us from allocating buckets in 316 * low memory situations. 317 */ 318 if (bucketdisable) 319 return (NULL); 320 321 ubz = bucket_zone_lookup(entries); 322 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags); 323 if (bucket) { 324 #ifdef INVARIANTS 325 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries); 326 #endif 327 bucket->ub_cnt = 0; 328 bucket->ub_entries = ubz->ubz_entries; 329 } 330 331 return (bucket); 332 } 333 334 static void 335 bucket_free(uma_bucket_t bucket) 336 { 337 struct uma_bucket_zone *ubz; 338 339 ubz = bucket_zone_lookup(bucket->ub_entries); 340 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE); 341 } 342 343 static void 344 bucket_zone_drain(void) 345 { 346 struct uma_bucket_zone *ubz; 347 348 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) 349 zone_drain(ubz->ubz_zone); 350 } 351 352 353 /* 354 * Routine called by timeout which is used to fire off some time interval 355 * based calculations. (stats, hash size, etc.) 356 * 357 * Arguments: 358 * arg Unused 359 * 360 * Returns: 361 * Nothing 362 */ 363 static void 364 uma_timeout(void *unused) 365 { 366 bucket_enable(); 367 zone_foreach(zone_timeout); 368 369 /* Reschedule this event */ 370 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 371 } 372 373 /* 374 * Routine to perform timeout driven calculations. This expands the 375 * hashes and does per cpu statistics aggregation. 376 * 377 * Arguments: 378 * zone The zone to operate on 379 * 380 * Returns: 381 * Nothing 382 */ 383 static void 384 zone_timeout(uma_zone_t zone) 385 { 386 uma_keg_t keg; 387 uma_cache_t cache; 388 u_int64_t alloc; 389 int cpu; 390 391 keg = zone->uz_keg; 392 alloc = 0; 393 394 /* 395 * Aggregate per cpu cache statistics back to the zone. 396 * 397 * XXX This should be done in the sysctl handler. 398 * 399 * I may rewrite this to set a flag in the per cpu cache instead of 400 * locking. If the flag is not cleared on the next round I will have 401 * to lock and do it here instead so that the statistics don't get too 402 * far out of sync. 
	 */
	if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) {
		for (cpu = 0; cpu <= mp_maxid; cpu++) {
			if (CPU_ABSENT(cpu))
				continue;
			CPU_LOCK(cpu);
			cache = &zone->uz_cpu[cpu];
			/* Add them up, and reset */
			alloc += cache->uc_allocs;
			cache->uc_allocs = 0;
			CPU_UNLOCK(cpu);
		}
	}

	/* Now push these stats back into the zone.. */
	ZONE_LOCK(zone);
	zone->uz_allocs += alloc;

	/*
	 * Expand the zone hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is completely eliminate collisions.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */

	if (keg->uk_flags & UMA_ZONE_HASH &&
	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
		struct uma_hash newhash;
		struct uma_hash oldhash;
		int ret;

		/*
		 * This is so involved because allocating and freeing
		 * while the zone lock is held will lead to deadlock.
		 * I have to do everything in stages and check for
		 * races.
		 */
		newhash = keg->uk_hash;
		ZONE_UNLOCK(zone);
		ret = hash_alloc(&newhash);
		ZONE_LOCK(zone);
		if (ret) {
			if (hash_expand(&keg->uk_hash, &newhash)) {
				oldhash = keg->uk_hash;
				keg->uk_hash = newhash;
			} else
				oldhash = newhash;

			ZONE_UNLOCK(zone);
			hash_free(&oldhash);
			ZONE_LOCK(zone);
		}
	}
	ZONE_UNLOCK(zone);
}

/*
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	hash  A new hash structure with the old hash size in uh_hashsize
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
static int
hash_alloc(struct uma_hash *hash)
{
	int oldsize;
	int alloc;

	oldsize = hash->uh_hashsize;

	/* We're just going to go to a power of two greater */
	if (oldsize) {
		hash->uh_hashsize = oldsize * 2;
		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
		    M_UMAHASH, M_NOWAIT);
	} else {
		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
		    M_WAITOK);
		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
	}
	if (hash->uh_slab_hash) {
		bzero(hash->uh_slab_hash, alloc);
		hash->uh_hashmask = hash->uh_hashsize - 1;
		return (1);
	}

	return (0);
}

/*
 * Expands the hash table for HASH zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation
 * path, otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	oldhash  The hash you want to expand
 *	newhash  The hash structure for the new table
 *
 * Returns:
 *	1 if the table was expanded, 0 otherwise.
 *
 * Discussion:
 */
static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
	uma_slab_t slab;
	int hval;
	int i;

	if (!newhash->uh_slab_hash)
		return (0);

	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
		return (0);

	/*
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
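	 * For now the loop below is a full rehash: every slab is unlinked
	 * from its old chain and re-inserted at UMA_HASH(newhash,
	 * slab->us_data), so the cost is linear in the number of slabs in
	 * the keg.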
529 */ 530 531 for (i = 0; i < oldhash->uh_hashsize; i++) 532 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) { 533 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]); 534 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink); 535 hval = UMA_HASH(newhash, slab->us_data); 536 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval], 537 slab, us_hlink); 538 } 539 540 return (1); 541 } 542 543 /* 544 * Free the hash bucket to the appropriate backing store. 545 * 546 * Arguments: 547 * slab_hash The hash bucket we're freeing 548 * hashsize The number of entries in that hash bucket 549 * 550 * Returns: 551 * Nothing 552 */ 553 static void 554 hash_free(struct uma_hash *hash) 555 { 556 if (hash->uh_slab_hash == NULL) 557 return; 558 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT) 559 uma_zfree_internal(hashzone, 560 hash->uh_slab_hash, NULL, SKIP_NONE); 561 else 562 free(hash->uh_slab_hash, M_UMAHASH); 563 } 564 565 /* 566 * Frees all outstanding items in a bucket 567 * 568 * Arguments: 569 * zone The zone to free to, must be unlocked. 570 * bucket The free/alloc bucket with items, cpu queue must be locked. 571 * 572 * Returns: 573 * Nothing 574 */ 575 576 static void 577 bucket_drain(uma_zone_t zone, uma_bucket_t bucket) 578 { 579 uma_slab_t slab; 580 int mzone; 581 void *item; 582 583 if (bucket == NULL) 584 return; 585 586 slab = NULL; 587 mzone = 0; 588 589 /* We have to lookup the slab again for malloc.. */ 590 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC) 591 mzone = 1; 592 593 while (bucket->ub_cnt > 0) { 594 bucket->ub_cnt--; 595 item = bucket->ub_bucket[bucket->ub_cnt]; 596 #ifdef INVARIANTS 597 bucket->ub_bucket[bucket->ub_cnt] = NULL; 598 KASSERT(item != NULL, 599 ("bucket_drain: botched ptr, item is NULL")); 600 #endif 601 /* 602 * This is extremely inefficient. The slab pointer was passed 603 * to uma_zfree_arg, but we lost it because the buckets don't 604 * hold them. This will go away when free() gets a size passed 605 * to it. 606 */ 607 if (mzone) 608 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK)); 609 uma_zfree_internal(zone, item, slab, SKIP_DTOR); 610 } 611 } 612 613 /* 614 * Drains the per cpu caches for a zone. 615 * 616 * Arguments: 617 * zone The zone to drain, must be unlocked. 618 * 619 * Returns: 620 * Nothing 621 */ 622 static void 623 cache_drain(uma_zone_t zone) 624 { 625 uma_cache_t cache; 626 int cpu; 627 628 /* 629 * We have to lock each cpu cache before locking the zone 630 */ 631 for (cpu = 0; cpu <= mp_maxid; cpu++) { 632 if (CPU_ABSENT(cpu)) 633 continue; 634 CPU_LOCK(cpu); 635 cache = &zone->uz_cpu[cpu]; 636 bucket_drain(zone, cache->uc_allocbucket); 637 bucket_drain(zone, cache->uc_freebucket); 638 if (cache->uc_allocbucket != NULL) 639 bucket_free(cache->uc_allocbucket); 640 if (cache->uc_freebucket != NULL) 641 bucket_free(cache->uc_freebucket); 642 cache->uc_allocbucket = cache->uc_freebucket = NULL; 643 } 644 ZONE_LOCK(zone); 645 bucket_cache_drain(zone); 646 ZONE_UNLOCK(zone); 647 for (cpu = 0; cpu <= mp_maxid; cpu++) { 648 if (CPU_ABSENT(cpu)) 649 continue; 650 CPU_UNLOCK(cpu); 651 } 652 } 653 654 /* 655 * Drain the cached buckets from a zone. Expects a locked zone on entry. 656 */ 657 static void 658 bucket_cache_drain(uma_zone_t zone) 659 { 660 uma_bucket_t bucket; 661 662 /* 663 * Drain the bucket queues and free the buckets, we just keep two per 664 * cpu (alloc/free). 
	 */
	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		bucket_drain(zone, bucket);
		bucket_free(bucket);
		ZONE_LOCK(zone);
	}

	/* Now we do the free queue.. */
	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		bucket_free(bucket);
	}
}

/*
 * Frees pages from a zone back to the system.  This is done on demand from
 * the pageout daemon.
 *
 * Arguments:
 *	zone  The zone to free pages from
 *
 * Returns:
 *	Nothing.
 */
static void
zone_drain(uma_zone_t zone)
{
	struct slabhead freeslabs = { 0 };
	uma_keg_t keg;
	uma_slab_t slab;
	uma_slab_t n;
	u_int8_t flags;
	u_int8_t *mem;
	int i;

	keg = zone->uz_keg;

	/*
	 * We don't want to take pages from statically allocated zones at this
	 * time.
	 */
	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
		return;

	ZONE_LOCK(zone);

#ifdef UMA_DEBUG
	printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
#endif
	bucket_cache_drain(zone);
	if (keg->uk_free == 0)
		goto finished;

	slab = LIST_FIRST(&keg->uk_free_slab);
	while (slab) {
		n = LIST_NEXT(slab, us_link);

		/* We have nowhere to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		keg->uk_pages -= keg->uk_ppera;
		keg->uk_free -= keg->uk_ipers;

		if (keg->uk_flags & UMA_ZONE_HASH)
			UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);

		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);

		slab = n;
	}
finished:
	ZONE_UNLOCK(zone);

	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
		if (keg->uk_fini)
			for (i = 0; i < keg->uk_ipers; i++)
				keg->uk_fini(
				    slab->us_data + (keg->uk_rsize * i),
				    keg->uk_size);
		flags = slab->us_flags;
		mem = slab->us_data;

		if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
		    (keg->uk_flags & UMA_ZONE_REFCNT)) {
			vm_object_t obj;

			if (flags & UMA_SLAB_KMEM)
				obj = kmem_object;
			else
				obj = NULL;
			for (i = 0; i < keg->uk_ppera; i++)
				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
				    obj);
		}
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
			    SKIP_NONE);
#ifdef UMA_DEBUG
		printf("%s: Returning %d bytes.\n",
		    zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
#endif
		keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
	}
}

/*
 * Allocate a new slab for a zone.  This does not insert the slab onto a list.
 *
 * Arguments:
 *	zone  The zone to allocate slabs for
 *	wait  Shall we wait?
 *
 * Returns:
 *	The slab that was allocated or NULL if there is no memory and the
 *	caller specified M_NOWAIT.
788 */ 789 static uma_slab_t 790 slab_zalloc(uma_zone_t zone, int wait) 791 { 792 uma_slabrefcnt_t slabref; 793 uma_slab_t slab; 794 uma_keg_t keg; 795 u_int8_t *mem; 796 u_int8_t flags; 797 int i; 798 799 slab = NULL; 800 keg = zone->uz_keg; 801 802 #ifdef UMA_DEBUG 803 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); 804 #endif 805 ZONE_UNLOCK(zone); 806 807 if (keg->uk_flags & UMA_ZONE_OFFPAGE) { 808 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait); 809 if (slab == NULL) { 810 ZONE_LOCK(zone); 811 return NULL; 812 } 813 } 814 815 /* 816 * This reproduces the old vm_zone behavior of zero filling pages the 817 * first time they are added to a zone. 818 * 819 * Malloced items are zeroed in uma_zalloc. 820 */ 821 822 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 823 wait |= M_ZERO; 824 else 825 wait &= ~M_ZERO; 826 827 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, 828 &flags, wait); 829 if (mem == NULL) { 830 if (keg->uk_flags & UMA_ZONE_OFFPAGE) 831 uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0); 832 ZONE_LOCK(zone); 833 return (NULL); 834 } 835 836 /* Point the slab into the allocated memory */ 837 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) 838 slab = (uma_slab_t )(mem + keg->uk_pgoff); 839 840 if ((keg->uk_flags & UMA_ZONE_MALLOC) || 841 (keg->uk_flags & UMA_ZONE_REFCNT)) 842 for (i = 0; i < keg->uk_ppera; i++) 843 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab); 844 845 slab->us_keg = keg; 846 slab->us_data = mem; 847 slab->us_freecount = keg->uk_ipers; 848 slab->us_firstfree = 0; 849 slab->us_flags = flags; 850 851 if (keg->uk_flags & UMA_ZONE_REFCNT) { 852 slabref = (uma_slabrefcnt_t)slab; 853 for (i = 0; i < keg->uk_ipers; i++) { 854 slabref->us_freelist[i].us_refcnt = 0; 855 slabref->us_freelist[i].us_item = i+1; 856 } 857 } else { 858 for (i = 0; i < keg->uk_ipers; i++) 859 slab->us_freelist[i].us_item = i+1; 860 } 861 862 if (keg->uk_init != NULL) { 863 for (i = 0; i < keg->uk_ipers; i++) 864 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i), 865 keg->uk_size, wait) != 0) 866 break; 867 if (i != keg->uk_ipers) { 868 if (keg->uk_fini != NULL) { 869 for (i--; i > -1; i--) 870 keg->uk_fini(slab->us_data + 871 (keg->uk_rsize * i), 872 keg->uk_size); 873 } 874 if ((keg->uk_flags & UMA_ZONE_MALLOC) || 875 (keg->uk_flags & UMA_ZONE_REFCNT)) { 876 vm_object_t obj; 877 878 if (flags & UMA_SLAB_KMEM) 879 obj = kmem_object; 880 else 881 obj = NULL; 882 for (i = 0; i < keg->uk_ppera; i++) 883 vsetobj((vm_offset_t)mem + 884 (i * PAGE_SIZE), obj); 885 } 886 if (keg->uk_flags & UMA_ZONE_OFFPAGE) 887 uma_zfree_internal(keg->uk_slabzone, slab, 888 NULL, SKIP_NONE); 889 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, 890 flags); 891 ZONE_LOCK(zone); 892 return (NULL); 893 } 894 } 895 ZONE_LOCK(zone); 896 897 if (keg->uk_flags & UMA_ZONE_HASH) 898 UMA_HASH_INSERT(&keg->uk_hash, slab, mem); 899 900 keg->uk_pages += keg->uk_ppera; 901 keg->uk_free += keg->uk_ipers; 902 903 return (slab); 904 } 905 906 /* 907 * This function is intended to be used early on in place of page_alloc() so 908 * that we may use the boot time page cache to satisfy allocations before 909 * the VM is ready. 910 */ 911 static void * 912 startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 913 { 914 uma_keg_t keg; 915 916 keg = zone->uz_keg; 917 918 /* 919 * Check our small startup cache to see if it has pages remaining. 
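	 * These are the UMA_BOOT_PAGES pages handed to uma_startup() and
	 * queued on uma_boot_pages; once they are exhausted we either panic
	 * (if the VM is not up yet) or switch the keg to its real backend
	 * allocator below.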
920 */ 921 mtx_lock(&uma_mtx); 922 if (uma_boot_free != 0) { 923 uma_slab_t tmps; 924 925 tmps = LIST_FIRST(&uma_boot_pages); 926 LIST_REMOVE(tmps, us_link); 927 uma_boot_free--; 928 mtx_unlock(&uma_mtx); 929 *pflag = tmps->us_flags; 930 return (tmps->us_data); 931 } 932 mtx_unlock(&uma_mtx); 933 if (booted == 0) 934 panic("UMA: Increase UMA_BOOT_PAGES"); 935 /* 936 * Now that we've booted reset these users to their real allocator. 937 */ 938 #ifdef UMA_MD_SMALL_ALLOC 939 keg->uk_allocf = uma_small_alloc; 940 #else 941 keg->uk_allocf = page_alloc; 942 #endif 943 return keg->uk_allocf(zone, bytes, pflag, wait); 944 } 945 946 /* 947 * Allocates a number of pages from the system 948 * 949 * Arguments: 950 * zone Unused 951 * bytes The number of bytes requested 952 * wait Shall we wait? 953 * 954 * Returns: 955 * A pointer to the alloced memory or possibly 956 * NULL if M_NOWAIT is set. 957 */ 958 static void * 959 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 960 { 961 void *p; /* Returned page */ 962 963 *pflag = UMA_SLAB_KMEM; 964 p = (void *) kmem_malloc(kmem_map, bytes, wait); 965 966 return (p); 967 } 968 969 /* 970 * Allocates a number of pages from within an object 971 * 972 * Arguments: 973 * zone Unused 974 * bytes The number of bytes requested 975 * wait Shall we wait? 976 * 977 * Returns: 978 * A pointer to the alloced memory or possibly 979 * NULL if M_NOWAIT is set. 980 */ 981 static void * 982 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 983 { 984 vm_object_t object; 985 vm_offset_t retkva, zkva; 986 vm_page_t p; 987 int pages, startpages; 988 989 object = zone->uz_keg->uk_obj; 990 retkva = 0; 991 992 /* 993 * This looks a little weird since we're getting one page at a time. 994 */ 995 VM_OBJECT_LOCK(object); 996 p = TAILQ_LAST(&object->memq, pglist); 997 pages = p != NULL ? p->pindex + 1 : 0; 998 startpages = pages; 999 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE; 1000 for (; bytes > 0; bytes -= PAGE_SIZE) { 1001 p = vm_page_alloc(object, pages, 1002 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED); 1003 if (p == NULL) { 1004 if (pages != startpages) 1005 pmap_qremove(retkva, pages - startpages); 1006 while (pages != startpages) { 1007 pages--; 1008 p = TAILQ_LAST(&object->memq, pglist); 1009 vm_page_lock_queues(); 1010 vm_page_unwire(p, 0); 1011 vm_page_free(p); 1012 vm_page_unlock_queues(); 1013 } 1014 retkva = 0; 1015 goto done; 1016 } 1017 pmap_qenter(zkva, &p, 1); 1018 if (retkva == 0) 1019 retkva = zkva; 1020 zkva += PAGE_SIZE; 1021 pages += 1; 1022 } 1023 done: 1024 VM_OBJECT_UNLOCK(object); 1025 *flags = UMA_SLAB_PRIV; 1026 1027 return ((void *)retkva); 1028 } 1029 1030 /* 1031 * Frees a number of pages to the system 1032 * 1033 * Arguments: 1034 * mem A pointer to the memory to be freed 1035 * size The size of the memory being freed 1036 * flags The original p->us_flags field 1037 * 1038 * Returns: 1039 * Nothing 1040 */ 1041 static void 1042 page_free(void *mem, int size, u_int8_t flags) 1043 { 1044 vm_map_t map; 1045 1046 if (flags & UMA_SLAB_KMEM) 1047 map = kmem_map; 1048 else 1049 panic("UMA: page_free used with invalid flags %d\n", flags); 1050 1051 kmem_free(map, (vm_offset_t)mem, size); 1052 } 1053 1054 /* 1055 * Zero fill initializer 1056 * 1057 * Arguments/Returns follow uma_init specifications 1058 */ 1059 static int 1060 zero_init(void *mem, int size, int flags) 1061 { 1062 bzero(mem, size); 1063 return (0); 1064 } 1065 1066 /* 1067 * Finish creating a small uma zone. This calculates ipers, and the zone size. 
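 *
 * A rough worked example (the numbers are illustrative only; the real values
 * depend on UMA_SLAB_SIZE, the slab header layout and UMA_FRITM_SZ): assume a
 * 4096-byte slab, a 64-byte header and a 1-byte free-list entry per item.  A
 * 200-byte item padded to 256 for alignment gives rsize = 256 + 1 = 257 and
 * ipers = (4096 - 64) / 257 = 15, wasting 4096 - (15 * 257 + 64) = 177 bytes.
 * If that exceeds UMA_MAX_WASTE, the header goes OFFPAGE and ipers becomes
 * UMA_SLAB_SIZE / uk_rsize = 4096 / 256 = 16.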
1068 * 1069 * Arguments 1070 * zone The zone we should initialize 1071 * 1072 * Returns 1073 * Nothing 1074 */ 1075 static void 1076 zone_small_init(uma_zone_t zone) 1077 { 1078 uma_keg_t keg; 1079 u_int rsize; 1080 u_int memused; 1081 u_int wastedspace; 1082 u_int shsize; 1083 1084 keg = zone->uz_keg; 1085 KASSERT(keg != NULL, ("Keg is null in zone_small_init")); 1086 rsize = keg->uk_size; 1087 1088 if (rsize < UMA_SMALLEST_UNIT) 1089 rsize = UMA_SMALLEST_UNIT; 1090 if (rsize & keg->uk_align) 1091 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); 1092 1093 keg->uk_rsize = rsize; 1094 keg->uk_ppera = 1; 1095 1096 if (keg->uk_flags & UMA_ZONE_REFCNT) { 1097 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */ 1098 shsize = sizeof(struct uma_slab_refcnt); 1099 } else { 1100 rsize += UMA_FRITM_SZ; /* Account for linkage */ 1101 shsize = sizeof(struct uma_slab); 1102 } 1103 1104 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize; 1105 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0")); 1106 memused = keg->uk_ipers * rsize + shsize; 1107 wastedspace = UMA_SLAB_SIZE - memused; 1108 1109 /* 1110 * We can't do OFFPAGE if we're internal or if we've been 1111 * asked to not go to the VM for buckets. If we do this we 1112 * may end up going to the VM (kmem_map) for slabs which we 1113 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a 1114 * result of UMA_ZONE_VM, which clearly forbids it. 1115 */ 1116 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) || 1117 (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) 1118 return; 1119 1120 if ((wastedspace >= UMA_MAX_WASTE) && 1121 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) { 1122 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize; 1123 KASSERT(keg->uk_ipers <= 255, 1124 ("zone_small_init: keg->uk_ipers too high!")); 1125 #ifdef UMA_DEBUG 1126 printf("UMA decided we need offpage slab headers for " 1127 "zone: %s, calculated wastedspace = %d, " 1128 "maximum wasted space allowed = %d, " 1129 "calculated ipers = %d, " 1130 "new wasted space = %d\n", zone->uz_name, wastedspace, 1131 UMA_MAX_WASTE, keg->uk_ipers, 1132 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize); 1133 #endif 1134 keg->uk_flags |= UMA_ZONE_OFFPAGE; 1135 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 1136 keg->uk_flags |= UMA_ZONE_HASH; 1137 } 1138 } 1139 1140 /* 1141 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do 1142 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be 1143 * more complicated. 1144 * 1145 * Arguments 1146 * zone The zone we should initialize 1147 * 1148 * Returns 1149 * Nothing 1150 */ 1151 static void 1152 zone_large_init(uma_zone_t zone) 1153 { 1154 uma_keg_t keg; 1155 int pages; 1156 1157 keg = zone->uz_keg; 1158 1159 KASSERT(keg != NULL, ("Keg is null in zone_large_init")); 1160 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0, 1161 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone")); 1162 1163 pages = keg->uk_size / UMA_SLAB_SIZE; 1164 1165 /* Account for remainder */ 1166 if ((pages * UMA_SLAB_SIZE) < keg->uk_size) 1167 pages++; 1168 1169 keg->uk_ppera = pages; 1170 keg->uk_ipers = 1; 1171 1172 keg->uk_flags |= UMA_ZONE_OFFPAGE; 1173 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 1174 keg->uk_flags |= UMA_ZONE_HASH; 1175 1176 keg->uk_rsize = keg->uk_size; 1177 } 1178 1179 /* 1180 * Keg header ctor. This initializes all fields, locks, etc. And inserts 1181 * the keg onto the global keg list. 
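 *
 * A keg owns the slabs and the backing pages; one or more zones (the primary
 * zone plus any secondaries created with uma_zsecond_create()) hang off it
 * through uk_zones and share its lock.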
1182 * 1183 * Arguments/Returns follow uma_ctor specifications 1184 * udata Actually uma_kctor_args 1185 */ 1186 static int 1187 keg_ctor(void *mem, int size, void *udata, int flags) 1188 { 1189 struct uma_kctor_args *arg = udata; 1190 uma_keg_t keg = mem; 1191 uma_zone_t zone; 1192 1193 bzero(keg, size); 1194 keg->uk_size = arg->size; 1195 keg->uk_init = arg->uminit; 1196 keg->uk_fini = arg->fini; 1197 keg->uk_align = arg->align; 1198 keg->uk_free = 0; 1199 keg->uk_pages = 0; 1200 keg->uk_flags = arg->flags; 1201 keg->uk_allocf = page_alloc; 1202 keg->uk_freef = page_free; 1203 keg->uk_recurse = 0; 1204 keg->uk_slabzone = NULL; 1205 1206 /* 1207 * The master zone is passed to us at keg-creation time. 1208 */ 1209 zone = arg->zone; 1210 zone->uz_keg = keg; 1211 1212 if (arg->flags & UMA_ZONE_VM) 1213 keg->uk_flags |= UMA_ZFLAG_CACHEONLY; 1214 1215 if (arg->flags & UMA_ZONE_ZINIT) 1216 keg->uk_init = zero_init; 1217 1218 /* 1219 * The +UMA_FRITM_SZ added to uk_size is to account for the 1220 * linkage that is added to the size in zone_small_init(). If 1221 * we don't account for this here then we may end up in 1222 * zone_small_init() with a calculated 'ipers' of 0. 1223 */ 1224 if (keg->uk_flags & UMA_ZONE_REFCNT) { 1225 if ((keg->uk_size+UMA_FRITMREF_SZ) > 1226 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt))) 1227 zone_large_init(zone); 1228 else 1229 zone_small_init(zone); 1230 } else { 1231 if ((keg->uk_size+UMA_FRITM_SZ) > 1232 (UMA_SLAB_SIZE - sizeof(struct uma_slab))) 1233 zone_large_init(zone); 1234 else 1235 zone_small_init(zone); 1236 } 1237 1238 if (keg->uk_flags & UMA_ZONE_OFFPAGE) { 1239 if (keg->uk_flags & UMA_ZONE_REFCNT) 1240 keg->uk_slabzone = slabrefzone; 1241 else 1242 keg->uk_slabzone = slabzone; 1243 } 1244 1245 /* 1246 * If we haven't booted yet we need allocations to go through the 1247 * startup cache until the vm is ready. 1248 */ 1249 if (keg->uk_ppera == 1) { 1250 #ifdef UMA_MD_SMALL_ALLOC 1251 keg->uk_allocf = uma_small_alloc; 1252 keg->uk_freef = uma_small_free; 1253 #endif 1254 if (booted == 0) 1255 keg->uk_allocf = startup_alloc; 1256 } 1257 1258 /* 1259 * Initialize keg's lock (shared among zones) through 1260 * Master zone 1261 */ 1262 zone->uz_lock = &keg->uk_lock; 1263 if (arg->flags & UMA_ZONE_MTXCLASS) 1264 ZONE_LOCK_INIT(zone, 1); 1265 else 1266 ZONE_LOCK_INIT(zone, 0); 1267 1268 /* 1269 * If we're putting the slab header in the actual page we need to 1270 * figure out where in each page it goes. This calculates a right 1271 * justified offset into the memory on an ALIGN_PTR boundary. 1272 */ 1273 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) { 1274 u_int totsize; 1275 1276 /* Size of the slab struct and free list */ 1277 if (keg->uk_flags & UMA_ZONE_REFCNT) 1278 totsize = sizeof(struct uma_slab_refcnt) + 1279 keg->uk_ipers * UMA_FRITMREF_SZ; 1280 else 1281 totsize = sizeof(struct uma_slab) + 1282 keg->uk_ipers * UMA_FRITM_SZ; 1283 1284 if (totsize & UMA_ALIGN_PTR) 1285 totsize = (totsize & ~UMA_ALIGN_PTR) + 1286 (UMA_ALIGN_PTR + 1); 1287 keg->uk_pgoff = UMA_SLAB_SIZE - totsize; 1288 1289 if (keg->uk_flags & UMA_ZONE_REFCNT) 1290 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt) 1291 + keg->uk_ipers * UMA_FRITMREF_SZ; 1292 else 1293 totsize = keg->uk_pgoff + sizeof(struct uma_slab) 1294 + keg->uk_ipers * UMA_FRITM_SZ; 1295 1296 /* 1297 * The only way the following is possible is if with our 1298 * UMA_ALIGN_PTR adjustments we are now bigger than 1299 * UMA_SLAB_SIZE. 
I haven't checked whether this is 1300 * mathematically possible for all cases, so we make 1301 * sure here anyway. 1302 */ 1303 if (totsize > UMA_SLAB_SIZE) { 1304 printf("zone %s ipers %d rsize %d size %d\n", 1305 zone->uz_name, keg->uk_ipers, keg->uk_rsize, 1306 keg->uk_size); 1307 panic("UMA slab won't fit.\n"); 1308 } 1309 } 1310 1311 if (keg->uk_flags & UMA_ZONE_HASH) 1312 hash_alloc(&keg->uk_hash); 1313 1314 #ifdef UMA_DEBUG 1315 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n", 1316 zone->uz_name, zone, 1317 keg->uk_size, keg->uk_ipers, 1318 keg->uk_ppera, keg->uk_pgoff); 1319 #endif 1320 1321 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link); 1322 1323 mtx_lock(&uma_mtx); 1324 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link); 1325 mtx_unlock(&uma_mtx); 1326 return (0); 1327 } 1328 1329 /* 1330 * Zone header ctor. This initializes all fields, locks, etc. 1331 * 1332 * Arguments/Returns follow uma_ctor specifications 1333 * udata Actually uma_zctor_args 1334 */ 1335 1336 static int 1337 zone_ctor(void *mem, int size, void *udata, int flags) 1338 { 1339 struct uma_zctor_args *arg = udata; 1340 uma_zone_t zone = mem; 1341 uma_zone_t z; 1342 uma_keg_t keg; 1343 1344 bzero(zone, size); 1345 zone->uz_name = arg->name; 1346 zone->uz_ctor = arg->ctor; 1347 zone->uz_dtor = arg->dtor; 1348 zone->uz_init = NULL; 1349 zone->uz_fini = NULL; 1350 zone->uz_allocs = 0; 1351 zone->uz_fills = zone->uz_count = 0; 1352 1353 if (arg->flags & UMA_ZONE_SECONDARY) { 1354 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg")); 1355 keg = arg->keg; 1356 zone->uz_keg = keg; 1357 zone->uz_init = arg->uminit; 1358 zone->uz_fini = arg->fini; 1359 zone->uz_lock = &keg->uk_lock; 1360 mtx_lock(&uma_mtx); 1361 ZONE_LOCK(zone); 1362 keg->uk_flags |= UMA_ZONE_SECONDARY; 1363 LIST_FOREACH(z, &keg->uk_zones, uz_link) { 1364 if (LIST_NEXT(z, uz_link) == NULL) { 1365 LIST_INSERT_AFTER(z, zone, uz_link); 1366 break; 1367 } 1368 } 1369 ZONE_UNLOCK(zone); 1370 mtx_unlock(&uma_mtx); 1371 } else if (arg->keg == NULL) { 1372 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini, 1373 arg->align, arg->flags) == NULL) 1374 return (ENOMEM); 1375 } else { 1376 struct uma_kctor_args karg; 1377 int error; 1378 1379 /* We should only be here from uma_startup() */ 1380 karg.size = arg->size; 1381 karg.uminit = arg->uminit; 1382 karg.fini = arg->fini; 1383 karg.align = arg->align; 1384 karg.flags = arg->flags; 1385 karg.zone = zone; 1386 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg, 1387 flags); 1388 if (error) 1389 return (error); 1390 } 1391 keg = zone->uz_keg; 1392 zone->uz_lock = &keg->uk_lock; 1393 1394 /* 1395 * Some internal zones don't have room allocated for the per cpu 1396 * caches. If we're internal, bail out here. 1397 */ 1398 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) { 1399 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0, 1400 ("Secondary zone requested UMA_ZFLAG_INTERNAL")); 1401 return (0); 1402 } 1403 1404 if (keg->uk_flags & UMA_ZONE_MAXBUCKET) 1405 zone->uz_count = BUCKET_MAX; 1406 else if (keg->uk_ipers <= BUCKET_MAX) 1407 zone->uz_count = keg->uk_ipers; 1408 else 1409 zone->uz_count = BUCKET_MAX; 1410 return (0); 1411 } 1412 1413 /* 1414 * Keg header dtor. This frees all data, destroys locks, frees the hash 1415 * table and removes the keg from the global list. 
1416 * 1417 * Arguments/Returns follow uma_dtor specifications 1418 * udata unused 1419 */ 1420 static void 1421 keg_dtor(void *arg, int size, void *udata) 1422 { 1423 uma_keg_t keg; 1424 1425 keg = (uma_keg_t)arg; 1426 mtx_lock(&keg->uk_lock); 1427 if (keg->uk_free != 0) { 1428 printf("Freed UMA keg was not empty (%d items). " 1429 " Lost %d pages of memory.\n", 1430 keg->uk_free, keg->uk_pages); 1431 } 1432 mtx_unlock(&keg->uk_lock); 1433 1434 if (keg->uk_flags & UMA_ZONE_HASH) 1435 hash_free(&keg->uk_hash); 1436 1437 mtx_destroy(&keg->uk_lock); 1438 } 1439 1440 /* 1441 * Zone header dtor. 1442 * 1443 * Arguments/Returns follow uma_dtor specifications 1444 * udata unused 1445 */ 1446 static void 1447 zone_dtor(void *arg, int size, void *udata) 1448 { 1449 uma_zone_t zone; 1450 uma_keg_t keg; 1451 1452 zone = (uma_zone_t)arg; 1453 keg = zone->uz_keg; 1454 1455 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) 1456 cache_drain(zone); 1457 1458 mtx_lock(&uma_mtx); 1459 zone_drain(zone); 1460 if (keg->uk_flags & UMA_ZONE_SECONDARY) { 1461 LIST_REMOVE(zone, uz_link); 1462 /* 1463 * XXX there are some races here where 1464 * the zone can be drained but zone lock 1465 * released and then refilled before we 1466 * remove it... we dont care for now 1467 */ 1468 ZONE_LOCK(zone); 1469 if (LIST_EMPTY(&keg->uk_zones)) 1470 keg->uk_flags &= ~UMA_ZONE_SECONDARY; 1471 ZONE_UNLOCK(zone); 1472 mtx_unlock(&uma_mtx); 1473 } else { 1474 LIST_REMOVE(keg, uk_link); 1475 LIST_REMOVE(zone, uz_link); 1476 mtx_unlock(&uma_mtx); 1477 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE); 1478 } 1479 zone->uz_keg = NULL; 1480 } 1481 1482 /* 1483 * Traverses every zone in the system and calls a callback 1484 * 1485 * Arguments: 1486 * zfunc A pointer to a function which accepts a zone 1487 * as an argument. 1488 * 1489 * Returns: 1490 * Nothing 1491 */ 1492 static void 1493 zone_foreach(void (*zfunc)(uma_zone_t)) 1494 { 1495 uma_keg_t keg; 1496 uma_zone_t zone; 1497 1498 mtx_lock(&uma_mtx); 1499 LIST_FOREACH(keg, &uma_kegs, uk_link) { 1500 LIST_FOREACH(zone, &keg->uk_zones, uz_link) 1501 zfunc(zone); 1502 } 1503 mtx_unlock(&uma_mtx); 1504 } 1505 1506 /* Public functions */ 1507 /* See uma.h */ 1508 void 1509 uma_startup(void *bootmem) 1510 { 1511 struct uma_zctor_args args; 1512 uma_slab_t slab; 1513 u_int slabsize; 1514 u_int objsize, totsize, wsize; 1515 int i; 1516 1517 #ifdef UMA_DEBUG 1518 printf("Creating uma keg headers zone and keg.\n"); 1519 #endif 1520 /* 1521 * The general UMA lock is a recursion-allowed lock because 1522 * there is a code path where, while we're still configured 1523 * to use startup_alloc() for backend page allocations, we 1524 * may end up in uma_reclaim() which calls zone_foreach(zone_drain), 1525 * which grabs uma_mtx, only to later call into startup_alloc() 1526 * because while freeing we needed to allocate a bucket. Since 1527 * startup_alloc() also takes uma_mtx, we need to be able to 1528 * recurse on it. 1529 */ 1530 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE); 1531 1532 /* 1533 * Figure out the maximum number of items-per-slab we'll have if 1534 * we're using the OFFPAGE slab header to track free items, given 1535 * all possible object sizes and the maximum desired wastage 1536 * (UMA_MAX_WASTE). 1537 * 1538 * We iterate until we find an object size for 1539 * which the calculated wastage in zone_small_init() will be 1540 * enough to warrant OFFPAGE. 
	 * Since wastedspace versus objsize
	 * is an overall increasing see-saw function, we find the smallest
	 * objsize such that the wastage is always acceptable for objects
	 * with that objsize or smaller.  Since a smaller objsize always
	 * generates a larger possible uma_max_ipers, we use this computed
	 * objsize to calculate the largest ipers possible.  Since the
	 * ipers calculated for OFFPAGE slab headers is always larger than
	 * the ipers initially calculated in zone_small_init(), we use
	 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
	 * obtain the maximum ipers possible for offpage slab headers.
	 *
	 * It should be noted that ipers versus objsize is an inversely
	 * proportional function which drops off rather quickly so as
	 * long as our UMA_MAX_WASTE is such that the objsize we calculate
	 * falls into the portion of the inverse relation AFTER the steep
	 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
	 *
	 * Note that we have 8-bits (1 byte) to use as a freelist index
	 * inside the actual slab header itself and this is enough to
	 * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
	 * object with offpage slab header would have ipers =
	 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
	 * 1 greater than what our byte-integer freelist index can
	 * accommodate, but we know that this situation never occurs as
	 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
	 * that we need to go to offpage slab headers.  Or, if we do,
	 * then we trap that condition below and panic in the INVARIANTS case.
	 */
	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
		    (objsize + UMA_FRITM_SZ);
		totsize *= (UMA_FRITM_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers = UMA_SLAB_SIZE / objsize;

	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
		    (objsize + UMA_FRITMREF_SZ);
		totsize *= (UMA_FRITMREF_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;

	KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
	    ("uma_startup: calculated uma_max_ipers values too large!"));

#ifdef UMA_DEBUG
	printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
	printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
	    uma_max_ipers_ref);
#endif

	/* "manually" create the initial zone */
	args.name = "UMA Kegs";
	args.size = sizeof(struct uma_keg);
	args.ctor = keg_ctor;
	args.dtor = keg_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = &masterkeg;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no per cpu queues so it's smaller */
	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	for (i = 0; i < UMA_BOOT_PAGES; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data =
(u_int8_t *)slab; 1622 slab->us_flags = UMA_SLAB_BOOT; 1623 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link); 1624 uma_boot_free++; 1625 } 1626 1627 #ifdef UMA_DEBUG 1628 printf("Creating uma zone headers zone and keg.\n"); 1629 #endif 1630 args.name = "UMA Zones"; 1631 args.size = sizeof(struct uma_zone) + 1632 (sizeof(struct uma_cache) * (mp_maxid + 1)); 1633 args.ctor = zone_ctor; 1634 args.dtor = zone_dtor; 1635 args.uminit = zero_init; 1636 args.fini = NULL; 1637 args.keg = NULL; 1638 args.align = 32 - 1; 1639 args.flags = UMA_ZFLAG_INTERNAL; 1640 /* The initial zone has no Per cpu queues so it's smaller */ 1641 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK); 1642 1643 #ifdef UMA_DEBUG 1644 printf("Initializing pcpu cache locks.\n"); 1645 #endif 1646 /* Initialize the pcpu cache lock set once and for all */ 1647 for (i = 0; i <= mp_maxid; i++) 1648 CPU_LOCK_INIT(i); 1649 1650 #ifdef UMA_DEBUG 1651 printf("Creating slab and hash zones.\n"); 1652 #endif 1653 1654 /* 1655 * This is the max number of free list items we'll have with 1656 * offpage slabs. 1657 */ 1658 slabsize = uma_max_ipers * UMA_FRITM_SZ; 1659 slabsize += sizeof(struct uma_slab); 1660 1661 /* Now make a zone for slab headers */ 1662 slabzone = uma_zcreate("UMA Slabs", 1663 slabsize, 1664 NULL, NULL, NULL, NULL, 1665 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 1666 1667 /* 1668 * We also create a zone for the bigger slabs with reference 1669 * counts in them, to accomodate UMA_ZONE_REFCNT zones. 1670 */ 1671 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ; 1672 slabsize += sizeof(struct uma_slab_refcnt); 1673 slabrefzone = uma_zcreate("UMA RCntSlabs", 1674 slabsize, 1675 NULL, NULL, NULL, NULL, 1676 UMA_ALIGN_PTR, 1677 UMA_ZFLAG_INTERNAL); 1678 1679 hashzone = uma_zcreate("UMA Hash", 1680 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, 1681 NULL, NULL, NULL, NULL, 1682 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 1683 1684 bucket_init(); 1685 1686 #ifdef UMA_MD_SMALL_ALLOC 1687 booted = 1; 1688 #endif 1689 1690 #ifdef UMA_DEBUG 1691 printf("UMA startup complete.\n"); 1692 #endif 1693 } 1694 1695 /* see uma.h */ 1696 void 1697 uma_startup2(void) 1698 { 1699 booted = 1; 1700 bucket_enable(); 1701 #ifdef UMA_DEBUG 1702 printf("UMA startup2 complete.\n"); 1703 #endif 1704 } 1705 1706 /* 1707 * Initialize our callout handle 1708 * 1709 */ 1710 1711 static void 1712 uma_startup3(void) 1713 { 1714 #ifdef UMA_DEBUG 1715 printf("Starting callout.\n"); 1716 #endif 1717 callout_init(&uma_callout, CALLOUT_MPSAFE); 1718 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 1719 #ifdef UMA_DEBUG 1720 printf("UMA startup3 complete.\n"); 1721 #endif 1722 } 1723 1724 static uma_zone_t 1725 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, 1726 int align, u_int16_t flags) 1727 { 1728 struct uma_kctor_args args; 1729 1730 args.size = size; 1731 args.uminit = uminit; 1732 args.fini = fini; 1733 args.align = align; 1734 args.flags = flags; 1735 args.zone = zone; 1736 return (uma_zalloc_internal(kegs, &args, M_WAITOK)); 1737 } 1738 1739 /* See uma.h */ 1740 uma_zone_t 1741 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor, 1742 uma_init uminit, uma_fini fini, int align, u_int16_t flags) 1743 1744 { 1745 struct uma_zctor_args args; 1746 1747 /* This stuff is essential for the zone ctor */ 1748 args.name = name; 1749 args.size = size; 1750 args.ctor = ctor; 1751 args.dtor = dtor; 1752 args.uminit = uminit; 1753 args.fini = fini; 1754 args.align = align; 1755 args.flags = flags; 1756 args.keg = NULL; 
1757 1758 return (uma_zalloc_internal(zones, &args, M_WAITOK)); 1759 } 1760 1761 /* See uma.h */ 1762 uma_zone_t 1763 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, 1764 uma_init zinit, uma_fini zfini, uma_zone_t master) 1765 { 1766 struct uma_zctor_args args; 1767 1768 args.name = name; 1769 args.size = master->uz_keg->uk_size; 1770 args.ctor = ctor; 1771 args.dtor = dtor; 1772 args.uminit = zinit; 1773 args.fini = zfini; 1774 args.align = master->uz_keg->uk_align; 1775 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY; 1776 args.keg = master->uz_keg; 1777 1778 return (uma_zalloc_internal(zones, &args, M_WAITOK)); 1779 } 1780 1781 /* See uma.h */ 1782 void 1783 uma_zdestroy(uma_zone_t zone) 1784 { 1785 uma_zfree_internal(zones, zone, NULL, SKIP_NONE); 1786 } 1787 1788 /* See uma.h */ 1789 void * 1790 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) 1791 { 1792 void *item; 1793 uma_cache_t cache; 1794 uma_bucket_t bucket; 1795 int cpu; 1796 int badness; 1797 1798 /* This is the fast path allocation */ 1799 #ifdef UMA_DEBUG_ALLOC_1 1800 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); 1801 #endif 1802 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread, 1803 zone->uz_name, flags); 1804 1805 if (!(flags & M_NOWAIT)) { 1806 KASSERT(curthread->td_intr_nesting_level == 0, 1807 ("malloc(M_WAITOK) in interrupt context")); 1808 if (nosleepwithlocks) { 1809 #ifdef WITNESS 1810 badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, 1811 NULL, 1812 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT", 1813 zone->uz_name); 1814 #else 1815 badness = 1; 1816 #endif 1817 } else { 1818 badness = 0; 1819 #ifdef WITNESS 1820 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1821 "malloc(M_WAITOK) of \"%s\"", zone->uz_name); 1822 #endif 1823 } 1824 if (badness) { 1825 flags &= ~M_WAITOK; 1826 flags |= M_NOWAIT; 1827 } 1828 } 1829 1830 zalloc_restart: 1831 cpu = PCPU_GET(cpuid); 1832 CPU_LOCK(cpu); 1833 cache = &zone->uz_cpu[cpu]; 1834 1835 zalloc_start: 1836 bucket = cache->uc_allocbucket; 1837 1838 if (bucket) { 1839 if (bucket->ub_cnt > 0) { 1840 bucket->ub_cnt--; 1841 item = bucket->ub_bucket[bucket->ub_cnt]; 1842 #ifdef INVARIANTS 1843 bucket->ub_bucket[bucket->ub_cnt] = NULL; 1844 #endif 1845 KASSERT(item != NULL, 1846 ("uma_zalloc: Bucket pointer mangled.")); 1847 cache->uc_allocs++; 1848 #ifdef INVARIANTS 1849 ZONE_LOCK(zone); 1850 uma_dbg_alloc(zone, NULL, item); 1851 ZONE_UNLOCK(zone); 1852 #endif 1853 CPU_UNLOCK(cpu); 1854 if (zone->uz_ctor != NULL) { 1855 if (zone->uz_ctor(item, zone->uz_keg->uk_size, 1856 udata, flags) != 0) { 1857 uma_zfree_internal(zone, item, udata, 1858 SKIP_DTOR); 1859 return (NULL); 1860 } 1861 } 1862 if (flags & M_ZERO) 1863 bzero(item, zone->uz_keg->uk_size); 1864 return (item); 1865 } else if (cache->uc_freebucket) { 1866 /* 1867 * We have run out of items in our allocbucket. 1868 * See if we can switch with our free bucket. 
			 */
			if (cache->uc_freebucket->ub_cnt > 0) {
#ifdef UMA_DEBUG_ALLOC
				printf("uma_zalloc: Swapping empty with"
				    " alloc.\n");
#endif
				bucket = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = bucket;

				goto zalloc_start;
			}
		}
	}
	ZONE_LOCK(zone);
	/* Since we have locked the zone we may as well send back our stats */
	zone->uz_allocs += cache->uc_allocs;
	cache->uc_allocs = 0;

	/* Our old one is now a free bucket */
	if (cache->uc_allocbucket) {
		KASSERT(cache->uc_allocbucket->ub_cnt == 0,
		    ("uma_zalloc_arg: Freeing a non free bucket."));
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    cache->uc_allocbucket, ub_link);
		cache->uc_allocbucket = NULL;
	}

	/* Check the free list for a new alloc bucket */
	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		KASSERT(bucket->ub_cnt != 0,
		    ("uma_zalloc_arg: Returning an empty bucket."));

		LIST_REMOVE(bucket, ub_link);
		cache->uc_allocbucket = bucket;
		ZONE_UNLOCK(zone);
		goto zalloc_start;
	}
	/* We are no longer associated with this cpu!!! */
	CPU_UNLOCK(cpu);

	/* Bump up our uz_count so we get here less */
	if (zone->uz_count < BUCKET_MAX)
		zone->uz_count++;

	/*
	 * Now let's just fill a bucket and put it on the free list.  If that
	 * works we'll restart the allocation from the beginning.
	 */
	if (uma_zalloc_bucket(zone, flags)) {
		ZONE_UNLOCK(zone);
		goto zalloc_restart;
	}
	ZONE_UNLOCK(zone);
	/*
	 * We may not be able to get a bucket so return an actual item.
	 */
#ifdef UMA_DEBUG
	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

	return (uma_zalloc_internal(zone, udata, flags));
}

static uma_slab_t
uma_zone_slab(uma_zone_t zone, int flags)
{
	uma_slab_t slab;
	uma_keg_t keg;

	keg = zone->uz_keg;

	/*
	 * This is to prevent us from recursively trying to allocate
	 * buckets.  The problem is that if an allocation forces us to
	 * grab a new bucket we will call page_alloc, which will go off
	 * and cause the vm to allocate vm_map_entries.  If we need new
	 * buckets there too we will recurse in kmem_alloc and bad
	 * things happen.  So instead we return a NULL bucket, and make
	 * the code that allocates buckets smart enough to deal with it.
	 *
	 * XXX: While we want this protection for the bucket zones so that
	 * recursion from the VM is handled (and the calling code that
	 * allocates buckets knows how to deal with it), we do not want
	 * to prevent allocation from the slab header zones (slabzone
	 * and slabrefzone) if uk_recurse is not zero for them.  The
	 * reason is that it could lead to NULL being returned for
	 * slab header allocations even in the M_WAITOK case, and the
	 * caller can't handle that.
	 */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
		if ((zone != slabzone) && (zone != slabrefzone))
			return (NULL);

	slab = NULL;

	for (;;) {
		/*
		 * Find a slab with some space.  Prefer slabs that are partially
		 * used over those that are totally full.  This helps to reduce
		 * fragmentation.
1970 */ 1971 if (keg->uk_free != 0) { 1972 if (!LIST_EMPTY(&keg->uk_part_slab)) { 1973 slab = LIST_FIRST(&keg->uk_part_slab); 1974 } else { 1975 slab = LIST_FIRST(&keg->uk_free_slab); 1976 LIST_REMOVE(slab, us_link); 1977 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, 1978 us_link); 1979 } 1980 return (slab); 1981 } 1982 1983 /* 1984 * M_NOVM means don't ask at all! 1985 */ 1986 if (flags & M_NOVM) 1987 break; 1988 1989 if (keg->uk_maxpages && 1990 keg->uk_pages >= keg->uk_maxpages) { 1991 keg->uk_flags |= UMA_ZFLAG_FULL; 1992 1993 if (flags & M_NOWAIT) 1994 break; 1995 else 1996 msleep(keg, &keg->uk_lock, PVM, 1997 "zonelimit", 0); 1998 continue; 1999 } 2000 keg->uk_recurse++; 2001 slab = slab_zalloc(zone, flags); 2002 keg->uk_recurse--; 2003 2004 /* 2005 * If we got a slab here it's safe to mark it partially used 2006 * and return. We assume that the caller is going to remove 2007 * at least one item. 2008 */ 2009 if (slab) { 2010 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); 2011 return (slab); 2012 } 2013 /* 2014 * We might not have been able to get a slab but another cpu 2015 * could have while we were unlocked. Check again before we 2016 * fail. 2017 */ 2018 if (flags & M_NOWAIT) 2019 flags |= M_NOVM; 2020 } 2021 return (slab); 2022 } 2023 2024 static void * 2025 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab) 2026 { 2027 uma_keg_t keg; 2028 uma_slabrefcnt_t slabref; 2029 void *item; 2030 u_int8_t freei; 2031 2032 keg = zone->uz_keg; 2033 2034 freei = slab->us_firstfree; 2035 if (keg->uk_flags & UMA_ZONE_REFCNT) { 2036 slabref = (uma_slabrefcnt_t)slab; 2037 slab->us_firstfree = slabref->us_freelist[freei].us_item; 2038 } else { 2039 slab->us_firstfree = slab->us_freelist[freei].us_item; 2040 } 2041 item = slab->us_data + (keg->uk_rsize * freei); 2042 2043 slab->us_freecount--; 2044 keg->uk_free--; 2045 #ifdef INVARIANTS 2046 uma_dbg_alloc(zone, slab, item); 2047 #endif 2048 /* Move this slab to the full list */ 2049 if (slab->us_freecount == 0) { 2050 LIST_REMOVE(slab, us_link); 2051 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link); 2052 } 2053 2054 return (item); 2055 } 2056 2057 static int 2058 uma_zalloc_bucket(uma_zone_t zone, int flags) 2059 { 2060 uma_bucket_t bucket; 2061 uma_slab_t slab; 2062 int16_t saved; 2063 int max, origflags = flags; 2064 2065 /* 2066 * Try this zone's free list first so we don't allocate extra buckets. 2067 */ 2068 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 2069 KASSERT(bucket->ub_cnt == 0, 2070 ("uma_zalloc_bucket: Bucket on free list is not empty.")); 2071 LIST_REMOVE(bucket, ub_link); 2072 } else { 2073 int bflags; 2074 2075 bflags = (flags & ~M_ZERO); 2076 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY) 2077 bflags |= M_NOVM; 2078 2079 ZONE_UNLOCK(zone); 2080 bucket = bucket_alloc(zone->uz_count, bflags); 2081 ZONE_LOCK(zone); 2082 } 2083 2084 if (bucket == NULL) 2085 return (0); 2086 2087 #ifdef SMP 2088 /* 2089 * This code is here to limit the number of simultaneous bucket fills 2090 * for any given zone to the number of per cpu caches in this zone. This 2091 * is done so that we don't allocate more memory than we really need. 
2092 */ 2093 if (zone->uz_fills >= mp_ncpus) 2094 goto done; 2095 2096 #endif 2097 zone->uz_fills++; 2098 2099 max = MIN(bucket->ub_entries, zone->uz_count); 2100 /* Try to keep the buckets totally full */ 2101 saved = bucket->ub_cnt; 2102 while (bucket->ub_cnt < max && 2103 (slab = uma_zone_slab(zone, flags)) != NULL) { 2104 while (slab->us_freecount && bucket->ub_cnt < max) { 2105 bucket->ub_bucket[bucket->ub_cnt++] = 2106 uma_slab_alloc(zone, slab); 2107 } 2108 2109 /* Don't block on the next fill */ 2110 flags |= M_NOWAIT; 2111 } 2112 2113 /* 2114 * We unlock here because we need to call the zone's init. 2115 * It should be safe to unlock because the slab dealt with 2116 * above is already on the appropriate list within the keg 2117 * and the bucket we filled is not yet on any list, so we 2118 * own it. 2119 */ 2120 if (zone->uz_init != NULL) { 2121 int i; 2122 2123 ZONE_UNLOCK(zone); 2124 for (i = saved; i < bucket->ub_cnt; i++) 2125 if (zone->uz_init(bucket->ub_bucket[i], 2126 zone->uz_keg->uk_size, origflags) != 0) 2127 break; 2128 /* 2129 * If we couldn't initialize the whole bucket, put the 2130 * rest back onto the freelist. 2131 */ 2132 if (i != bucket->ub_cnt) { 2133 int j; 2134 2135 for (j = i; j < bucket->ub_cnt; j++) { 2136 uma_zfree_internal(zone, bucket->ub_bucket[j], 2137 NULL, SKIP_FINI); 2138 #ifdef INVARIANTS 2139 bucket->ub_bucket[j] = NULL; 2140 #endif 2141 } 2142 bucket->ub_cnt = i; 2143 } 2144 ZONE_LOCK(zone); 2145 } 2146 2147 zone->uz_fills--; 2148 if (bucket->ub_cnt != 0) { 2149 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2150 bucket, ub_link); 2151 return (1); 2152 } 2153 #ifdef SMP 2154 done: 2155 #endif 2156 bucket_free(bucket); 2157 2158 return (0); 2159 } 2160 /* 2161 * Allocates an item for an internal zone 2162 * 2163 * Arguments 2164 * zone The zone to alloc for. 2165 * udata The data to be passed to the constructor. 2166 * flags M_WAITOK, M_NOWAIT, M_ZERO. 2167 * 2168 * Returns 2169 * NULL if there is no memory and M_NOWAIT is set 2170 * An item if successful 2171 */ 2172 2173 static void * 2174 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags) 2175 { 2176 uma_keg_t keg; 2177 uma_slab_t slab; 2178 void *item; 2179 2180 item = NULL; 2181 keg = zone->uz_keg; 2182 2183 #ifdef UMA_DEBUG_ALLOC 2184 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); 2185 #endif 2186 ZONE_LOCK(zone); 2187 2188 slab = uma_zone_slab(zone, flags); 2189 if (slab == NULL) { 2190 ZONE_UNLOCK(zone); 2191 return (NULL); 2192 } 2193 2194 item = uma_slab_alloc(zone, slab); 2195 2196 ZONE_UNLOCK(zone); 2197 2198 /* 2199 * We have to call both the zone's init (not the keg's init) 2200 * and the zone's ctor. This is because the item is going from 2201 * a keg slab directly to the user, and the user is expecting it 2202 * to be both zone-init'd as well as zone-ctor'd. 
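	 *
	 * (The keg's uk_init, by contrast, is applied by slab_zalloc() when a
	 * slab is first backed with pages, so it runs once per item per slab
	 * lifetime, while uz_init runs every time an item leaves the keg
	 * through this particular zone. That split is what lets secondary
	 * zones created with uma_zsecond_create() layer their own init/fini
	 * and ctor/dtor pairs on top of a shared keg.)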
2203 */ 2204 if (zone->uz_init != NULL) { 2205 if (zone->uz_init(item, keg->uk_size, flags) != 0) { 2206 uma_zfree_internal(zone, item, udata, SKIP_FINI); 2207 return (NULL); 2208 } 2209 } 2210 if (zone->uz_ctor != NULL) { 2211 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) { 2212 uma_zfree_internal(zone, item, udata, SKIP_DTOR); 2213 return (NULL); 2214 } 2215 } 2216 if (flags & M_ZERO) 2217 bzero(item, keg->uk_size); 2218 2219 return (item); 2220 } 2221 2222 /* See uma.h */ 2223 void 2224 uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 2225 { 2226 uma_keg_t keg; 2227 uma_cache_t cache; 2228 uma_bucket_t bucket; 2229 int bflags; 2230 int cpu; 2231 enum zfreeskip skip; 2232 2233 /* This is the fast path free */ 2234 skip = SKIP_NONE; 2235 keg = zone->uz_keg; 2236 2237 #ifdef UMA_DEBUG_ALLOC_1 2238 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); 2239 #endif 2240 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread, 2241 zone->uz_name); 2242 2243 /* 2244 * The race here is acceptable. If we miss it we'll just have to wait 2245 * a little longer for the limits to be reset. 2246 */ 2247 2248 if (keg->uk_flags & UMA_ZFLAG_FULL) 2249 goto zfree_internal; 2250 2251 if (zone->uz_dtor) { 2252 zone->uz_dtor(item, keg->uk_size, udata); 2253 skip = SKIP_DTOR; 2254 } 2255 2256 zfree_restart: 2257 cpu = PCPU_GET(cpuid); 2258 CPU_LOCK(cpu); 2259 cache = &zone->uz_cpu[cpu]; 2260 2261 zfree_start: 2262 bucket = cache->uc_freebucket; 2263 2264 if (bucket) { 2265 /* 2266 * Do we have room in our bucket? It is OK for this uz count 2267 * check to be slightly out of sync. 2268 */ 2269 2270 if (bucket->ub_cnt < bucket->ub_entries) { 2271 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL, 2272 ("uma_zfree: Freeing to non free bucket index.")); 2273 bucket->ub_bucket[bucket->ub_cnt] = item; 2274 bucket->ub_cnt++; 2275 #ifdef INVARIANTS 2276 ZONE_LOCK(zone); 2277 if (keg->uk_flags & UMA_ZONE_MALLOC) 2278 uma_dbg_free(zone, udata, item); 2279 else 2280 uma_dbg_free(zone, NULL, item); 2281 ZONE_UNLOCK(zone); 2282 #endif 2283 CPU_UNLOCK(cpu); 2284 return; 2285 } else if (cache->uc_allocbucket) { 2286 #ifdef UMA_DEBUG_ALLOC 2287 printf("uma_zfree: Swapping buckets.\n"); 2288 #endif 2289 /* 2290 * We have run out of space in our freebucket. 2291 * See if we can switch with our alloc bucket. 2292 */ 2293 if (cache->uc_allocbucket->ub_cnt < 2294 cache->uc_freebucket->ub_cnt) { 2295 bucket = cache->uc_freebucket; 2296 cache->uc_freebucket = cache->uc_allocbucket; 2297 cache->uc_allocbucket = bucket; 2298 goto zfree_start; 2299 } 2300 } 2301 } 2302 /* 2303 * We can get here for two reasons: 2304 * 2305 * 1) The buckets are NULL 2306 * 2) The alloc and free buckets are both somewhat full. 2307 */ 2308 2309 ZONE_LOCK(zone); 2310 2311 bucket = cache->uc_freebucket; 2312 cache->uc_freebucket = NULL; 2313 2314 /* Can we throw this on the zone full list? 
*/ 2315 if (bucket != NULL) { 2316 #ifdef UMA_DEBUG_ALLOC 2317 printf("uma_zfree: Putting old bucket on the free list.\n"); 2318 #endif 2319 /* ub_cnt is pointing to the last free item */ 2320 KASSERT(bucket->ub_cnt != 0, 2321 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); 2322 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2323 bucket, ub_link); 2324 } 2325 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 2326 LIST_REMOVE(bucket, ub_link); 2327 ZONE_UNLOCK(zone); 2328 cache->uc_freebucket = bucket; 2329 goto zfree_start; 2330 } 2331 /* We're done with this CPU now */ 2332 CPU_UNLOCK(cpu); 2333 2334 /* And the zone.. */ 2335 ZONE_UNLOCK(zone); 2336 2337 #ifdef UMA_DEBUG_ALLOC 2338 printf("uma_zfree: Allocating new free bucket.\n"); 2339 #endif 2340 bflags = M_NOWAIT; 2341 2342 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY) 2343 bflags |= M_NOVM; 2344 bucket = bucket_alloc(zone->uz_count, bflags); 2345 if (bucket) { 2346 ZONE_LOCK(zone); 2347 LIST_INSERT_HEAD(&zone->uz_free_bucket, 2348 bucket, ub_link); 2349 ZONE_UNLOCK(zone); 2350 goto zfree_restart; 2351 } 2352 2353 /* 2354 * If nothing else caught this, we'll just do an internal free. 2355 */ 2356 2357 zfree_internal: 2358 2359 #ifdef INVARIANTS 2360 /* 2361 * If we need to skip the dtor and the uma_dbg_free in 2362 * uma_zfree_internal because we've already called the dtor 2363 * above, but we ended up here, then we need to make sure 2364 * that we take care of the uma_dbg_free immediately. 2365 */ 2366 if (skip) { 2367 ZONE_LOCK(zone); 2368 if (keg->uk_flags & UMA_ZONE_MALLOC) 2369 uma_dbg_free(zone, udata, item); 2370 else 2371 uma_dbg_free(zone, NULL, item); 2372 ZONE_UNLOCK(zone); 2373 } 2374 #endif 2375 uma_zfree_internal(zone, item, udata, skip); 2376 2377 return; 2378 } 2379 2380 /* 2381 * Frees an item to an INTERNAL zone or allocates a free bucket 2382 * 2383 * Arguments: 2384 * zone The zone to free to 2385 * item The item we're freeing 2386 * udata User supplied data for the dtor 2387 * skip Skip dtors and finis 2388 */ 2389 static void 2390 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, 2391 enum zfreeskip skip) 2392 { 2393 uma_slab_t slab; 2394 uma_slabrefcnt_t slabref; 2395 uma_keg_t keg; 2396 u_int8_t *mem; 2397 u_int8_t freei; 2398 2399 keg = zone->uz_keg; 2400 2401 if (skip < SKIP_DTOR && zone->uz_dtor) 2402 zone->uz_dtor(item, keg->uk_size, udata); 2403 if (skip < SKIP_FINI && zone->uz_fini) 2404 zone->uz_fini(item, keg->uk_size); 2405 2406 ZONE_LOCK(zone); 2407 2408 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) { 2409 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); 2410 if (keg->uk_flags & UMA_ZONE_HASH) 2411 slab = hash_sfind(&keg->uk_hash, mem); 2412 else { 2413 mem += keg->uk_pgoff; 2414 slab = (uma_slab_t)mem; 2415 } 2416 } else { 2417 slab = (uma_slab_t)udata; 2418 } 2419 2420 /* Do we need to remove from any lists? 
*/ 2421 if (slab->us_freecount+1 == keg->uk_ipers) { 2422 LIST_REMOVE(slab, us_link); 2423 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); 2424 } else if (slab->us_freecount == 0) { 2425 LIST_REMOVE(slab, us_link); 2426 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); 2427 } 2428 2429 /* Slab management stuff */ 2430 freei = ((unsigned long)item - (unsigned long)slab->us_data) 2431 / keg->uk_rsize; 2432 2433 #ifdef INVARIANTS 2434 if (!skip) 2435 uma_dbg_free(zone, slab, item); 2436 #endif 2437 2438 if (keg->uk_flags & UMA_ZONE_REFCNT) { 2439 slabref = (uma_slabrefcnt_t)slab; 2440 slabref->us_freelist[freei].us_item = slab->us_firstfree; 2441 } else { 2442 slab->us_freelist[freei].us_item = slab->us_firstfree; 2443 } 2444 slab->us_firstfree = freei; 2445 slab->us_freecount++; 2446 2447 /* Zone statistics */ 2448 keg->uk_free++; 2449 2450 if (keg->uk_flags & UMA_ZFLAG_FULL) { 2451 if (keg->uk_pages < keg->uk_maxpages) 2452 keg->uk_flags &= ~UMA_ZFLAG_FULL; 2453 2454 /* We can handle one more allocation */ 2455 wakeup_one(keg); 2456 } 2457 2458 ZONE_UNLOCK(zone); 2459 } 2460 2461 /* See uma.h */ 2462 void 2463 uma_zone_set_max(uma_zone_t zone, int nitems) 2464 { 2465 uma_keg_t keg; 2466 2467 keg = zone->uz_keg; 2468 ZONE_LOCK(zone); 2469 if (keg->uk_ppera > 1) 2470 keg->uk_maxpages = nitems * keg->uk_ppera; 2471 else 2472 keg->uk_maxpages = nitems / keg->uk_ipers; 2473 2474 if (keg->uk_maxpages * keg->uk_ipers < nitems) 2475 keg->uk_maxpages++; 2476 2477 ZONE_UNLOCK(zone); 2478 } 2479 2480 /* See uma.h */ 2481 void 2482 uma_zone_set_init(uma_zone_t zone, uma_init uminit) 2483 { 2484 ZONE_LOCK(zone); 2485 KASSERT(zone->uz_keg->uk_pages == 0, 2486 ("uma_zone_set_init on non-empty keg")); 2487 zone->uz_keg->uk_init = uminit; 2488 ZONE_UNLOCK(zone); 2489 } 2490 2491 /* See uma.h */ 2492 void 2493 uma_zone_set_fini(uma_zone_t zone, uma_fini fini) 2494 { 2495 ZONE_LOCK(zone); 2496 KASSERT(zone->uz_keg->uk_pages == 0, 2497 ("uma_zone_set_fini on non-empty keg")); 2498 zone->uz_keg->uk_fini = fini; 2499 ZONE_UNLOCK(zone); 2500 } 2501 2502 /* See uma.h */ 2503 void 2504 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) 2505 { 2506 ZONE_LOCK(zone); 2507 KASSERT(zone->uz_keg->uk_pages == 0, 2508 ("uma_zone_set_zinit on non-empty keg")); 2509 zone->uz_init = zinit; 2510 ZONE_UNLOCK(zone); 2511 } 2512 2513 /* See uma.h */ 2514 void 2515 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini) 2516 { 2517 ZONE_LOCK(zone); 2518 KASSERT(zone->uz_keg->uk_pages == 0, 2519 ("uma_zone_set_zfini on non-empty keg")); 2520 zone->uz_fini = zfini; 2521 ZONE_UNLOCK(zone); 2522 } 2523 2524 /* See uma.h */ 2525 /* XXX uk_freef is not actually used with the zone locked */ 2526 void 2527 uma_zone_set_freef(uma_zone_t zone, uma_free freef) 2528 { 2529 ZONE_LOCK(zone); 2530 zone->uz_keg->uk_freef = freef; 2531 ZONE_UNLOCK(zone); 2532 } 2533 2534 /* See uma.h */ 2535 /* XXX uk_allocf is not actually used with the zone locked */ 2536 void 2537 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) 2538 { 2539 ZONE_LOCK(zone); 2540 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC; 2541 zone->uz_keg->uk_allocf = allocf; 2542 ZONE_UNLOCK(zone); 2543 } 2544 2545 /* See uma.h */ 2546 int 2547 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) 2548 { 2549 uma_keg_t keg; 2550 vm_offset_t kva; 2551 int pages; 2552 2553 keg = zone->uz_keg; 2554 pages = count / keg->uk_ipers; 2555 2556 if (pages * keg->uk_ipers < count) 2557 pages++; 2558 2559 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE); 2560 2561 
	if (kva == 0)
2562 		return (0);
2563 	if (obj == NULL) {
2564 		obj = vm_object_allocate(OBJT_DEFAULT,
2565 		    pages);
2566 	} else {
2567 		VM_OBJECT_LOCK_INIT(obj, "uma object");
2568 		_vm_object_allocate(OBJT_DEFAULT,
2569 		    pages, obj);
2570 	}
2571 	ZONE_LOCK(zone);
2572 	keg->uk_kva = kva;
2573 	keg->uk_obj = obj;
2574 	keg->uk_maxpages = pages;
2575 	keg->uk_allocf = obj_alloc;
2576 	keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2577 	ZONE_UNLOCK(zone);
2578 	return (1);
2579 }
2580 
2581 /* See uma.h */
2582 void
2583 uma_prealloc(uma_zone_t zone, int items)
2584 {
2585 	int slabs;
2586 	uma_slab_t slab;
2587 	uma_keg_t keg;
2588 
2589 	keg = zone->uz_keg;
2590 	ZONE_LOCK(zone);
2591 	slabs = items / keg->uk_ipers;
2592 	if (slabs * keg->uk_ipers < items)
2593 		slabs++;
2594 	while (slabs > 0) {
2595 		slab = slab_zalloc(zone, M_WAITOK);
2596 		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2597 		slabs--;
2598 	}
2599 	ZONE_UNLOCK(zone);
2600 }
2601 
2602 /* See uma.h */
2603 u_int32_t *
2604 uma_find_refcnt(uma_zone_t zone, void *item)
2605 {
2606 	uma_slabrefcnt_t slabref;
2607 	uma_keg_t keg;
2608 	u_int32_t *refcnt;
2609 	int idx;
2610 
2611 	keg = zone->uz_keg;
2612 	slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2613 	    (~UMA_SLAB_MASK));
2614 	KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2615 	    ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2616 	idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2617 	    / keg->uk_rsize;
2618 	refcnt = &slabref->us_freelist[idx].us_refcnt;
2619 	return (refcnt);
2620 }
2621 
2622 /* See uma.h */
2623 void
2624 uma_reclaim(void)
2625 {
2626 #ifdef UMA_DEBUG
2627 	printf("UMA: vm asked us to release pages!\n");
2628 #endif
2629 	bucket_enable();
2630 	zone_foreach(zone_drain);
2631 	/*
2632 	 * Draining the zones above may have freed slab headers back into
2633 	 * slabzone and slabrefzone after those two were visited, so drain them
2634 	 * again to release pages that are now empty. Do the same for buckets.
2635 */ 2636 zone_drain(slabzone); 2637 zone_drain(slabrefzone); 2638 bucket_zone_drain(); 2639 } 2640 2641 void * 2642 uma_large_malloc(int size, int wait) 2643 { 2644 void *mem; 2645 uma_slab_t slab; 2646 u_int8_t flags; 2647 2648 slab = uma_zalloc_internal(slabzone, NULL, wait); 2649 if (slab == NULL) 2650 return (NULL); 2651 mem = page_alloc(NULL, size, &flags, wait); 2652 if (mem) { 2653 vsetslab((vm_offset_t)mem, slab); 2654 slab->us_data = mem; 2655 slab->us_flags = flags | UMA_SLAB_MALLOC; 2656 slab->us_size = size; 2657 } else { 2658 uma_zfree_internal(slabzone, slab, NULL, 0); 2659 } 2660 2661 return (mem); 2662 } 2663 2664 void 2665 uma_large_free(uma_slab_t slab) 2666 { 2667 vsetobj((vm_offset_t)slab->us_data, kmem_object); 2668 page_free(slab->us_data, slab->us_size, slab->us_flags); 2669 uma_zfree_internal(slabzone, slab, NULL, 0); 2670 } 2671 2672 void 2673 uma_print_stats(void) 2674 { 2675 zone_foreach(uma_print_zone); 2676 } 2677 2678 static void 2679 slab_print(uma_slab_t slab) 2680 { 2681 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n", 2682 slab->us_keg, slab->us_data, slab->us_freecount, 2683 slab->us_firstfree); 2684 } 2685 2686 static void 2687 cache_print(uma_cache_t cache) 2688 { 2689 printf("alloc: %p(%d), free: %p(%d)\n", 2690 cache->uc_allocbucket, 2691 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0, 2692 cache->uc_freebucket, 2693 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0); 2694 } 2695 2696 void 2697 uma_print_zone(uma_zone_t zone) 2698 { 2699 uma_cache_t cache; 2700 uma_keg_t keg; 2701 uma_slab_t slab; 2702 int i; 2703 2704 keg = zone->uz_keg; 2705 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", 2706 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags, 2707 keg->uk_ipers, keg->uk_ppera, 2708 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free); 2709 printf("Part slabs:\n"); 2710 LIST_FOREACH(slab, &keg->uk_part_slab, us_link) 2711 slab_print(slab); 2712 printf("Free slabs:\n"); 2713 LIST_FOREACH(slab, &keg->uk_free_slab, us_link) 2714 slab_print(slab); 2715 printf("Full slabs:\n"); 2716 LIST_FOREACH(slab, &keg->uk_full_slab, us_link) 2717 slab_print(slab); 2718 for (i = 0; i <= mp_maxid; i++) { 2719 if (CPU_ABSENT(i)) 2720 continue; 2721 cache = &zone->uz_cpu[i]; 2722 printf("CPU %d Cache:\n", i); 2723 cache_print(cache); 2724 } 2725 } 2726 2727 /* 2728 * Sysctl handler for vm.zone 2729 * 2730 * stolen from vm_zone.c 2731 */ 2732 static int 2733 sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 2734 { 2735 int error, len, cnt; 2736 const int linesize = 128; /* conservative */ 2737 int totalfree; 2738 char *tmpbuf, *offset; 2739 uma_zone_t z; 2740 uma_keg_t zk; 2741 char *p; 2742 int cpu; 2743 int cachefree; 2744 uma_bucket_t bucket; 2745 uma_cache_t cache; 2746 2747 cnt = 0; 2748 mtx_lock(&uma_mtx); 2749 LIST_FOREACH(zk, &uma_kegs, uk_link) { 2750 LIST_FOREACH(z, &zk->uk_zones, uz_link) 2751 cnt++; 2752 } 2753 mtx_unlock(&uma_mtx); 2754 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize, 2755 M_TEMP, M_WAITOK); 2756 len = snprintf(tmpbuf, linesize, 2757 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n"); 2758 if (cnt == 0) 2759 tmpbuf[len - 1] = '\0'; 2760 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? 
len-1 : len); 2761 if (error || cnt == 0) 2762 goto out; 2763 offset = tmpbuf; 2764 mtx_lock(&uma_mtx); 2765 LIST_FOREACH(zk, &uma_kegs, uk_link) { 2766 LIST_FOREACH(z, &zk->uk_zones, uz_link) { 2767 if (cnt == 0) /* list may have changed size */ 2768 break; 2769 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { 2770 for (cpu = 0; cpu <= mp_maxid; cpu++) { 2771 if (CPU_ABSENT(cpu)) 2772 continue; 2773 CPU_LOCK(cpu); 2774 } 2775 } 2776 ZONE_LOCK(z); 2777 cachefree = 0; 2778 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { 2779 for (cpu = 0; cpu <= mp_maxid; cpu++) { 2780 if (CPU_ABSENT(cpu)) 2781 continue; 2782 cache = &z->uz_cpu[cpu]; 2783 if (cache->uc_allocbucket != NULL) 2784 cachefree += cache->uc_allocbucket->ub_cnt; 2785 if (cache->uc_freebucket != NULL) 2786 cachefree += cache->uc_freebucket->ub_cnt; 2787 CPU_UNLOCK(cpu); 2788 } 2789 } 2790 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) { 2791 cachefree += bucket->ub_cnt; 2792 } 2793 totalfree = zk->uk_free + cachefree; 2794 len = snprintf(offset, linesize, 2795 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", 2796 z->uz_name, zk->uk_size, 2797 zk->uk_maxpages * zk->uk_ipers, 2798 (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree, 2799 totalfree, 2800 (unsigned long long)z->uz_allocs); 2801 ZONE_UNLOCK(z); 2802 for (p = offset + 12; p > offset && *p == ' '; --p) 2803 /* nothing */ ; 2804 p[1] = ':'; 2805 cnt--; 2806 offset += len; 2807 } 2808 } 2809 mtx_unlock(&uma_mtx); 2810 *offset++ = '\0'; 2811 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); 2812 out: 2813 FREE(tmpbuf, M_TEMP); 2814 return (error); 2815 } 2816
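/*
 * The sketch below is illustrative only and is never compiled: it shows how
 * a typical consumer is expected to drive the interfaces implemented above
 * (uma_zcreate(), uma_zalloc(), uma_zfree(), uma_zone_set_max()).  The "foo"
 * names are made up; see uma.h for the authoritative prototypes.
 */
#if 0
struct foo {
	int	foo_refs;
	/* ... subsystem specific fields ... */
};

static uma_zone_t foo_zone;

/* Constructor: applied to the item on every successful uma_zalloc(). */
static int
foo_ctor(void *mem, int size, void *arg, int flags)
{
	struct foo *f = mem;

	f->foo_refs = 1;
	return (0);		/* Returning non-zero fails the allocation. */
}

static void
foo_zone_setup(void)
{

	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    foo_ctor, NULL,		/* ctor, dtor */
	    NULL, NULL,			/* keg-level item init, fini */
	    UMA_ALIGN_PTR, 0);
	/* Cap the zone near 4096 items; the limit is rounded to whole slabs. */
	uma_zone_set_max(foo_zone, 4096);
}

static struct foo *
foo_alloc(int how)
{

	/* "how" is M_WAITOK or M_NOWAIT; M_ZERO may be or'ed in as well. */
	return (uma_zalloc(foo_zone, how));
}

static void
foo_free(struct foo *f)
{

	uma_zfree(foo_zone, f);
}
#endif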