/*
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uma_core.c  Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.  This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose algorithms
 * are well known.
 *
 */

/*
 * TODO:
 *	- Improve memory usage for large allocations
 *	- Investigate cache size adjustments
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/* I should really use ktr.. */
/*
#define UMA_DEBUG 1
#define UMA_DEBUG_ALLOC 1
#define UMA_DEBUG_ALLOC_1 1
*/

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

#include <machine/vmparam.h>

/*
 * This is the zone and keg from which all zones are spawned.  The idea is that
 * even the zone & keg heads are allocated from the allocator, so we use the
 * bss section to bootstrap us.
 */
static struct uma_keg masterkeg;
static struct uma_zone masterzone_k;
static struct uma_zone masterzone_z;
static uma_zone_t kegs = &masterzone_k;
static uma_zone_t zones = &masterzone_z;

/* This is the zone from which all of uma_slab_t's are allocated.
*/ 97 static uma_zone_t slabzone; 98 static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */ 99 100 /* 101 * The initial hash tables come out of this zone so they can be allocated 102 * prior to malloc coming up. 103 */ 104 static uma_zone_t hashzone; 105 106 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets"); 107 108 /* 109 * Are we allowed to allocate buckets? 110 */ 111 static int bucketdisable = 1; 112 113 /* Linked list of all kegs in the system */ 114 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs); 115 116 /* This mutex protects the keg list */ 117 static struct mtx uma_mtx; 118 119 /* These are the pcpu cache locks */ 120 static struct mtx uma_pcpu_mtx[MAXCPU]; 121 122 /* Linked list of boot time pages */ 123 static LIST_HEAD(,uma_slab) uma_boot_pages = 124 LIST_HEAD_INITIALIZER(&uma_boot_pages); 125 126 /* Count of free boottime pages */ 127 static int uma_boot_free = 0; 128 129 /* Is the VM done starting up? */ 130 static int booted = 0; 131 132 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */ 133 static u_int uma_max_ipers; 134 static u_int uma_max_ipers_ref; 135 136 /* 137 * This is the handle used to schedule events that need to happen 138 * outside of the allocation fast path. 139 */ 140 static struct callout uma_callout; 141 #define UMA_TIMEOUT 20 /* Seconds for callout interval. */ 142 143 /* 144 * This structure is passed as the zone ctor arg so that I don't have to create 145 * a special allocation function just for zones. 146 */ 147 struct uma_zctor_args { 148 char *name; 149 size_t size; 150 uma_ctor ctor; 151 uma_dtor dtor; 152 uma_init uminit; 153 uma_fini fini; 154 uma_keg_t keg; 155 int align; 156 u_int16_t flags; 157 }; 158 159 struct uma_kctor_args { 160 uma_zone_t zone; 161 size_t size; 162 uma_init uminit; 163 uma_fini fini; 164 int align; 165 u_int16_t flags; 166 }; 167 168 struct uma_bucket_zone { 169 uma_zone_t ubz_zone; 170 char *ubz_name; 171 int ubz_entries; 172 }; 173 174 #define BUCKET_MAX 128 175 176 struct uma_bucket_zone bucket_zones[] = { 177 { NULL, "16 Bucket", 16 }, 178 { NULL, "32 Bucket", 32 }, 179 { NULL, "64 Bucket", 64 }, 180 { NULL, "128 Bucket", 128 }, 181 { NULL, NULL, 0} 182 }; 183 184 #define BUCKET_SHIFT 4 185 #define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1) 186 187 uint8_t bucket_size[BUCKET_ZONES]; 188 189 enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI }; 190 191 /* Prototypes.. 
*/ 192 193 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int); 194 static void *page_alloc(uma_zone_t, int, u_int8_t *, int); 195 static void *startup_alloc(uma_zone_t, int, u_int8_t *, int); 196 static void page_free(void *, int, u_int8_t); 197 static uma_slab_t slab_zalloc(uma_zone_t, int); 198 static void cache_drain(uma_zone_t); 199 static void bucket_drain(uma_zone_t, uma_bucket_t); 200 static void bucket_cache_drain(uma_zone_t zone); 201 static int keg_ctor(void *, int, void *, int); 202 static void keg_dtor(void *, int, void *); 203 static int zone_ctor(void *, int, void *, int); 204 static void zone_dtor(void *, int, void *); 205 static int zero_init(void *, int, int); 206 static void zone_small_init(uma_zone_t zone); 207 static void zone_large_init(uma_zone_t zone); 208 static void zone_foreach(void (*zfunc)(uma_zone_t)); 209 static void zone_timeout(uma_zone_t zone); 210 static int hash_alloc(struct uma_hash *); 211 static int hash_expand(struct uma_hash *, struct uma_hash *); 212 static void hash_free(struct uma_hash *hash); 213 static void uma_timeout(void *); 214 static void uma_startup3(void); 215 static void *uma_zalloc_internal(uma_zone_t, void *, int); 216 static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip); 217 static void bucket_enable(void); 218 static void bucket_init(void); 219 static uma_bucket_t bucket_alloc(int, int); 220 static void bucket_free(uma_bucket_t); 221 static void bucket_zone_drain(void); 222 static int uma_zalloc_bucket(uma_zone_t zone, int flags); 223 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags); 224 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab); 225 static void zone_drain(uma_zone_t); 226 static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, 227 uma_fini fini, int align, u_int16_t flags); 228 229 void uma_print_zone(uma_zone_t); 230 void uma_print_stats(void); 231 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); 232 233 #ifdef WITNESS 234 static int nosleepwithlocks = 1; 235 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks, 236 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths"); 237 #else 238 static int nosleepwithlocks = 0; 239 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks, 240 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths"); 241 #endif 242 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, 243 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 244 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); 245 246 /* 247 * This routine checks to see whether or not it's safe to enable buckets. 
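 * Buckets are disabled whenever the free page count drops below the
 * v_free_min target, so low-memory situations fall back to plain slab
 * allocation instead of tying pages up in per-cpu caches.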
248 */ 249 250 static void 251 bucket_enable(void) 252 { 253 if (cnt.v_free_count < cnt.v_free_min) 254 bucketdisable = 1; 255 else 256 bucketdisable = 0; 257 } 258 259 static void 260 bucket_init(void) 261 { 262 struct uma_bucket_zone *ubz; 263 int i; 264 int j; 265 266 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) { 267 int size; 268 269 ubz = &bucket_zones[j]; 270 size = roundup(sizeof(struct uma_bucket), sizeof(void *)); 271 size += sizeof(void *) * ubz->ubz_entries; 272 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, 273 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 274 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT)) 275 bucket_size[i >> BUCKET_SHIFT] = j; 276 } 277 } 278 279 static uma_bucket_t 280 bucket_alloc(int entries, int bflags) 281 { 282 struct uma_bucket_zone *ubz; 283 uma_bucket_t bucket; 284 int idx; 285 286 /* 287 * This is to stop us from allocating per cpu buckets while we're 288 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the 289 * boot pages. This also prevents us from allocating buckets in 290 * low memory situations. 291 */ 292 293 if (bucketdisable) 294 return (NULL); 295 idx = howmany(entries, 1 << BUCKET_SHIFT); 296 ubz = &bucket_zones[bucket_size[idx]]; 297 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags); 298 if (bucket) { 299 #ifdef INVARIANTS 300 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries); 301 #endif 302 bucket->ub_cnt = 0; 303 bucket->ub_entries = ubz->ubz_entries; 304 } 305 306 return (bucket); 307 } 308 309 static void 310 bucket_free(uma_bucket_t bucket) 311 { 312 struct uma_bucket_zone *ubz; 313 int idx; 314 315 idx = howmany(bucket->ub_entries, 1 << BUCKET_SHIFT); 316 ubz = &bucket_zones[bucket_size[idx]]; 317 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE); 318 } 319 320 static void 321 bucket_zone_drain(void) 322 { 323 struct uma_bucket_zone *ubz; 324 325 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) 326 zone_drain(ubz->ubz_zone); 327 } 328 329 330 /* 331 * Routine called by timeout which is used to fire off some time interval 332 * based calculations. (stats, hash size, etc.) 333 * 334 * Arguments: 335 * arg Unused 336 * 337 * Returns: 338 * Nothing 339 */ 340 static void 341 uma_timeout(void *unused) 342 { 343 bucket_enable(); 344 zone_foreach(zone_timeout); 345 346 /* Reschedule this event */ 347 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 348 } 349 350 /* 351 * Routine to perform timeout driven calculations. This expands the 352 * hashes and does per cpu statistics aggregation. 353 * 354 * Arguments: 355 * zone The zone to operate on 356 * 357 * Returns: 358 * Nothing 359 */ 360 static void 361 zone_timeout(uma_zone_t zone) 362 { 363 uma_keg_t keg; 364 uma_cache_t cache; 365 u_int64_t alloc; 366 int cpu; 367 368 keg = zone->uz_keg; 369 alloc = 0; 370 371 /* 372 * Aggregate per cpu cache statistics back to the zone. 373 * 374 * XXX This should be done in the sysctl handler. 375 * 376 * I may rewrite this to set a flag in the per cpu cache instead of 377 * locking. If the flag is not cleared on the next round I will have 378 * to lock and do it here instead so that the statistics don't get too 379 * far out of sync. 
	 */
	if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) {
		for (cpu = 0; cpu <= mp_maxid; cpu++) {
			if (CPU_ABSENT(cpu))
				continue;
			CPU_LOCK(cpu);
			cache = &zone->uz_cpu[cpu];
			/* Add them up, and reset */
			alloc += cache->uc_allocs;
			cache->uc_allocs = 0;
			CPU_UNLOCK(cpu);
		}
	}

	/* Now push these stats back into the zone.. */
	ZONE_LOCK(zone);
	zone->uz_allocs += alloc;

	/*
	 * Expand the zone hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is eliminate collisions entirely. This
	 * may be a little aggressive. Should I allow for two collisions max?
	 */

	if (keg->uk_flags & UMA_ZONE_HASH &&
	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
		struct uma_hash newhash;
		struct uma_hash oldhash;
		int ret;

		/*
		 * This is so involved because allocating and freeing
		 * while the zone lock is held will lead to deadlock.
		 * I have to do everything in stages and check for
		 * races.
		 */
		newhash = keg->uk_hash;
		ZONE_UNLOCK(zone);
		ret = hash_alloc(&newhash);
		ZONE_LOCK(zone);
		if (ret) {
			if (hash_expand(&keg->uk_hash, &newhash)) {
				oldhash = keg->uk_hash;
				keg->uk_hash = newhash;
			} else
				oldhash = newhash;

			ZONE_UNLOCK(zone);
			hash_free(&oldhash);
			ZONE_LOCK(zone);
		}
	}
	ZONE_UNLOCK(zone);
}

/*
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	hash  A new hash structure with the old hash size in uh_hashsize
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
static int
hash_alloc(struct uma_hash *hash)
{
	int oldsize;
	int alloc;

	oldsize = hash->uh_hashsize;

	/* We're just going to go to a power of two greater */
	if (oldsize) {
		hash->uh_hashsize = oldsize * 2;
		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
		    M_UMAHASH, M_NOWAIT);
	} else {
		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
		    M_WAITOK);
		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
	}
	if (hash->uh_slab_hash) {
		bzero(hash->uh_slab_hash, alloc);
		hash->uh_hashmask = hash->uh_hashsize - 1;
		return (1);
	}

	return (0);
}

/*
 * Expands the hash table for HASH zones. This is done from zone_timeout
 * to reduce collisions. This must not be done in the regular allocation
 * path; otherwise we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	oldhash  The hash you want to expand
 *	newhash  The hash structure for the new table
 *
 * Returns:
 *	1 on success and 0 on failure.
 *
 * Discussion:
 */
static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
	uma_slab_t slab;
	int hval;
	int i;

	if (!newhash->uh_slab_hash)
		return (0);

	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
		return (0);

	/*
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
506 */ 507 508 for (i = 0; i < oldhash->uh_hashsize; i++) 509 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) { 510 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]); 511 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink); 512 hval = UMA_HASH(newhash, slab->us_data); 513 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval], 514 slab, us_hlink); 515 } 516 517 return (1); 518 } 519 520 /* 521 * Free the hash bucket to the appropriate backing store. 522 * 523 * Arguments: 524 * slab_hash The hash bucket we're freeing 525 * hashsize The number of entries in that hash bucket 526 * 527 * Returns: 528 * Nothing 529 */ 530 static void 531 hash_free(struct uma_hash *hash) 532 { 533 if (hash->uh_slab_hash == NULL) 534 return; 535 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT) 536 uma_zfree_internal(hashzone, 537 hash->uh_slab_hash, NULL, SKIP_NONE); 538 else 539 free(hash->uh_slab_hash, M_UMAHASH); 540 } 541 542 /* 543 * Frees all outstanding items in a bucket 544 * 545 * Arguments: 546 * zone The zone to free to, must be unlocked. 547 * bucket The free/alloc bucket with items, cpu queue must be locked. 548 * 549 * Returns: 550 * Nothing 551 */ 552 553 static void 554 bucket_drain(uma_zone_t zone, uma_bucket_t bucket) 555 { 556 uma_slab_t slab; 557 int mzone; 558 void *item; 559 560 if (bucket == NULL) 561 return; 562 563 slab = NULL; 564 mzone = 0; 565 566 /* We have to lookup the slab again for malloc.. */ 567 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC) 568 mzone = 1; 569 570 while (bucket->ub_cnt > 0) { 571 bucket->ub_cnt--; 572 item = bucket->ub_bucket[bucket->ub_cnt]; 573 #ifdef INVARIANTS 574 bucket->ub_bucket[bucket->ub_cnt] = NULL; 575 KASSERT(item != NULL, 576 ("bucket_drain: botched ptr, item is NULL")); 577 #endif 578 /* 579 * This is extremely inefficient. The slab pointer was passed 580 * to uma_zfree_arg, but we lost it because the buckets don't 581 * hold them. This will go away when free() gets a size passed 582 * to it. 583 */ 584 if (mzone) 585 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK)); 586 uma_zfree_internal(zone, item, slab, SKIP_DTOR); 587 } 588 } 589 590 /* 591 * Drains the per cpu caches for a zone. 592 * 593 * Arguments: 594 * zone The zone to drain, must be unlocked. 595 * 596 * Returns: 597 * Nothing 598 */ 599 static void 600 cache_drain(uma_zone_t zone) 601 { 602 uma_cache_t cache; 603 int cpu; 604 605 /* 606 * We have to lock each cpu cache before locking the zone 607 */ 608 for (cpu = 0; cpu <= mp_maxid; cpu++) { 609 if (CPU_ABSENT(cpu)) 610 continue; 611 CPU_LOCK(cpu); 612 cache = &zone->uz_cpu[cpu]; 613 bucket_drain(zone, cache->uc_allocbucket); 614 bucket_drain(zone, cache->uc_freebucket); 615 if (cache->uc_allocbucket != NULL) 616 bucket_free(cache->uc_allocbucket); 617 if (cache->uc_freebucket != NULL) 618 bucket_free(cache->uc_freebucket); 619 cache->uc_allocbucket = cache->uc_freebucket = NULL; 620 } 621 ZONE_LOCK(zone); 622 bucket_cache_drain(zone); 623 ZONE_UNLOCK(zone); 624 for (cpu = 0; cpu <= mp_maxid; cpu++) { 625 if (CPU_ABSENT(cpu)) 626 continue; 627 CPU_UNLOCK(cpu); 628 } 629 } 630 631 /* 632 * Drain the cached buckets from a zone. Expects a locked zone on entry. 633 */ 634 static void 635 bucket_cache_drain(uma_zone_t zone) 636 { 637 uma_bucket_t bucket; 638 639 /* 640 * Drain the bucket queues and free the buckets, we just keep two per 641 * cpu (alloc/free). 
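	 * The per-cpu alloc and free buckets themselves are not touched
	 * here; cache_drain() is responsible for those.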
	 */
	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		bucket_drain(zone, bucket);
		bucket_free(bucket);
		ZONE_LOCK(zone);
	}

	/* Now we do the free queue.. */
	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		bucket_free(bucket);
	}
}

/*
 * Frees pages from a zone back to the system. This is done on demand from
 * the pageout daemon.
 *
 * Arguments:
 *	zone  The zone to free pages from
 *
 * Returns:
 *	Nothing.
 */
static void
zone_drain(uma_zone_t zone)
{
	struct slabhead freeslabs = {};
	uma_keg_t keg;
	uma_slab_t slab;
	uma_slab_t n;
	u_int8_t flags;
	u_int8_t *mem;
	int i;

	keg = zone->uz_keg;

	/*
	 * We don't want to take pages from statically allocated zones at this
	 * time.
	 */
	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
		return;

	ZONE_LOCK(zone);

#ifdef UMA_DEBUG
	printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
#endif
	bucket_cache_drain(zone);
	if (keg->uk_free == 0)
		goto finished;

	slab = LIST_FIRST(&keg->uk_free_slab);
	while (slab) {
		n = LIST_NEXT(slab, us_link);

		/* We have nowhere to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		keg->uk_pages -= keg->uk_ppera;
		keg->uk_free -= keg->uk_ipers;

		if (keg->uk_flags & UMA_ZONE_HASH)
			UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);

		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);

		slab = n;
	}
finished:
	ZONE_UNLOCK(zone);

	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
		if (keg->uk_fini)
			for (i = 0; i < keg->uk_ipers; i++)
				keg->uk_fini(
				    slab->us_data + (keg->uk_rsize * i),
				    keg->uk_size);
		flags = slab->us_flags;
		mem = slab->us_data;

		if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
		    (keg->uk_flags & UMA_ZONE_REFCNT)) {
			vm_object_t obj;

			if (flags & UMA_SLAB_KMEM)
				obj = kmem_object;
			else
				obj = NULL;
			for (i = 0; i < keg->uk_ppera; i++)
				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
				    obj);
		}
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
			    SKIP_NONE);
#ifdef UMA_DEBUG
		printf("%s: Returning %d bytes.\n",
		    zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
#endif
		keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
	}
}

/*
 * Allocate a new slab for a zone. This does not insert the slab onto a list.
 *
 * Arguments:
 *	zone  The zone to allocate slabs for
 *	wait  Shall we wait?
 *
 * Returns:
 *	The slab that was allocated or NULL if there is no memory and the
 *	caller specified M_NOWAIT.
765 */ 766 static uma_slab_t 767 slab_zalloc(uma_zone_t zone, int wait) 768 { 769 uma_slabrefcnt_t slabref; 770 uma_slab_t slab; 771 uma_keg_t keg; 772 u_int8_t *mem; 773 u_int8_t flags; 774 int i; 775 776 slab = NULL; 777 keg = zone->uz_keg; 778 779 #ifdef UMA_DEBUG 780 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); 781 #endif 782 ZONE_UNLOCK(zone); 783 784 if (keg->uk_flags & UMA_ZONE_OFFPAGE) { 785 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait); 786 if (slab == NULL) { 787 ZONE_LOCK(zone); 788 return NULL; 789 } 790 } 791 792 /* 793 * This reproduces the old vm_zone behavior of zero filling pages the 794 * first time they are added to a zone. 795 * 796 * Malloced items are zeroed in uma_zalloc. 797 */ 798 799 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 800 wait |= M_ZERO; 801 else 802 wait &= ~M_ZERO; 803 804 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, 805 &flags, wait); 806 if (mem == NULL) { 807 if (keg->uk_flags & UMA_ZONE_OFFPAGE) 808 uma_zfree_internal(keg->uk_slabzone, slab, NULL, 0); 809 ZONE_LOCK(zone); 810 return (NULL); 811 } 812 813 /* Point the slab into the allocated memory */ 814 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) 815 slab = (uma_slab_t )(mem + keg->uk_pgoff); 816 817 if ((keg->uk_flags & UMA_ZONE_MALLOC) || 818 (keg->uk_flags & UMA_ZONE_REFCNT)) 819 for (i = 0; i < keg->uk_ppera; i++) 820 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab); 821 822 slab->us_keg = keg; 823 slab->us_data = mem; 824 slab->us_freecount = keg->uk_ipers; 825 slab->us_firstfree = 0; 826 slab->us_flags = flags; 827 for (i = 0; i < keg->uk_ipers; i++) 828 slab->us_freelist[i].us_item = i+1; 829 830 if (keg->uk_flags & UMA_ZONE_REFCNT) { 831 slabref = (uma_slabrefcnt_t)slab; 832 for (i = 0; i < keg->uk_ipers; i++) 833 slabref->us_freelist[i].us_refcnt = 0; 834 } 835 836 if (keg->uk_init != NULL) { 837 for (i = 0; i < keg->uk_ipers; i++) 838 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i), 839 keg->uk_size, wait) != 0) 840 break; 841 if (i != keg->uk_ipers) { 842 if (keg->uk_fini != NULL) { 843 for (i--; i > -1; i--) 844 keg->uk_fini(slab->us_data + 845 (keg->uk_rsize * i), 846 keg->uk_size); 847 } 848 if ((keg->uk_flags & UMA_ZONE_MALLOC) || 849 (keg->uk_flags & UMA_ZONE_REFCNT)) 850 for (i = 0; i < keg->uk_ppera; i++) 851 vsetobj((vm_offset_t)mem + 852 (i * PAGE_SIZE), NULL); 853 if (keg->uk_flags & UMA_ZONE_OFFPAGE) 854 uma_zfree_internal(keg->uk_slabzone, slab, 855 NULL, SKIP_NONE); 856 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, 857 flags); 858 ZONE_LOCK(zone); 859 return (NULL); 860 } 861 } 862 ZONE_LOCK(zone); 863 864 if (keg->uk_flags & UMA_ZONE_HASH) 865 UMA_HASH_INSERT(&keg->uk_hash, slab, mem); 866 867 keg->uk_pages += keg->uk_ppera; 868 keg->uk_free += keg->uk_ipers; 869 870 return (slab); 871 } 872 873 /* 874 * This function is intended to be used early on in place of page_alloc() so 875 * that we may use the boot time page cache to satisfy allocations before 876 * the VM is ready. 877 */ 878 static void * 879 startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 880 { 881 uma_keg_t keg; 882 883 keg = zone->uz_keg; 884 885 /* 886 * Check our small startup cache to see if it has pages remaining. 
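	 * These pages come from the static bootmem region handed to
	 * uma_startup() and are marked UMA_SLAB_BOOT, so zone_drain()
	 * never tries to hand them back to the VM.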
887 */ 888 mtx_lock(&uma_mtx); 889 if (uma_boot_free != 0) { 890 uma_slab_t tmps; 891 892 tmps = LIST_FIRST(&uma_boot_pages); 893 LIST_REMOVE(tmps, us_link); 894 uma_boot_free--; 895 mtx_unlock(&uma_mtx); 896 *pflag = tmps->us_flags; 897 return (tmps->us_data); 898 } 899 mtx_unlock(&uma_mtx); 900 if (booted == 0) 901 panic("UMA: Increase UMA_BOOT_PAGES"); 902 /* 903 * Now that we've booted reset these users to their real allocator. 904 */ 905 #ifdef UMA_MD_SMALL_ALLOC 906 keg->uk_allocf = uma_small_alloc; 907 #else 908 keg->uk_allocf = page_alloc; 909 #endif 910 return keg->uk_allocf(zone, bytes, pflag, wait); 911 } 912 913 /* 914 * Allocates a number of pages from the system 915 * 916 * Arguments: 917 * zone Unused 918 * bytes The number of bytes requested 919 * wait Shall we wait? 920 * 921 * Returns: 922 * A pointer to the alloced memory or possibly 923 * NULL if M_NOWAIT is set. 924 */ 925 static void * 926 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) 927 { 928 void *p; /* Returned page */ 929 930 *pflag = UMA_SLAB_KMEM; 931 p = (void *) kmem_malloc(kmem_map, bytes, wait); 932 933 return (p); 934 } 935 936 /* 937 * Allocates a number of pages from within an object 938 * 939 * Arguments: 940 * zone Unused 941 * bytes The number of bytes requested 942 * wait Shall we wait? 943 * 944 * Returns: 945 * A pointer to the alloced memory or possibly 946 * NULL if M_NOWAIT is set. 947 */ 948 static void * 949 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 950 { 951 vm_object_t object; 952 vm_offset_t retkva, zkva; 953 vm_page_t p; 954 int pages, startpages; 955 956 object = zone->uz_keg->uk_obj; 957 retkva = 0; 958 959 /* 960 * This looks a little weird since we're getting one page at a time. 961 */ 962 VM_OBJECT_LOCK(object); 963 p = TAILQ_LAST(&object->memq, pglist); 964 pages = p != NULL ? p->pindex + 1 : 0; 965 startpages = pages; 966 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE; 967 for (; bytes > 0; bytes -= PAGE_SIZE) { 968 p = vm_page_alloc(object, pages, 969 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED); 970 if (p == NULL) { 971 if (pages != startpages) 972 pmap_qremove(retkva, pages - startpages); 973 while (pages != startpages) { 974 pages--; 975 p = TAILQ_LAST(&object->memq, pglist); 976 vm_page_lock_queues(); 977 vm_page_unwire(p, 0); 978 vm_page_free(p); 979 vm_page_unlock_queues(); 980 } 981 retkva = 0; 982 goto done; 983 } 984 pmap_qenter(zkva, &p, 1); 985 if (retkva == 0) 986 retkva = zkva; 987 zkva += PAGE_SIZE; 988 pages += 1; 989 } 990 done: 991 VM_OBJECT_UNLOCK(object); 992 *flags = UMA_SLAB_PRIV; 993 994 return ((void *)retkva); 995 } 996 997 /* 998 * Frees a number of pages to the system 999 * 1000 * Arguments: 1001 * mem A pointer to the memory to be freed 1002 * size The size of the memory being freed 1003 * flags The original p->us_flags field 1004 * 1005 * Returns: 1006 * Nothing 1007 */ 1008 static void 1009 page_free(void *mem, int size, u_int8_t flags) 1010 { 1011 vm_map_t map; 1012 1013 if (flags & UMA_SLAB_KMEM) 1014 map = kmem_map; 1015 else 1016 panic("UMA: page_free used with invalid flags %d\n", flags); 1017 1018 kmem_free(map, (vm_offset_t)mem, size); 1019 } 1020 1021 /* 1022 * Zero fill initializer 1023 * 1024 * Arguments/Returns follow uma_init specifications 1025 */ 1026 static int 1027 zero_init(void *mem, int size, int flags) 1028 { 1029 bzero(mem, size); 1030 return (0); 1031 } 1032 1033 /* 1034 * Finish creating a small uma zone. This calculates ipers, and the zone size. 
1035 * 1036 * Arguments 1037 * zone The zone we should initialize 1038 * 1039 * Returns 1040 * Nothing 1041 */ 1042 static void 1043 zone_small_init(uma_zone_t zone) 1044 { 1045 uma_keg_t keg; 1046 u_int rsize; 1047 u_int memused; 1048 u_int wastedspace; 1049 u_int shsize; 1050 1051 keg = zone->uz_keg; 1052 KASSERT(keg != NULL, ("Keg is null in zone_small_init")); 1053 rsize = keg->uk_size; 1054 1055 if (rsize < UMA_SMALLEST_UNIT) 1056 rsize = UMA_SMALLEST_UNIT; 1057 if (rsize & keg->uk_align) 1058 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); 1059 1060 keg->uk_rsize = rsize; 1061 keg->uk_ppera = 1; 1062 1063 if (keg->uk_flags & UMA_ZONE_REFCNT) { 1064 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */ 1065 shsize = sizeof(struct uma_slab_refcnt); 1066 } else { 1067 rsize += UMA_FRITM_SZ; /* Account for linkage */ 1068 shsize = sizeof(struct uma_slab); 1069 } 1070 1071 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize; 1072 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0")); 1073 memused = keg->uk_ipers * rsize + shsize; 1074 wastedspace = UMA_SLAB_SIZE - memused; 1075 1076 /* 1077 * We can't do OFFPAGE if we're internal or if we've been 1078 * asked to not go to the VM for buckets. If we do this we 1079 * may end up going to the VM (kmem_map) for slabs which we 1080 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a 1081 * result of UMA_ZONE_VM, which clearly forbids it. 1082 */ 1083 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) || 1084 (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) 1085 return; 1086 1087 if ((wastedspace >= UMA_MAX_WASTE) && 1088 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) { 1089 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize; 1090 KASSERT(keg->uk_ipers <= 255, 1091 ("zone_small_init: keg->uk_ipers too high!")); 1092 #ifdef UMA_DEBUG 1093 printf("UMA decided we need offpage slab headers for " 1094 "zone: %s, calculated wastedspace = %d, " 1095 "maximum wasted space allowed = %d, " 1096 "calculated ipers = %d, " 1097 "new wasted space = %d\n", zone->uz_name, wastedspace, 1098 UMA_MAX_WASTE, keg->uk_ipers, 1099 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize); 1100 #endif 1101 keg->uk_flags |= UMA_ZONE_OFFPAGE; 1102 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 1103 keg->uk_flags |= UMA_ZONE_HASH; 1104 } 1105 } 1106 1107 /* 1108 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do 1109 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be 1110 * more complicated. 1111 * 1112 * Arguments 1113 * zone The zone we should initialize 1114 * 1115 * Returns 1116 * Nothing 1117 */ 1118 static void 1119 zone_large_init(uma_zone_t zone) 1120 { 1121 uma_keg_t keg; 1122 int pages; 1123 1124 keg = zone->uz_keg; 1125 1126 KASSERT(keg != NULL, ("Keg is null in zone_large_init")); 1127 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0, 1128 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone")); 1129 1130 pages = keg->uk_size / UMA_SLAB_SIZE; 1131 1132 /* Account for remainder */ 1133 if ((pages * UMA_SLAB_SIZE) < keg->uk_size) 1134 pages++; 1135 1136 keg->uk_ppera = pages; 1137 keg->uk_ipers = 1; 1138 1139 keg->uk_flags |= UMA_ZONE_OFFPAGE; 1140 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 1141 keg->uk_flags |= UMA_ZONE_HASH; 1142 1143 keg->uk_rsize = keg->uk_size; 1144 } 1145 1146 /* 1147 * Keg header ctor. This initializes all fields, locks, etc. And inserts 1148 * the keg onto the global keg list. 
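 *
 * A keg is the slab/page backend that one or more zones share: the zone
 * passed in through the ctor args becomes the keg's master zone, and more
 * zones may attach to the same keg later.  For example (illustrative call,
 * not code taken from this file), a caller can layer a second zone on an
 * existing one with
 *
 *	z = uma_zsecond_create("name", ctor, dtor, zinit, zfini, master);
 *
 * which reuses master->uz_keg and only supplies its own init/fini and
 * ctor/dtor (see uma_zsecond_create() and zone_ctor() below).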
1149 * 1150 * Arguments/Returns follow uma_ctor specifications 1151 * udata Actually uma_kctor_args 1152 */ 1153 static int 1154 keg_ctor(void *mem, int size, void *udata, int flags) 1155 { 1156 struct uma_kctor_args *arg = udata; 1157 uma_keg_t keg = mem; 1158 uma_zone_t zone; 1159 1160 bzero(keg, size); 1161 keg->uk_size = arg->size; 1162 keg->uk_init = arg->uminit; 1163 keg->uk_fini = arg->fini; 1164 keg->uk_align = arg->align; 1165 keg->uk_free = 0; 1166 keg->uk_pages = 0; 1167 keg->uk_flags = arg->flags; 1168 keg->uk_allocf = page_alloc; 1169 keg->uk_freef = page_free; 1170 keg->uk_recurse = 0; 1171 keg->uk_slabzone = NULL; 1172 1173 /* 1174 * The master zone is passed to us at keg-creation time. 1175 */ 1176 zone = arg->zone; 1177 zone->uz_keg = keg; 1178 1179 if (arg->flags & UMA_ZONE_VM) 1180 keg->uk_flags |= UMA_ZFLAG_CACHEONLY; 1181 1182 if (arg->flags & UMA_ZONE_ZINIT) 1183 keg->uk_init = zero_init; 1184 1185 /* 1186 * The +UMA_FRITM_SZ added to uk_size is to account for the 1187 * linkage that is added to the size in zone_small_init(). If 1188 * we don't account for this here then we may end up in 1189 * zone_small_init() with a calculated 'ipers' of 0. 1190 */ 1191 if (keg->uk_flags & UMA_ZONE_REFCNT) { 1192 if ((keg->uk_size+UMA_FRITMREF_SZ) > 1193 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt))) 1194 zone_large_init(zone); 1195 else 1196 zone_small_init(zone); 1197 } else { 1198 if ((keg->uk_size+UMA_FRITM_SZ) > 1199 (UMA_SLAB_SIZE - sizeof(struct uma_slab))) 1200 zone_large_init(zone); 1201 else 1202 zone_small_init(zone); 1203 } 1204 1205 if (keg->uk_flags & UMA_ZONE_OFFPAGE) { 1206 if (keg->uk_flags & UMA_ZONE_REFCNT) 1207 keg->uk_slabzone = slabrefzone; 1208 else 1209 keg->uk_slabzone = slabzone; 1210 } 1211 1212 /* 1213 * If we haven't booted yet we need allocations to go through the 1214 * startup cache until the vm is ready. 1215 */ 1216 if (keg->uk_ppera == 1) { 1217 #ifdef UMA_MD_SMALL_ALLOC 1218 keg->uk_allocf = uma_small_alloc; 1219 keg->uk_freef = uma_small_free; 1220 #endif 1221 if (booted == 0) 1222 keg->uk_allocf = startup_alloc; 1223 } 1224 1225 /* 1226 * Initialize keg's lock (shared among zones) through 1227 * Master zone 1228 */ 1229 zone->uz_lock = &keg->uk_lock; 1230 if (arg->flags & UMA_ZONE_MTXCLASS) 1231 ZONE_LOCK_INIT(zone, 1); 1232 else 1233 ZONE_LOCK_INIT(zone, 0); 1234 1235 /* 1236 * If we're putting the slab header in the actual page we need to 1237 * figure out where in each page it goes. This calculates a right 1238 * justified offset into the memory on an ALIGN_PTR boundary. 1239 */ 1240 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) { 1241 u_int totsize; 1242 1243 /* Size of the slab struct and free list */ 1244 if (keg->uk_flags & UMA_ZONE_REFCNT) 1245 totsize = sizeof(struct uma_slab_refcnt) + 1246 keg->uk_ipers * UMA_FRITMREF_SZ; 1247 else 1248 totsize = sizeof(struct uma_slab) + 1249 keg->uk_ipers * UMA_FRITM_SZ; 1250 1251 if (totsize & UMA_ALIGN_PTR) 1252 totsize = (totsize & ~UMA_ALIGN_PTR) + 1253 (UMA_ALIGN_PTR + 1); 1254 keg->uk_pgoff = UMA_SLAB_SIZE - totsize; 1255 1256 if (keg->uk_flags & UMA_ZONE_REFCNT) 1257 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt) 1258 + keg->uk_ipers * UMA_FRITMREF_SZ; 1259 else 1260 totsize = keg->uk_pgoff + sizeof(struct uma_slab) 1261 + keg->uk_ipers * UMA_FRITM_SZ; 1262 1263 /* 1264 * The only way the following is possible is if with our 1265 * UMA_ALIGN_PTR adjustments we are now bigger than 1266 * UMA_SLAB_SIZE. 
I haven't checked whether this is 1267 * mathematically possible for all cases, so we make 1268 * sure here anyway. 1269 */ 1270 if (totsize > UMA_SLAB_SIZE) { 1271 printf("zone %s ipers %d rsize %d size %d\n", 1272 zone->uz_name, keg->uk_ipers, keg->uk_rsize, 1273 keg->uk_size); 1274 panic("UMA slab won't fit.\n"); 1275 } 1276 } 1277 1278 if (keg->uk_flags & UMA_ZONE_HASH) 1279 hash_alloc(&keg->uk_hash); 1280 1281 #ifdef UMA_DEBUG 1282 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n", 1283 zone->uz_name, zone, 1284 keg->uk_size, keg->uk_ipers, 1285 keg->uk_ppera, keg->uk_pgoff); 1286 #endif 1287 1288 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link); 1289 1290 mtx_lock(&uma_mtx); 1291 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link); 1292 mtx_unlock(&uma_mtx); 1293 return (0); 1294 } 1295 1296 /* 1297 * Zone header ctor. This initializes all fields, locks, etc. 1298 * 1299 * Arguments/Returns follow uma_ctor specifications 1300 * udata Actually uma_zctor_args 1301 */ 1302 1303 static int 1304 zone_ctor(void *mem, int size, void *udata, int flags) 1305 { 1306 struct uma_zctor_args *arg = udata; 1307 uma_zone_t zone = mem; 1308 uma_zone_t z; 1309 uma_keg_t keg; 1310 1311 bzero(zone, size); 1312 zone->uz_name = arg->name; 1313 zone->uz_ctor = arg->ctor; 1314 zone->uz_dtor = arg->dtor; 1315 zone->uz_init = NULL; 1316 zone->uz_fini = NULL; 1317 zone->uz_allocs = 0; 1318 zone->uz_fills = zone->uz_count = 0; 1319 1320 if (arg->flags & UMA_ZONE_SECONDARY) { 1321 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg")); 1322 keg = arg->keg; 1323 zone->uz_keg = keg; 1324 zone->uz_init = arg->uminit; 1325 zone->uz_fini = arg->fini; 1326 zone->uz_lock = &keg->uk_lock; 1327 mtx_lock(&uma_mtx); 1328 ZONE_LOCK(zone); 1329 keg->uk_flags |= UMA_ZONE_SECONDARY; 1330 LIST_FOREACH(z, &keg->uk_zones, uz_link) { 1331 if (LIST_NEXT(z, uz_link) == NULL) { 1332 LIST_INSERT_AFTER(z, zone, uz_link); 1333 break; 1334 } 1335 } 1336 ZONE_UNLOCK(zone); 1337 mtx_unlock(&uma_mtx); 1338 } else if (arg->keg == NULL) { 1339 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini, 1340 arg->align, arg->flags) == NULL) 1341 return (ENOMEM); 1342 } else { 1343 struct uma_kctor_args karg; 1344 int error; 1345 1346 /* We should only be here from uma_startup() */ 1347 karg.size = arg->size; 1348 karg.uminit = arg->uminit; 1349 karg.fini = arg->fini; 1350 karg.align = arg->align; 1351 karg.flags = arg->flags; 1352 karg.zone = zone; 1353 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg, 1354 flags); 1355 if (error) 1356 return (error); 1357 } 1358 keg = zone->uz_keg; 1359 zone->uz_lock = &keg->uk_lock; 1360 1361 /* 1362 * Some internal zones don't have room allocated for the per cpu 1363 * caches. If we're internal, bail out here. 1364 */ 1365 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) { 1366 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0, 1367 ("Secondary zone requested UMA_ZFLAG_INTERNAL")); 1368 return (0); 1369 } 1370 1371 if (keg->uk_flags & UMA_ZONE_MAXBUCKET) 1372 zone->uz_count = BUCKET_MAX; 1373 else if (keg->uk_ipers <= BUCKET_MAX) 1374 zone->uz_count = keg->uk_ipers; 1375 else 1376 zone->uz_count = BUCKET_MAX; 1377 return (0); 1378 } 1379 1380 /* 1381 * Keg header dtor. This frees all data, destroys locks, frees the hash 1382 * table and removes the keg from the global list. 
1383 * 1384 * Arguments/Returns follow uma_dtor specifications 1385 * udata unused 1386 */ 1387 static void 1388 keg_dtor(void *arg, int size, void *udata) 1389 { 1390 uma_keg_t keg; 1391 1392 keg = (uma_keg_t)arg; 1393 mtx_lock(&keg->uk_lock); 1394 if (keg->uk_free != 0) { 1395 printf("Freed UMA keg was not empty (%d items). " 1396 " Lost %d pages of memory.\n", 1397 keg->uk_free, keg->uk_pages); 1398 } 1399 mtx_unlock(&keg->uk_lock); 1400 1401 if (keg->uk_flags & UMA_ZONE_HASH) 1402 hash_free(&keg->uk_hash); 1403 1404 mtx_destroy(&keg->uk_lock); 1405 } 1406 1407 /* 1408 * Zone header dtor. 1409 * 1410 * Arguments/Returns follow uma_dtor specifications 1411 * udata unused 1412 */ 1413 static void 1414 zone_dtor(void *arg, int size, void *udata) 1415 { 1416 uma_zone_t zone; 1417 uma_keg_t keg; 1418 1419 zone = (uma_zone_t)arg; 1420 keg = zone->uz_keg; 1421 1422 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL)) 1423 cache_drain(zone); 1424 1425 mtx_lock(&uma_mtx); 1426 zone_drain(zone); 1427 if (keg->uk_flags & UMA_ZONE_SECONDARY) { 1428 LIST_REMOVE(zone, uz_link); 1429 /* 1430 * XXX there are some races here where 1431 * the zone can be drained but zone lock 1432 * released and then refilled before we 1433 * remove it... we dont care for now 1434 */ 1435 ZONE_LOCK(zone); 1436 if (LIST_EMPTY(&keg->uk_zones)) 1437 keg->uk_flags &= ~UMA_ZONE_SECONDARY; 1438 ZONE_UNLOCK(zone); 1439 mtx_unlock(&uma_mtx); 1440 } else { 1441 LIST_REMOVE(keg, uk_link); 1442 LIST_REMOVE(zone, uz_link); 1443 mtx_unlock(&uma_mtx); 1444 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE); 1445 } 1446 zone->uz_keg = NULL; 1447 } 1448 1449 /* 1450 * Traverses every zone in the system and calls a callback 1451 * 1452 * Arguments: 1453 * zfunc A pointer to a function which accepts a zone 1454 * as an argument. 1455 * 1456 * Returns: 1457 * Nothing 1458 */ 1459 static void 1460 zone_foreach(void (*zfunc)(uma_zone_t)) 1461 { 1462 uma_keg_t keg; 1463 uma_zone_t zone; 1464 1465 mtx_lock(&uma_mtx); 1466 LIST_FOREACH(keg, &uma_kegs, uk_link) { 1467 LIST_FOREACH(zone, &keg->uk_zones, uz_link) 1468 zfunc(zone); 1469 } 1470 mtx_unlock(&uma_mtx); 1471 } 1472 1473 /* Public functions */ 1474 /* See uma.h */ 1475 void 1476 uma_startup(void *bootmem) 1477 { 1478 struct uma_zctor_args args; 1479 uma_slab_t slab; 1480 u_int slabsize; 1481 u_int objsize, totsize, wsize; 1482 int i; 1483 1484 #ifdef UMA_DEBUG 1485 printf("Creating uma keg headers zone and keg.\n"); 1486 #endif 1487 /* 1488 * The general UMA lock is a recursion-allowed lock because 1489 * there is a code path where, while we're still configured 1490 * to use startup_alloc() for backend page allocations, we 1491 * may end up in uma_reclaim() which calls zone_foreach(zone_drain), 1492 * which grabs uma_mtx, only to later call into startup_alloc() 1493 * because while freeing we needed to allocate a bucket. Since 1494 * startup_alloc() also takes uma_mtx, we need to be able to 1495 * recurse on it. 1496 */ 1497 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE); 1498 1499 /* 1500 * Figure out the maximum number of items-per-slab we'll have if 1501 * we're using the OFFPAGE slab header to track free items, given 1502 * all possible object sizes and the maximum desired wastage 1503 * (UMA_MAX_WASTE). 1504 * 1505 * We iterate until we find an object size for 1506 * which the calculated wastage in zone_small_init() will be 1507 * enough to warrant OFFPAGE. 
Since wastedspace versus objsize
	 * is an overall increasing see-saw function, we find the smallest
	 * objsize such that the wastage is always acceptable for objects
	 * with that objsize or smaller. Since a smaller objsize always
	 * generates a larger possible uma_max_ipers, we use this computed
	 * objsize to calculate the largest ipers possible. Since the
	 * ipers calculated for OFFPAGE slab headers is always larger than
	 * the ipers initially calculated in zone_small_init(), we use
	 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
	 * obtain the maximum ipers possible for offpage slab headers.
	 *
	 * It should be noted that ipers versus objsize is an inversely
	 * proportional function which drops off rather quickly so as
	 * long as our UMA_MAX_WASTE is such that the objsize we calculate
	 * falls into the portion of the inverse relation AFTER the steep
	 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
	 *
	 * Note that we have 8 bits (1 byte) to use as a freelist index
	 * inside the actual slab header itself and this is enough to
	 * accommodate us. In the worst case, a UMA_SMALLEST_UNIT sized
	 * object with offpage slab header would have ipers =
	 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
	 * 1 greater than what our byte-integer freelist index can
	 * accommodate, but we know that this situation never occurs as
	 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
	 * that we need to go to offpage slab headers. Or, if we do,
	 * then we trap that condition below and panic in the INVARIANTS case.
	 */
	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
		    (objsize + UMA_FRITM_SZ);
		totsize *= (UMA_FRITM_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers = UMA_SLAB_SIZE / objsize;

	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
		    (objsize + UMA_FRITMREF_SZ);
		totsize *= (UMA_FRITMREF_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;

	KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
	    ("uma_startup: calculated uma_max_ipers values too large!"));

#ifdef UMA_DEBUG
	printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
	printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n",
	    uma_max_ipers_ref);
#endif

	/* "manually" create the initial zone */
	args.name = "UMA Kegs";
	args.size = sizeof(struct uma_keg);
	args.ctor = keg_ctor;
	args.dtor = keg_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = &masterkeg;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no per cpu queues so it's smaller */
	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	for (i = 0; i < UMA_BOOT_PAGES; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data =
(u_int8_t *)slab; 1589 slab->us_flags = UMA_SLAB_BOOT; 1590 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link); 1591 uma_boot_free++; 1592 } 1593 1594 #ifdef UMA_DEBUG 1595 printf("Creating uma zone headers zone and keg.\n"); 1596 #endif 1597 args.name = "UMA Zones"; 1598 args.size = sizeof(struct uma_zone) + 1599 (sizeof(struct uma_cache) * (mp_maxid + 1)); 1600 args.ctor = zone_ctor; 1601 args.dtor = zone_dtor; 1602 args.uminit = zero_init; 1603 args.fini = NULL; 1604 args.keg = NULL; 1605 args.align = 32 - 1; 1606 args.flags = UMA_ZFLAG_INTERNAL; 1607 /* The initial zone has no Per cpu queues so it's smaller */ 1608 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK); 1609 1610 #ifdef UMA_DEBUG 1611 printf("Initializing pcpu cache locks.\n"); 1612 #endif 1613 /* Initialize the pcpu cache lock set once and for all */ 1614 for (i = 0; i <= mp_maxid; i++) 1615 CPU_LOCK_INIT(i); 1616 1617 #ifdef UMA_DEBUG 1618 printf("Creating slab and hash zones.\n"); 1619 #endif 1620 1621 /* 1622 * This is the max number of free list items we'll have with 1623 * offpage slabs. 1624 */ 1625 slabsize = uma_max_ipers * UMA_FRITM_SZ; 1626 slabsize += sizeof(struct uma_slab); 1627 1628 /* Now make a zone for slab headers */ 1629 slabzone = uma_zcreate("UMA Slabs", 1630 slabsize, 1631 NULL, NULL, NULL, NULL, 1632 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 1633 1634 /* 1635 * We also create a zone for the bigger slabs with reference 1636 * counts in them, to accomodate UMA_ZONE_REFCNT zones. 1637 */ 1638 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ; 1639 slabsize += sizeof(struct uma_slab_refcnt); 1640 slabrefzone = uma_zcreate("UMA RCntSlabs", 1641 slabsize, 1642 NULL, NULL, NULL, NULL, 1643 UMA_ALIGN_PTR, 1644 UMA_ZFLAG_INTERNAL); 1645 1646 hashzone = uma_zcreate("UMA Hash", 1647 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, 1648 NULL, NULL, NULL, NULL, 1649 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 1650 1651 bucket_init(); 1652 1653 #ifdef UMA_MD_SMALL_ALLOC 1654 booted = 1; 1655 #endif 1656 1657 #ifdef UMA_DEBUG 1658 printf("UMA startup complete.\n"); 1659 #endif 1660 } 1661 1662 /* see uma.h */ 1663 void 1664 uma_startup2(void) 1665 { 1666 booted = 1; 1667 bucket_enable(); 1668 #ifdef UMA_DEBUG 1669 printf("UMA startup2 complete.\n"); 1670 #endif 1671 } 1672 1673 /* 1674 * Initialize our callout handle 1675 * 1676 */ 1677 1678 static void 1679 uma_startup3(void) 1680 { 1681 #ifdef UMA_DEBUG 1682 printf("Starting callout.\n"); 1683 #endif 1684 callout_init(&uma_callout, CALLOUT_MPSAFE); 1685 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 1686 #ifdef UMA_DEBUG 1687 printf("UMA startup3 complete.\n"); 1688 #endif 1689 } 1690 1691 static uma_zone_t 1692 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, 1693 int align, u_int16_t flags) 1694 { 1695 struct uma_kctor_args args; 1696 1697 args.size = size; 1698 args.uminit = uminit; 1699 args.fini = fini; 1700 args.align = align; 1701 args.flags = flags; 1702 args.zone = zone; 1703 return (uma_zalloc_internal(kegs, &args, M_WAITOK)); 1704 } 1705 1706 /* See uma.h */ 1707 uma_zone_t 1708 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor, 1709 uma_init uminit, uma_fini fini, int align, u_int16_t flags) 1710 1711 { 1712 struct uma_zctor_args args; 1713 1714 /* This stuff is essential for the zone ctor */ 1715 args.name = name; 1716 args.size = size; 1717 args.ctor = ctor; 1718 args.dtor = dtor; 1719 args.uminit = uminit; 1720 args.fini = fini; 1721 args.align = align; 1722 args.flags = flags; 1723 args.keg = NULL; 
1724 1725 return (uma_zalloc_internal(zones, &args, M_WAITOK)); 1726 } 1727 1728 /* See uma.h */ 1729 uma_zone_t 1730 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, 1731 uma_init zinit, uma_fini zfini, uma_zone_t master) 1732 { 1733 struct uma_zctor_args args; 1734 1735 args.name = name; 1736 args.size = master->uz_keg->uk_size; 1737 args.ctor = ctor; 1738 args.dtor = dtor; 1739 args.uminit = zinit; 1740 args.fini = zfini; 1741 args.align = master->uz_keg->uk_align; 1742 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY; 1743 args.keg = master->uz_keg; 1744 1745 return (uma_zalloc_internal(zones, &args, M_WAITOK)); 1746 } 1747 1748 /* See uma.h */ 1749 void 1750 uma_zdestroy(uma_zone_t zone) 1751 { 1752 uma_zfree_internal(zones, zone, NULL, SKIP_NONE); 1753 } 1754 1755 /* See uma.h */ 1756 void * 1757 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) 1758 { 1759 void *item; 1760 uma_cache_t cache; 1761 uma_bucket_t bucket; 1762 int cpu; 1763 int badness; 1764 1765 /* This is the fast path allocation */ 1766 #ifdef UMA_DEBUG_ALLOC_1 1767 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); 1768 #endif 1769 1770 if (!(flags & M_NOWAIT)) { 1771 KASSERT(curthread->td_intr_nesting_level == 0, 1772 ("malloc(M_WAITOK) in interrupt context")); 1773 if (nosleepwithlocks) { 1774 #ifdef WITNESS 1775 badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, 1776 NULL, 1777 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT", 1778 zone->uz_name); 1779 #else 1780 badness = 1; 1781 #endif 1782 } else { 1783 badness = 0; 1784 #ifdef WITNESS 1785 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1786 "malloc(M_WAITOK) of \"%s\"", zone->uz_name); 1787 #endif 1788 } 1789 if (badness) { 1790 flags &= ~M_WAITOK; 1791 flags |= M_NOWAIT; 1792 } 1793 } 1794 1795 zalloc_restart: 1796 cpu = PCPU_GET(cpuid); 1797 CPU_LOCK(cpu); 1798 cache = &zone->uz_cpu[cpu]; 1799 1800 zalloc_start: 1801 bucket = cache->uc_allocbucket; 1802 1803 if (bucket) { 1804 if (bucket->ub_cnt > 0) { 1805 bucket->ub_cnt--; 1806 item = bucket->ub_bucket[bucket->ub_cnt]; 1807 #ifdef INVARIANTS 1808 bucket->ub_bucket[bucket->ub_cnt] = NULL; 1809 #endif 1810 KASSERT(item != NULL, 1811 ("uma_zalloc: Bucket pointer mangled.")); 1812 cache->uc_allocs++; 1813 #ifdef INVARIANTS 1814 ZONE_LOCK(zone); 1815 uma_dbg_alloc(zone, NULL, item); 1816 ZONE_UNLOCK(zone); 1817 #endif 1818 CPU_UNLOCK(cpu); 1819 if (zone->uz_ctor != NULL) { 1820 if (zone->uz_ctor(item, zone->uz_keg->uk_size, 1821 udata, flags) != 0) { 1822 uma_zfree_internal(zone, item, udata, 1823 SKIP_DTOR); 1824 return (NULL); 1825 } 1826 } 1827 if (flags & M_ZERO) 1828 bzero(item, zone->uz_keg->uk_size); 1829 return (item); 1830 } else if (cache->uc_freebucket) { 1831 /* 1832 * We have run out of items in our allocbucket. 1833 * See if we can switch with our free bucket. 
			 */
			if (cache->uc_freebucket->ub_cnt > 0) {
#ifdef UMA_DEBUG_ALLOC
				printf("uma_zalloc: Swapping empty with"
				    " alloc.\n");
#endif
				bucket = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = bucket;

				goto zalloc_start;
			}
		}
	}
	ZONE_LOCK(zone);
	/* Since we have locked the zone we may as well send back our stats */
	zone->uz_allocs += cache->uc_allocs;
	cache->uc_allocs = 0;

	/* Our old one is now a free bucket */
	if (cache->uc_allocbucket) {
		KASSERT(cache->uc_allocbucket->ub_cnt == 0,
		    ("uma_zalloc_arg: Freeing a non free bucket."));
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    cache->uc_allocbucket, ub_link);
		cache->uc_allocbucket = NULL;
	}

	/* Check the free list for a new alloc bucket */
	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		KASSERT(bucket->ub_cnt != 0,
		    ("uma_zalloc_arg: Returning an empty bucket."));

		LIST_REMOVE(bucket, ub_link);
		cache->uc_allocbucket = bucket;
		ZONE_UNLOCK(zone);
		goto zalloc_start;
	}
	/* We are no longer associated with this cpu!!! */
	CPU_UNLOCK(cpu);

	/* Bump up our uz_count so we get here less */
	if (zone->uz_count < BUCKET_MAX)
		zone->uz_count++;

	/*
	 * Now let's just fill a bucket and put it on the free list. If that
	 * works we'll restart the allocation from the beginning.
	 */
	if (uma_zalloc_bucket(zone, flags)) {
		ZONE_UNLOCK(zone);
		goto zalloc_restart;
	}
	ZONE_UNLOCK(zone);
	/*
	 * We may not be able to get a bucket so return an actual item.
	 */
#ifdef UMA_DEBUG
	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

	return (uma_zalloc_internal(zone, udata, flags));
}

static uma_slab_t
uma_zone_slab(uma_zone_t zone, int flags)
{
	uma_slab_t slab;
	uma_keg_t keg;

	keg = zone->uz_keg;

	/*
	 * This is to prevent us from recursively trying to allocate
	 * buckets. The problem is that if an allocation forces us to
	 * grab a new bucket we will call page_alloc, which will go off
	 * and cause the vm to allocate vm_map_entries. If we need new
	 * buckets there too we will recurse in kmem_alloc and bad
	 * things happen. So instead we return a NULL bucket, and make
	 * the code that allocates buckets smart enough to deal with it.
	 */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
		return (NULL);

	slab = NULL;

	for (;;) {
		/*
		 * Find a slab with some space. Prefer slabs that are partially
		 * used over those that are totally full. This helps to reduce
		 * fragmentation.
		 */
		if (keg->uk_free != 0) {
			if (!LIST_EMPTY(&keg->uk_part_slab)) {
				slab = LIST_FIRST(&keg->uk_part_slab);
			} else {
				slab = LIST_FIRST(&keg->uk_free_slab);
				LIST_REMOVE(slab, us_link);
				LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
				    us_link);
			}
			return (slab);
		}

		/*
		 * M_NOVM means don't ask at all!
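		 * That is, fail the allocation rather than going back to
		 * the VM for more pages.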
1940 */ 1941 if (flags & M_NOVM) 1942 break; 1943 1944 if (keg->uk_maxpages && 1945 keg->uk_pages >= keg->uk_maxpages) { 1946 keg->uk_flags |= UMA_ZFLAG_FULL; 1947 1948 if (flags & M_NOWAIT) 1949 break; 1950 else 1951 msleep(keg, &keg->uk_lock, PVM, 1952 "zonelimit", 0); 1953 continue; 1954 } 1955 keg->uk_recurse++; 1956 slab = slab_zalloc(zone, flags); 1957 keg->uk_recurse--; 1958 1959 /* 1960 * If we got a slab here it's safe to mark it partially used 1961 * and return. We assume that the caller is going to remove 1962 * at least one item. 1963 */ 1964 if (slab) { 1965 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); 1966 return (slab); 1967 } 1968 /* 1969 * We might not have been able to get a slab but another cpu 1970 * could have while we were unlocked. Check again before we 1971 * fail. 1972 */ 1973 if (flags & M_NOWAIT) 1974 flags |= M_NOVM; 1975 } 1976 return (slab); 1977 } 1978 1979 static void * 1980 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab) 1981 { 1982 uma_keg_t keg; 1983 void *item; 1984 u_int8_t freei; 1985 1986 keg = zone->uz_keg; 1987 1988 freei = slab->us_firstfree; 1989 slab->us_firstfree = slab->us_freelist[freei].us_item; 1990 item = slab->us_data + (keg->uk_rsize * freei); 1991 1992 slab->us_freecount--; 1993 keg->uk_free--; 1994 #ifdef INVARIANTS 1995 uma_dbg_alloc(zone, slab, item); 1996 #endif 1997 /* Move this slab to the full list */ 1998 if (slab->us_freecount == 0) { 1999 LIST_REMOVE(slab, us_link); 2000 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link); 2001 } 2002 2003 return (item); 2004 } 2005 2006 static int 2007 uma_zalloc_bucket(uma_zone_t zone, int flags) 2008 { 2009 uma_bucket_t bucket; 2010 uma_slab_t slab; 2011 int16_t saved; 2012 int max, origflags = flags; 2013 2014 /* 2015 * Try this zone's free list first so we don't allocate extra buckets. 2016 */ 2017 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 2018 KASSERT(bucket->ub_cnt == 0, 2019 ("uma_zalloc_bucket: Bucket on free list is not empty.")); 2020 LIST_REMOVE(bucket, ub_link); 2021 } else { 2022 int bflags; 2023 2024 bflags = (flags & ~M_ZERO); 2025 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY) 2026 bflags |= M_NOVM; 2027 2028 ZONE_UNLOCK(zone); 2029 bucket = bucket_alloc(zone->uz_count, bflags); 2030 ZONE_LOCK(zone); 2031 } 2032 2033 if (bucket == NULL) 2034 return (0); 2035 2036 #ifdef SMP 2037 /* 2038 * This code is here to limit the number of simultaneous bucket fills 2039 * for any given zone to the number of per cpu caches in this zone. This 2040 * is done so that we don't allocate more memory than we really need. 2041 */ 2042 if (zone->uz_fills >= mp_ncpus) 2043 goto done; 2044 2045 #endif 2046 zone->uz_fills++; 2047 2048 max = MIN(bucket->ub_entries, zone->uz_count); 2049 /* Try to keep the buckets totally full */ 2050 saved = bucket->ub_cnt; 2051 while (bucket->ub_cnt < max && 2052 (slab = uma_zone_slab(zone, flags)) != NULL) { 2053 while (slab->us_freecount && bucket->ub_cnt < max) { 2054 bucket->ub_bucket[bucket->ub_cnt++] = 2055 uma_slab_alloc(zone, slab); 2056 } 2057 2058 /* Don't block on the next fill */ 2059 flags |= M_NOWAIT; 2060 } 2061 2062 /* 2063 * We unlock here because we need to call the zone's init. 2064 * It should be safe to unlock because the slab dealt with 2065 * above is already on the appropriate list within the keg 2066 * and the bucket we filled is not yet on any list, so we 2067 * own it. 
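	 * Note that the init below is called with origflags rather than
	 * flags: the fill loop above forces M_NOWAIT for the later slab
	 * refills, but the caller's original wait semantics should still
	 * apply when initializing the items themselves.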
2068 */ 2069 if (zone->uz_init != NULL) { 2070 int i; 2071 2072 ZONE_UNLOCK(zone); 2073 for (i = saved; i < bucket->ub_cnt; i++) 2074 if (zone->uz_init(bucket->ub_bucket[i], 2075 zone->uz_keg->uk_size, origflags) != 0) 2076 break; 2077 /* 2078 * If we couldn't initialize the whole bucket, put the 2079 * rest back onto the freelist. 2080 */ 2081 if (i != bucket->ub_cnt) { 2082 int j; 2083 2084 for (j = i; j < bucket->ub_cnt; j++) 2085 uma_zfree_internal(zone, bucket->ub_bucket[j], 2086 NULL, SKIP_FINI); 2087 bucket->ub_cnt = i; 2088 } 2089 ZONE_LOCK(zone); 2090 } 2091 2092 zone->uz_fills--; 2093 if (bucket->ub_cnt != 0) { 2094 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2095 bucket, ub_link); 2096 return (1); 2097 } 2098 #ifdef SMP 2099 done: 2100 #endif 2101 bucket_free(bucket); 2102 2103 return (0); 2104 } 2105 /* 2106 * Allocates an item for an internal zone 2107 * 2108 * Arguments 2109 * zone The zone to alloc for. 2110 * udata The data to be passed to the constructor. 2111 * flags M_WAITOK, M_NOWAIT, M_ZERO. 2112 * 2113 * Returns 2114 * NULL if there is no memory and M_NOWAIT is set 2115 * An item if successful 2116 */ 2117 2118 static void * 2119 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags) 2120 { 2121 uma_keg_t keg; 2122 uma_slab_t slab; 2123 void *item; 2124 2125 item = NULL; 2126 keg = zone->uz_keg; 2127 2128 #ifdef UMA_DEBUG_ALLOC 2129 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); 2130 #endif 2131 ZONE_LOCK(zone); 2132 2133 slab = uma_zone_slab(zone, flags); 2134 if (slab == NULL) { 2135 ZONE_UNLOCK(zone); 2136 return (NULL); 2137 } 2138 2139 item = uma_slab_alloc(zone, slab); 2140 2141 ZONE_UNLOCK(zone); 2142 2143 /* 2144 * We have to call both the zone's init (not the keg's init) 2145 * and the zone's ctor. This is because the item is going from 2146 * a keg slab directly to the user, and the user is expecting it 2147 * to be both zone-init'd as well as zone-ctor'd. 2148 */ 2149 if (zone->uz_init != NULL) { 2150 if (zone->uz_init(item, keg->uk_size, flags) != 0) { 2151 uma_zfree_internal(zone, item, udata, SKIP_FINI); 2152 return (NULL); 2153 } 2154 } 2155 if (zone->uz_ctor != NULL) { 2156 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) { 2157 uma_zfree_internal(zone, item, udata, SKIP_DTOR); 2158 return (NULL); 2159 } 2160 } 2161 if (flags & M_ZERO) 2162 bzero(item, keg->uk_size); 2163 2164 return (item); 2165 } 2166 2167 /* See uma.h */ 2168 void 2169 uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 2170 { 2171 uma_keg_t keg; 2172 uma_cache_t cache; 2173 uma_bucket_t bucket; 2174 int bflags; 2175 int cpu; 2176 enum zfreeskip skip; 2177 2178 /* This is the fast path free */ 2179 skip = SKIP_NONE; 2180 keg = zone->uz_keg; 2181 2182 #ifdef UMA_DEBUG_ALLOC_1 2183 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); 2184 #endif 2185 /* 2186 * The race here is acceptable. If we miss it we'll just have to wait 2187 * a little longer for the limits to be reset. 2188 */ 2189 2190 if (keg->uk_flags & UMA_ZFLAG_FULL) 2191 goto zfree_internal; 2192 2193 if (zone->uz_dtor) { 2194 zone->uz_dtor(item, keg->uk_size, udata); 2195 skip = SKIP_DTOR; 2196 } 2197 2198 zfree_restart: 2199 cpu = PCPU_GET(cpuid); 2200 CPU_LOCK(cpu); 2201 cache = &zone->uz_cpu[cpu]; 2202 2203 zfree_start: 2204 bucket = cache->uc_freebucket; 2205 2206 if (bucket) { 2207 /* 2208 * Do we have room in our bucket? It is OK for this uz count 2209 * check to be slightly out of sync. 
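		 *
		 * As a concrete illustration (the numbers are chosen only
		 * for the example): with ub_entries == 16 and ub_cnt == 2,
		 * slots 0 and 1 already hold cached items, so the item being
		 * freed is stored at ub_bucket[2] and ub_cnt becomes 3.
		 * Only once ub_cnt reaches ub_entries is the bucket full and
		 * the slower path below taken.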
2210 */ 2211 2212 if (bucket->ub_cnt < bucket->ub_entries) { 2213 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL, 2214 ("uma_zfree: Freeing to non free bucket index.")); 2215 bucket->ub_bucket[bucket->ub_cnt] = item; 2216 bucket->ub_cnt++; 2217 #ifdef INVARIANTS 2218 ZONE_LOCK(zone); 2219 if (keg->uk_flags & UMA_ZONE_MALLOC) 2220 uma_dbg_free(zone, udata, item); 2221 else 2222 uma_dbg_free(zone, NULL, item); 2223 ZONE_UNLOCK(zone); 2224 #endif 2225 CPU_UNLOCK(cpu); 2226 return; 2227 } else if (cache->uc_allocbucket) { 2228 #ifdef UMA_DEBUG_ALLOC 2229 printf("uma_zfree: Swapping buckets.\n"); 2230 #endif 2231 /* 2232 * We have run out of space in our freebucket. 2233 * See if we can switch with our alloc bucket. 2234 */ 2235 if (cache->uc_allocbucket->ub_cnt < 2236 cache->uc_freebucket->ub_cnt) { 2237 bucket = cache->uc_freebucket; 2238 cache->uc_freebucket = cache->uc_allocbucket; 2239 cache->uc_allocbucket = bucket; 2240 goto zfree_start; 2241 } 2242 } 2243 } 2244 /* 2245 * We can get here for two reasons: 2246 * 2247 * 1) The buckets are NULL 2248 * 2) The alloc and free buckets are both somewhat full. 2249 */ 2250 2251 ZONE_LOCK(zone); 2252 2253 bucket = cache->uc_freebucket; 2254 cache->uc_freebucket = NULL; 2255 2256 /* Can we throw this on the zone full list? */ 2257 if (bucket != NULL) { 2258 #ifdef UMA_DEBUG_ALLOC 2259 printf("uma_zfree: Putting old bucket on the free list.\n"); 2260 #endif 2261 /* ub_cnt is pointing to the last free item */ 2262 KASSERT(bucket->ub_cnt != 0, 2263 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); 2264 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2265 bucket, ub_link); 2266 } 2267 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 2268 LIST_REMOVE(bucket, ub_link); 2269 ZONE_UNLOCK(zone); 2270 cache->uc_freebucket = bucket; 2271 goto zfree_start; 2272 } 2273 /* We're done with this CPU now */ 2274 CPU_UNLOCK(cpu); 2275 2276 /* And the zone.. */ 2277 ZONE_UNLOCK(zone); 2278 2279 #ifdef UMA_DEBUG_ALLOC 2280 printf("uma_zfree: Allocating new free bucket.\n"); 2281 #endif 2282 bflags = M_NOWAIT; 2283 2284 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY) 2285 bflags |= M_NOVM; 2286 bucket = bucket_alloc(zone->uz_count, bflags); 2287 if (bucket) { 2288 ZONE_LOCK(zone); 2289 LIST_INSERT_HEAD(&zone->uz_free_bucket, 2290 bucket, ub_link); 2291 ZONE_UNLOCK(zone); 2292 goto zfree_restart; 2293 } 2294 2295 /* 2296 * If nothing else caught this, we'll just do an internal free. 2297 */ 2298 2299 zfree_internal: 2300 2301 #ifdef INVARIANTS 2302 /* 2303 * If we need to skip the dtor and the uma_dbg_free in 2304 * uma_zfree_internal because we've already called the dtor 2305 * above, but we ended up here, then we need to make sure 2306 * that we take care of the uma_dbg_free immediately. 
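	 *
	 * Concretely, there are two ways to arrive at the zfree_internal
	 * label: through the early goto taken when UMA_ZFLAG_FULL is set, in
	 * which case skip is still SKIP_NONE and uma_zfree_internal() will
	 * run the dtor and the debug hook itself, or by falling through
	 * after the fast path could not cache the item, in which case a zone
	 * with a dtor has already run it, skip is SKIP_DTOR, and the debug
	 * bookkeeping has to be done right here.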
2307 */ 2308 if (skip) { 2309 ZONE_LOCK(zone); 2310 if (keg->uk_flags & UMA_ZONE_MALLOC) 2311 uma_dbg_free(zone, udata, item); 2312 else 2313 uma_dbg_free(zone, NULL, item); 2314 ZONE_UNLOCK(zone); 2315 } 2316 #endif 2317 uma_zfree_internal(zone, item, udata, skip); 2318 2319 return; 2320 } 2321 2322 /* 2323 * Frees an item to an INTERNAL zone or allocates a free bucket 2324 * 2325 * Arguments: 2326 * zone The zone to free to 2327 * item The item we're freeing 2328 * udata User supplied data for the dtor 2329 * skip Skip dtors and finis 2330 */ 2331 static void 2332 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, 2333 enum zfreeskip skip) 2334 { 2335 uma_slab_t slab; 2336 uma_keg_t keg; 2337 u_int8_t *mem; 2338 u_int8_t freei; 2339 2340 keg = zone->uz_keg; 2341 2342 if (skip < SKIP_DTOR && zone->uz_dtor) 2343 zone->uz_dtor(item, keg->uk_size, udata); 2344 if (skip < SKIP_FINI && zone->uz_fini) 2345 zone->uz_fini(item, keg->uk_size); 2346 2347 ZONE_LOCK(zone); 2348 2349 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) { 2350 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); 2351 if (keg->uk_flags & UMA_ZONE_HASH) 2352 slab = hash_sfind(&keg->uk_hash, mem); 2353 else { 2354 mem += keg->uk_pgoff; 2355 slab = (uma_slab_t)mem; 2356 } 2357 } else { 2358 slab = (uma_slab_t)udata; 2359 } 2360 2361 /* Do we need to remove from any lists? */ 2362 if (slab->us_freecount+1 == keg->uk_ipers) { 2363 LIST_REMOVE(slab, us_link); 2364 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); 2365 } else if (slab->us_freecount == 0) { 2366 LIST_REMOVE(slab, us_link); 2367 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); 2368 } 2369 2370 /* Slab management stuff */ 2371 freei = ((unsigned long)item - (unsigned long)slab->us_data) 2372 / keg->uk_rsize; 2373 2374 #ifdef INVARIANTS 2375 if (!skip) 2376 uma_dbg_free(zone, slab, item); 2377 #endif 2378 2379 slab->us_freelist[freei].us_item = slab->us_firstfree; 2380 slab->us_firstfree = freei; 2381 slab->us_freecount++; 2382 2383 /* Zone statistics */ 2384 keg->uk_free++; 2385 2386 if (keg->uk_flags & UMA_ZFLAG_FULL) { 2387 if (keg->uk_pages < keg->uk_maxpages) 2388 keg->uk_flags &= ~UMA_ZFLAG_FULL; 2389 2390 /* We can handle one more allocation */ 2391 wakeup_one(keg); 2392 } 2393 2394 ZONE_UNLOCK(zone); 2395 } 2396 2397 /* See uma.h */ 2398 void 2399 uma_zone_set_max(uma_zone_t zone, int nitems) 2400 { 2401 uma_keg_t keg; 2402 2403 keg = zone->uz_keg; 2404 ZONE_LOCK(zone); 2405 if (keg->uk_ppera > 1) 2406 keg->uk_maxpages = nitems * keg->uk_ppera; 2407 else 2408 keg->uk_maxpages = nitems / keg->uk_ipers; 2409 2410 if (keg->uk_maxpages * keg->uk_ipers < nitems) 2411 keg->uk_maxpages++; 2412 2413 ZONE_UNLOCK(zone); 2414 } 2415 2416 /* See uma.h */ 2417 void 2418 uma_zone_set_init(uma_zone_t zone, uma_init uminit) 2419 { 2420 ZONE_LOCK(zone); 2421 KASSERT(zone->uz_keg->uk_pages == 0, 2422 ("uma_zone_set_init on non-empty keg")); 2423 zone->uz_keg->uk_init = uminit; 2424 ZONE_UNLOCK(zone); 2425 } 2426 2427 /* See uma.h */ 2428 void 2429 uma_zone_set_fini(uma_zone_t zone, uma_fini fini) 2430 { 2431 ZONE_LOCK(zone); 2432 KASSERT(zone->uz_keg->uk_pages == 0, 2433 ("uma_zone_set_fini on non-empty keg")); 2434 zone->uz_keg->uk_fini = fini; 2435 ZONE_UNLOCK(zone); 2436 } 2437 2438 /* See uma.h */ 2439 void 2440 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) 2441 { 2442 ZONE_LOCK(zone); 2443 KASSERT(zone->uz_keg->uk_pages == 0, 2444 ("uma_zone_set_zinit on non-empty keg")); 2445 zone->uz_init = zinit; 2446 ZONE_UNLOCK(zone); 2447 } 2448 2449 /* See uma.h 
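 *
 * uma_zone_set_zfini() below installs a zone-level fini callback.  Judging
 * from the call sites in this file, such a callback takes the item and the
 * keg's item size and returns nothing.  A minimal sketch of a consumer (the
 * names below are hypothetical, not taken from this file):
 *
 *	static void
 *	foo_zfini(void *mem, int size)
 *	{
 *
 *		bzero(mem, size);
 *	}
 *
 *	uma_zone_set_zfini(foo_zone, foo_zfini);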
 */
2450 void
2451 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2452 {
2453 	ZONE_LOCK(zone);
2454 	KASSERT(zone->uz_keg->uk_pages == 0,
2455 	    ("uma_zone_set_zfini on non-empty keg"));
2456 	zone->uz_fini = zfini;
2457 	ZONE_UNLOCK(zone);
2458 }
2459 
2460 /* See uma.h */
2461 /* XXX uk_freef is not actually used with the zone locked */
2462 void
2463 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2464 {
2465 	ZONE_LOCK(zone);
2466 	zone->uz_keg->uk_freef = freef;
2467 	ZONE_UNLOCK(zone);
2468 }
2469 
2470 /* See uma.h */
2471 /* XXX uk_allocf is not actually used with the zone locked */
2472 void
2473 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2474 {
2475 	ZONE_LOCK(zone);
2476 	zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2477 	zone->uz_keg->uk_allocf = allocf;
2478 	ZONE_UNLOCK(zone);
2479 }
2480 
2481 /* See uma.h */
2482 int
2483 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2484 {
2485 	uma_keg_t keg;
2486 	vm_offset_t kva;
2487 	int pages;
2488 
2489 	keg = zone->uz_keg;
2490 	pages = count / keg->uk_ipers;
2491 
2492 	if (pages * keg->uk_ipers < count)
2493 		pages++;
2494 
2495 	kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2496 
2497 	if (kva == 0)
2498 		return (0);
2499 	if (obj == NULL) {
2500 		obj = vm_object_allocate(OBJT_DEFAULT,
2501 		    pages);
2502 	} else {
2503 		VM_OBJECT_LOCK_INIT(obj, "uma object");
2504 		_vm_object_allocate(OBJT_DEFAULT,
2505 		    pages, obj);
2506 	}
2507 	ZONE_LOCK(zone);
2508 	keg->uk_kva = kva;
2509 	keg->uk_obj = obj;
2510 	keg->uk_maxpages = pages;
2511 	keg->uk_allocf = obj_alloc;
2512 	keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2513 	ZONE_UNLOCK(zone);
2514 	return (1);
2515 }
2516 
2517 /* See uma.h */
2518 void
2519 uma_prealloc(uma_zone_t zone, int items)
2520 {
2521 	int slabs;
2522 	uma_slab_t slab;
2523 	uma_keg_t keg;
2524 
2525 	keg = zone->uz_keg;
2526 	ZONE_LOCK(zone);
2527 	slabs = items / keg->uk_ipers;
2528 	if (slabs * keg->uk_ipers < items)
2529 		slabs++;
2530 	while (slabs > 0) {
2531 		slab = slab_zalloc(zone, M_WAITOK);
2532 		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2533 		slabs--;
2534 	}
2535 	ZONE_UNLOCK(zone);
2536 }
2537 
2538 /* See uma.h */
2539 u_int32_t *
2540 uma_find_refcnt(uma_zone_t zone, void *item)
2541 {
2542 	uma_slabrefcnt_t slab;
2543 	uma_keg_t keg;
2544 	u_int32_t *refcnt;
2545 	int idx;
2546 
2547 	keg = zone->uz_keg;
2548 	slab = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
2549 	KASSERT(slab != NULL,
2550 	    ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2551 	idx = ((unsigned long)item - (unsigned long)slab->us_data)
2552 	    / keg->uk_rsize;
2553 	refcnt = &(slab->us_freelist[idx].us_refcnt);
2554 	return (refcnt);
2555 }
2556 
2557 /* See uma.h */
2558 void
2559 uma_reclaim(void)
2560 {
2561 #ifdef UMA_DEBUG
2562 	printf("UMA: vm asked us to release pages!\n");
2563 #endif
2564 	bucket_enable();
2565 	zone_foreach(zone_drain);
2566 	/*
2567 	 * Some slabs may have been freed, but the slab zones are visited
2568 	 * early in the pass above; visit them again so that pages which have
2569 	 * become empty now that the other zones are drained can be freed.  We have to do the same for buckets.
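	 *
	 * As an illustration of the ordering: a zone that keeps its slab
	 * headers off-page allocates those headers from slabzone, so
	 * draining such a zone returns headers to slabzone, and only after
	 * the pass above can slabzone's own pages become empty.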
2570 */ 2571 zone_drain(slabzone); 2572 zone_drain(slabrefzone); 2573 bucket_zone_drain(); 2574 } 2575 2576 void * 2577 uma_large_malloc(int size, int wait) 2578 { 2579 void *mem; 2580 uma_slab_t slab; 2581 u_int8_t flags; 2582 2583 slab = uma_zalloc_internal(slabzone, NULL, wait); 2584 if (slab == NULL) 2585 return (NULL); 2586 mem = page_alloc(NULL, size, &flags, wait); 2587 if (mem) { 2588 vsetslab((vm_offset_t)mem, slab); 2589 slab->us_data = mem; 2590 slab->us_flags = flags | UMA_SLAB_MALLOC; 2591 slab->us_size = size; 2592 } else { 2593 uma_zfree_internal(slabzone, slab, NULL, 0); 2594 } 2595 2596 return (mem); 2597 } 2598 2599 void 2600 uma_large_free(uma_slab_t slab) 2601 { 2602 vsetobj((vm_offset_t)slab->us_data, kmem_object); 2603 page_free(slab->us_data, slab->us_size, slab->us_flags); 2604 uma_zfree_internal(slabzone, slab, NULL, 0); 2605 } 2606 2607 void 2608 uma_print_stats(void) 2609 { 2610 zone_foreach(uma_print_zone); 2611 } 2612 2613 static void 2614 slab_print(uma_slab_t slab) 2615 { 2616 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n", 2617 slab->us_keg, slab->us_data, slab->us_freecount, 2618 slab->us_firstfree); 2619 } 2620 2621 static void 2622 cache_print(uma_cache_t cache) 2623 { 2624 printf("alloc: %p(%d), free: %p(%d)\n", 2625 cache->uc_allocbucket, 2626 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0, 2627 cache->uc_freebucket, 2628 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0); 2629 } 2630 2631 void 2632 uma_print_zone(uma_zone_t zone) 2633 { 2634 uma_cache_t cache; 2635 uma_keg_t keg; 2636 uma_slab_t slab; 2637 int i; 2638 2639 keg = zone->uz_keg; 2640 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", 2641 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags, 2642 keg->uk_ipers, keg->uk_ppera, 2643 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free); 2644 printf("Part slabs:\n"); 2645 LIST_FOREACH(slab, &keg->uk_part_slab, us_link) 2646 slab_print(slab); 2647 printf("Free slabs:\n"); 2648 LIST_FOREACH(slab, &keg->uk_free_slab, us_link) 2649 slab_print(slab); 2650 printf("Full slabs:\n"); 2651 LIST_FOREACH(slab, &keg->uk_full_slab, us_link) 2652 slab_print(slab); 2653 for (i = 0; i <= mp_maxid; i++) { 2654 if (CPU_ABSENT(i)) 2655 continue; 2656 cache = &zone->uz_cpu[i]; 2657 printf("CPU %d Cache:\n", i); 2658 cache_print(cache); 2659 } 2660 } 2661 2662 /* 2663 * Sysctl handler for vm.zone 2664 * 2665 * stolen from vm_zone.c 2666 */ 2667 static int 2668 sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 2669 { 2670 int error, len, cnt; 2671 const int linesize = 128; /* conservative */ 2672 int totalfree; 2673 char *tmpbuf, *offset; 2674 uma_zone_t z; 2675 uma_keg_t zk; 2676 char *p; 2677 int cpu; 2678 int cachefree; 2679 uma_bucket_t bucket; 2680 uma_cache_t cache; 2681 2682 cnt = 0; 2683 mtx_lock(&uma_mtx); 2684 LIST_FOREACH(zk, &uma_kegs, uk_link) { 2685 LIST_FOREACH(z, &zk->uk_zones, uz_link) 2686 cnt++; 2687 } 2688 mtx_unlock(&uma_mtx); 2689 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize, 2690 M_TEMP, M_WAITOK); 2691 len = snprintf(tmpbuf, linesize, 2692 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n"); 2693 if (cnt == 0) 2694 tmpbuf[len - 1] = '\0'; 2695 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? 
len-1 : len); 2696 if (error || cnt == 0) 2697 goto out; 2698 offset = tmpbuf; 2699 mtx_lock(&uma_mtx); 2700 LIST_FOREACH(zk, &uma_kegs, uk_link) { 2701 LIST_FOREACH(z, &zk->uk_zones, uz_link) { 2702 if (cnt == 0) /* list may have changed size */ 2703 break; 2704 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { 2705 for (cpu = 0; cpu <= mp_maxid; cpu++) { 2706 if (CPU_ABSENT(cpu)) 2707 continue; 2708 CPU_LOCK(cpu); 2709 } 2710 } 2711 ZONE_LOCK(z); 2712 cachefree = 0; 2713 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) { 2714 for (cpu = 0; cpu <= mp_maxid; cpu++) { 2715 if (CPU_ABSENT(cpu)) 2716 continue; 2717 cache = &z->uz_cpu[cpu]; 2718 if (cache->uc_allocbucket != NULL) 2719 cachefree += cache->uc_allocbucket->ub_cnt; 2720 if (cache->uc_freebucket != NULL) 2721 cachefree += cache->uc_freebucket->ub_cnt; 2722 CPU_UNLOCK(cpu); 2723 } 2724 } 2725 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) { 2726 cachefree += bucket->ub_cnt; 2727 } 2728 totalfree = zk->uk_free + cachefree; 2729 len = snprintf(offset, linesize, 2730 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", 2731 z->uz_name, zk->uk_size, 2732 zk->uk_maxpages * zk->uk_ipers, 2733 (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree, 2734 totalfree, 2735 (unsigned long long)z->uz_allocs); 2736 ZONE_UNLOCK(z); 2737 for (p = offset + 12; p > offset && *p == ' '; --p) 2738 /* nothing */ ; 2739 p[1] = ':'; 2740 cnt--; 2741 offset += len; 2742 } 2743 } 2744 mtx_unlock(&uma_mtx); 2745 *offset++ = '\0'; 2746 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); 2747 out: 2748 FREE(tmpbuf, M_TEMP); 2749 return (error); 2750 } 2751
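/*
 * Registration sketch for the handler above: a read-only string node under
 * the vm sysctl tree is the usual hookup for a handler like sysctl_vm_zone().
 * The line below is only an illustration of that pattern (the description
 * string is made up here); it is not a second, real registration.
 *
 *	SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING | CTLFLAG_RD,
 *	    NULL, 0, sysctl_vm_zone, "A", "Zone statistics");
 */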