/*-
 * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff@FreeBSD.org>
 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
 * Copyright (c) 2004-2006 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uma_core.c  Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.  This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose algorithms
 * are well known.
 *
 */

/*
 * TODO:
 *	- Improve memory usage for large allocations
 *	- Investigate cache size adjustments
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/* I should really use ktr.. */
/*
#define UMA_DEBUG 1
#define UMA_DEBUG_ALLOC 1
#define UMA_DEBUG_ALLOC_1 1
*/

#include "opt_ddb.h"
#include "opt_param.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

#include <machine/vmparam.h>

#include <ddb/ddb.h>

/*
 * This is the zone and keg from which all zones are spawned.  The idea is that
 * even the zone & keg heads are allocated from the allocator, so we use the
 * bss section to bootstrap us.
 */
static struct uma_keg masterkeg;
static struct uma_zone masterzone_k;
static struct uma_zone masterzone_z;
static uma_zone_t kegs = &masterzone_k;
static uma_zone_t zones = &masterzone_z;

/* This is the zone from which all of uma_slab_t's are allocated. */
static uma_zone_t slabzone;
static uma_zone_t slabrefzone;	/* With refcounters (for UMA_ZONE_REFCNT) */

/*
 * The initial hash tables come out of this zone so they can be allocated
 * prior to malloc coming up.
 */
static uma_zone_t hashzone;

/* The boot-time adjusted value for cache line alignment. */
static int uma_align_cache = 16 - 1;

static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");

/*
 * Are we allowed to allocate buckets?
 */
static int bucketdisable = 1;

/* Linked list of all kegs in the system */
static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);

/* This mutex protects the keg list */
static struct mtx uma_mtx;

/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
    LIST_HEAD_INITIALIZER(&uma_boot_pages);

/* This mutex protects the boot time pages list */
static struct mtx uma_boot_pages_mtx;

/* Is the VM done starting up? */
static int booted = 0;

/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
static u_int uma_max_ipers;
static u_int uma_max_ipers_ref;

/*
 * This is the handle used to schedule events that need to happen
 * outside of the allocation fast path.
 */
static struct callout uma_callout;
#define	UMA_TIMEOUT	20		/* Seconds for callout interval. */

/*
 * This structure is passed as the zone ctor arg so that I don't have to create
 * a special allocation function just for zones.
 */
struct uma_zctor_args {
	char *name;
	size_t size;
	uma_ctor ctor;
	uma_dtor dtor;
	uma_init uminit;
	uma_fini fini;
	uma_keg_t keg;
	int align;
	u_int32_t flags;
};

struct uma_kctor_args {
	uma_zone_t zone;
	size_t size;
	uma_init uminit;
	uma_fini fini;
	int align;
	u_int32_t flags;
};

struct uma_bucket_zone {
	uma_zone_t	ubz_zone;
	char		*ubz_name;
	int		ubz_entries;
};

#define	BUCKET_MAX	128

struct uma_bucket_zone bucket_zones[] = {
	{ NULL, "16 Bucket", 16 },
	{ NULL, "32 Bucket", 32 },
	{ NULL, "64 Bucket", 64 },
	{ NULL, "128 Bucket", 128 },
	{ NULL, NULL, 0}
};

#define	BUCKET_SHIFT	4
#define	BUCKET_ZONES	((BUCKET_MAX >> BUCKET_SHIFT) + 1)

/*
 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
 * of approximately the right size.
 */
static uint8_t bucket_size[BUCKET_ZONES];

/*
 * Flags and enumerations to be passed to internal functions.
 */
enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };

#define	ZFREE_STATFAIL	0x00000001	/* Update zone failure statistic. */
#define	ZFREE_STATFREE	0x00000002	/* Update zone free statistic. */

/* Prototypes.. */
static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
static void page_free(void *, int, u_int8_t);
static uma_slab_t slab_zalloc(uma_zone_t, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void bucket_cache_drain(uma_zone_t zone);
static int keg_ctor(void *, int, void *, int);
static void keg_dtor(void *, int, void *);
static int zone_ctor(void *, int, void *, int);
static void zone_dtor(void *, int, void *);
static int zero_init(void *, int, int);
static void zone_small_init(uma_zone_t zone);
static void zone_large_init(uma_zone_t zone);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
static int hash_alloc(struct uma_hash *);
static int hash_expand(struct uma_hash *, struct uma_hash *);
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *uma_zalloc_internal(uma_zone_t, void *, int);
static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
    int);
static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(int, int);
static void bucket_free(uma_bucket_t);
static void bucket_zone_drain(void);
static int uma_zalloc_bucket(uma_zone_t zone, int flags);
static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
    uma_fini fini, int align, u_int32_t flags);

void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);

SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);

SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");

SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");

/*
 * This routine checks to see whether or not it's safe to enable buckets.
 */

static void
bucket_enable(void)
{
	if (cnt.v_free_count < cnt.v_free_min)
		bucketdisable = 1;
	else
		bucketdisable = 0;
}

/*
 * Initialize bucket_zones, the array of zones of buckets of various sizes.
 *
 * For each zone, calculate the memory required for each bucket, consisting
 * of the header and an array of pointers.  Initialize bucket_size[] to point
 * the range of appropriate bucket sizes at the zone.
 */
static void
bucket_init(void)
{
	struct uma_bucket_zone *ubz;
	int i;
	int j;

	for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
		int size;

		ubz = &bucket_zones[j];
		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
		size += sizeof(void *) * ubz->ubz_entries;
		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
		for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
			bucket_size[i >> BUCKET_SHIFT] = j;
	}
}

/*
 * Given a desired number of entries for a bucket, return the zone from which
 * to allocate the bucket.
 */
static struct uma_bucket_zone *
bucket_zone_lookup(int entries)
{
	int idx;

	idx = howmany(entries, 1 << BUCKET_SHIFT);
	return (&bucket_zones[bucket_size[idx]]);
}
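
/*
 * Illustrative note (not part of the original source): with BUCKET_SHIFT
 * equal to 4, bucket_init() fills bucket_size[] so that each 16-entry step
 * maps to the smallest bucket zone that can hold it.  For example, a
 * request for 20 entries gives howmany(20, 16) == 2 in
 * bucket_zone_lookup(), and bucket_size[2] indexes the "32 Bucket" zone; a
 * request for exactly 16 entries maps through bucket_size[1] to the
 * "16 Bucket" zone.
 */
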
static uma_bucket_t
bucket_alloc(int entries, int bflags)
{
	struct uma_bucket_zone *ubz;
	uma_bucket_t bucket;

	/*
	 * This is to stop us from allocating per cpu buckets while we're
	 * running out of vm.boot_pages.  Otherwise, we would exhaust the
	 * boot pages.  This also prevents us from allocating buckets in
	 * low memory situations.
	 */
	if (bucketdisable)
		return (NULL);

	ubz = bucket_zone_lookup(entries);
	bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
	if (bucket) {
#ifdef INVARIANTS
		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
#endif
		bucket->ub_cnt = 0;
		bucket->ub_entries = ubz->ubz_entries;
	}

	return (bucket);
}

static void
bucket_free(uma_bucket_t bucket)
{
	struct uma_bucket_zone *ubz;

	ubz = bucket_zone_lookup(bucket->ub_entries);
	uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
	    ZFREE_STATFREE);
}

static void
bucket_zone_drain(void)
{
	struct uma_bucket_zone *ubz;

	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
		zone_drain(ubz->ubz_zone);
}

/*
 * Routine called by timeout which is used to fire off some time interval
 * based calculations.  (stats, hash size, etc.)
 *
 * Arguments:
 *	arg   Unused
 *
 * Returns:
 *	Nothing
 */
static void
uma_timeout(void *unused)
{
	bucket_enable();
	zone_foreach(zone_timeout);

	/* Reschedule this event */
	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
}

/*
 * Routine to perform timeout driven calculations.  This expands the
 * hashes and does per cpu statistics aggregation.
 *
 * Arguments:
 *	zone  The zone to operate on
 *
 * Returns:
 *	Nothing
 */
static void
zone_timeout(uma_zone_t zone)
{
	uma_keg_t keg;
	u_int64_t alloc;

	keg = zone->uz_keg;
	alloc = 0;

	/*
	 * Expand the zone hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is completely reduce collisions.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */
	ZONE_LOCK(zone);
	if (keg->uk_flags & UMA_ZONE_HASH &&
	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
		struct uma_hash newhash;
		struct uma_hash oldhash;
		int ret;

		/*
		 * This is so involved because allocating and freeing
		 * while the zone lock is held will lead to deadlock.
		 * I have to do everything in stages and check for
		 * races.
		 */
		newhash = keg->uk_hash;
		ZONE_UNLOCK(zone);
		ret = hash_alloc(&newhash);
		ZONE_LOCK(zone);
		if (ret) {
			if (hash_expand(&keg->uk_hash, &newhash)) {
				oldhash = keg->uk_hash;
				keg->uk_hash = newhash;
			} else
				oldhash = newhash;

			ZONE_UNLOCK(zone);
			hash_free(&oldhash);
			ZONE_LOCK(zone);
		}
	}
	ZONE_UNLOCK(zone);
}

/*
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	hash  A new hash structure with the old hash size in uh_hashsize
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
static int
hash_alloc(struct uma_hash *hash)
{
	int oldsize;
	int alloc;

	oldsize = hash->uh_hashsize;

	/* We're just going to go to a power of two greater */
	if (oldsize)  {
		hash->uh_hashsize = oldsize * 2;
		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
		    M_UMAHASH, M_NOWAIT);
	} else {
		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
		    M_WAITOK);
		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
	}
	if (hash->uh_slab_hash) {
		bzero(hash->uh_slab_hash, alloc);
		hash->uh_hashmask = hash->uh_hashsize - 1;
		return (1);
	}

	return (0);
}

/*
 * Expands the hash table for HASH zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation
 * path, otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	oldhash  The hash you want to expand
 *	newhash  The hash structure for the new table
 *
 * Returns:
 *	Nothing
 *
 * Discussion:
 */
static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
	uma_slab_t slab;
	int hval;
	int i;

	if (!newhash->uh_slab_hash)
		return (0);

	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
		return (0);

	/*
	 * I need to investigate hash algorithms for resizing without a
	 * full rehash.
	 */

	for (i = 0; i < oldhash->uh_hashsize; i++)
		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
			hval = UMA_HASH(newhash, slab->us_data);
			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
			    slab, us_hlink);
		}

	return (1);
}

/*
 * Free the hash bucket to the appropriate backing store.
 *
 * Arguments:
 *	slab_hash  The hash bucket we're freeing
 *	hashsize   The number of entries in that hash bucket
 *
 * Returns:
 *	Nothing
 */
static void
hash_free(struct uma_hash *hash)
{
	if (hash->uh_slab_hash == NULL)
		return;
	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
		uma_zfree_internal(hashzone,
		    hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
	else
		free(hash->uh_slab_hash, M_UMAHASH);
}
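
/*
 * Illustrative note (not part of the original source): hash_alloc() always
 * doubles uh_hashsize, so a keg's hash grows UMA_HASH_SIZE_INIT, then 2x,
 * 4x, and so on as zone_timeout() notices at least as many slabs as hash
 * buckets.  Only the very first table of UMA_HASH_SIZE_INIT entries comes
 * from hashzone; every later (and therefore larger) table is malloc(9)
 * backed, which is why hash_free() checks uh_hashsize to pick the backing
 * store.
 */
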
/*
 * Frees all outstanding items in a bucket
 *
 * Arguments:
 *	zone    The zone to free to, must be unlocked.
 *	bucket  The free/alloc bucket with items, cpu queue must be locked.
 *
 * Returns:
 *	Nothing
 */

static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
	uma_slab_t slab;
	int mzone;
	void *item;

	if (bucket == NULL)
		return;

	slab = NULL;
	mzone = 0;

	/* We have to lookup the slab again for malloc.. */
	if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
		mzone = 1;

	while (bucket->ub_cnt > 0)  {
		bucket->ub_cnt--;
		item = bucket->ub_bucket[bucket->ub_cnt];
#ifdef INVARIANTS
		bucket->ub_bucket[bucket->ub_cnt] = NULL;
		KASSERT(item != NULL,
		    ("bucket_drain: botched ptr, item is NULL"));
#endif
		/*
		 * This is extremely inefficient.  The slab pointer was passed
		 * to uma_zfree_arg, but we lost it because the buckets don't
		 * hold them.  This will go away when free() gets a size passed
		 * to it.
		 */
		if (mzone)
			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
		uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
	}
}

/*
 * Drains the per cpu caches for a zone.
 *
 * NOTE: This may only be called while the zone is being torn down, and not
 * during normal operation.  This is necessary in order that we do not have
 * to migrate CPUs to drain the per-CPU caches.
 *
 * Arguments:
 *	zone  The zone to drain, must be unlocked.
 *
 * Returns:
 *	Nothing
 */
static void
cache_drain(uma_zone_t zone)
{
	uma_cache_t cache;
	int cpu;

	/*
	 * XXX: It is safe to not lock the per-CPU caches, because we're
	 * tearing down the zone anyway.  I.e., there will be no further use
	 * of the caches at this point.
	 *
	 * XXX: It would be good to be able to assert that the zone is being
	 * torn down to prevent improper use of cache_drain().
	 *
	 * XXX: We lock the zone before passing into bucket_cache_drain() as
	 * it is used elsewhere.  Should the tear-down path be made special
	 * there in some form?
	 */
	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		cache = &zone->uz_cpu[cpu];
		bucket_drain(zone, cache->uc_allocbucket);
		bucket_drain(zone, cache->uc_freebucket);
		if (cache->uc_allocbucket != NULL)
			bucket_free(cache->uc_allocbucket);
		if (cache->uc_freebucket != NULL)
			bucket_free(cache->uc_freebucket);
		cache->uc_allocbucket = cache->uc_freebucket = NULL;
	}
	ZONE_LOCK(zone);
	bucket_cache_drain(zone);
	ZONE_UNLOCK(zone);
}

/*
 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
 */
static void
bucket_cache_drain(uma_zone_t zone)
{
	uma_bucket_t bucket;

	/*
	 * Drain the bucket queues and free the buckets, we just keep two per
	 * cpu (alloc/free).
	 */
	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		bucket_drain(zone, bucket);
		bucket_free(bucket);
		ZONE_LOCK(zone);
	}

	/* Now we do the free queue.. */
	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		bucket_free(bucket);
	}
}

/*
 * Frees pages from a zone back to the system.  This is done on demand from
 * the pageout daemon.
 *
 * Arguments:
 *	zone  The zone to free pages from
 *
 * Returns:
 *	Nothing.
 */
void
zone_drain(uma_zone_t zone)
{
	struct slabhead freeslabs = { 0 };
	uma_keg_t keg;
	uma_slab_t slab;
	uma_slab_t n;
	u_int8_t flags;
	u_int8_t *mem;
	int i;

	keg = zone->uz_keg;

	/*
	 * We don't want to take pages from statically allocated zones at this
	 * time
	 */
	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
		return;

	ZONE_LOCK(zone);

#ifdef UMA_DEBUG
	printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
#endif
	bucket_cache_drain(zone);
	if (keg->uk_free == 0)
		goto finished;

	slab = LIST_FIRST(&keg->uk_free_slab);
	while (slab) {
		n = LIST_NEXT(slab, us_link);

		/* We have nowhere to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		keg->uk_pages -= keg->uk_ppera;
		keg->uk_free -= keg->uk_ipers;

		if (keg->uk_flags & UMA_ZONE_HASH)
			UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);

		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);

		slab = n;
	}
finished:
	ZONE_UNLOCK(zone);

	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
		if (keg->uk_fini)
			for (i = 0; i < keg->uk_ipers; i++)
				keg->uk_fini(
				    slab->us_data + (keg->uk_rsize * i),
				    keg->uk_size);
		flags = slab->us_flags;
		mem = slab->us_data;

		if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
		    (keg->uk_flags & UMA_ZONE_REFCNT)) {
			vm_object_t obj;

			if (flags & UMA_SLAB_KMEM)
				obj = kmem_object;
			else if (flags & UMA_SLAB_KERNEL)
				obj = kernel_object;
			else
				obj = NULL;
			for (i = 0; i < keg->uk_ppera; i++)
				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
				    obj);
		}
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
			    SKIP_NONE, ZFREE_STATFREE);
#ifdef UMA_DEBUG
		printf("%s: Returning %d bytes.\n",
		    zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
#endif
		keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
	}
}
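
/*
 * Illustrative note (not part of the original source): zone_drain() works
 * in two passes.  Under the zone lock it only unlinks eligible free slabs
 * and collects them on the local freeslabs list; the expensive work
 * (running uk_fini on each item, releasing OFFPAGE slab headers, and
 * handing the pages back through uk_freef) happens after the lock is
 * dropped, so the page allocator is never entered with the zone lock held.
 */
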
/*
 * Allocate a new slab for a zone.  This does not insert the slab onto a list.
 *
 * Arguments:
 *	zone  The zone to allocate slabs for
 *	wait  Shall we wait?
 *
 * Returns:
 *	The slab that was allocated or NULL if there is no memory and the
 *	caller specified M_NOWAIT.
 */
static uma_slab_t
slab_zalloc(uma_zone_t zone, int wait)
{
	uma_slabrefcnt_t slabref;
	uma_slab_t slab;
	uma_keg_t keg;
	u_int8_t *mem;
	u_int8_t flags;
	int i;

	slab = NULL;
	keg = zone->uz_keg;

#ifdef UMA_DEBUG
	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
#endif
	ZONE_UNLOCK(zone);

	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
		if (slab == NULL) {
			ZONE_LOCK(zone);
			return NULL;
		}
	}

	/*
	 * This reproduces the old vm_zone behavior of zero filling pages the
	 * first time they are added to a zone.
	 *
	 * Malloced items are zeroed in uma_zalloc.
	 */

	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
		wait |= M_ZERO;
	else
		wait &= ~M_ZERO;

	mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
	    &flags, wait);
	if (mem == NULL) {
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
			    SKIP_NONE, ZFREE_STATFREE);
		ZONE_LOCK(zone);
		return (NULL);
	}

	/* Point the slab into the allocated memory */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
		slab = (uma_slab_t)(mem + keg->uk_pgoff);

	if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
	    (keg->uk_flags & UMA_ZONE_REFCNT))
		for (i = 0; i < keg->uk_ppera; i++)
			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);

	slab->us_keg = keg;
	slab->us_data = mem;
	slab->us_freecount = keg->uk_ipers;
	slab->us_firstfree = 0;
	slab->us_flags = flags;

	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		slabref = (uma_slabrefcnt_t)slab;
		for (i = 0; i < keg->uk_ipers; i++) {
			slabref->us_freelist[i].us_refcnt = 0;
			slabref->us_freelist[i].us_item = i+1;
		}
	} else {
		for (i = 0; i < keg->uk_ipers; i++)
			slab->us_freelist[i].us_item = i+1;
	}

	if (keg->uk_init != NULL) {
		for (i = 0; i < keg->uk_ipers; i++)
			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
			    keg->uk_size, wait) != 0)
				break;
		if (i != keg->uk_ipers) {
			if (keg->uk_fini != NULL) {
				for (i--; i > -1; i--)
					keg->uk_fini(slab->us_data +
					    (keg->uk_rsize * i),
					    keg->uk_size);
			}
			if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
			    (keg->uk_flags & UMA_ZONE_REFCNT)) {
				vm_object_t obj;

				if (flags & UMA_SLAB_KMEM)
					obj = kmem_object;
				else if (flags & UMA_SLAB_KERNEL)
					obj = kernel_object;
				else
					obj = NULL;
				for (i = 0; i < keg->uk_ppera; i++)
					vsetobj((vm_offset_t)mem +
					    (i * PAGE_SIZE), obj);
			}
			if (keg->uk_flags & UMA_ZONE_OFFPAGE)
				uma_zfree_internal(keg->uk_slabzone, slab,
				    NULL, SKIP_NONE, ZFREE_STATFREE);
			keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
			    flags);
			ZONE_LOCK(zone);
			return (NULL);
		}
	}
	ZONE_LOCK(zone);

	if (keg->uk_flags & UMA_ZONE_HASH)
		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);

	keg->uk_pages += keg->uk_ppera;
	keg->uk_free += keg->uk_ipers;

	return (slab);
}
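
/*
 * Illustrative note (not part of the original source): the per-slab free
 * list built above is an index-linked chain rather than a pointer list:
 * us_firstfree starts at 0 and us_freelist[i].us_item points at item i + 1,
 * so the last entry nominally points one past the end (uk_ipers).  That
 * sentinel is never followed because us_freecount reaches zero first; see
 * uma_slab_alloc() for the matching pop operation.
 */
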
/*
 * This function is intended to be used early on in place of page_alloc() so
 * that we may use the boot time page cache to satisfy allocations before
 * the VM is ready.
 */
static void *
startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	uma_keg_t keg;
	uma_slab_t tmps;

	keg = zone->uz_keg;

	/*
	 * Check our small startup cache to see if it has pages remaining.
	 */
	mtx_lock(&uma_boot_pages_mtx);
	if ((tmps = LIST_FIRST(&uma_boot_pages)) != NULL) {
		LIST_REMOVE(tmps, us_link);
		mtx_unlock(&uma_boot_pages_mtx);
		*pflag = tmps->us_flags;
		return (tmps->us_data);
	}
	mtx_unlock(&uma_boot_pages_mtx);
	if (booted == 0)
		panic("UMA: Increase vm.boot_pages");
	/*
	 * Now that we've booted reset these users to their real allocator.
	 */
#ifdef UMA_MD_SMALL_ALLOC
	keg->uk_allocf = uma_small_alloc;
#else
	keg->uk_allocf = page_alloc;
#endif
	return keg->uk_allocf(zone, bytes, pflag, wait);
}

/*
 * Allocates a number of pages from the system
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the allocated memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	void *p;	/* Returned page */

	*pflag = UMA_SLAB_KMEM;
	p = (void *) kmem_malloc(kmem_map, bytes, wait);

	return (p);
}

/*
 * Allocates a number of pages from within an object
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the allocated memory or possibly
 *	NULL if M_NOWAIT is set.
 */
static void *
obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	vm_object_t object;
	vm_offset_t retkva, zkva;
	vm_page_t p;
	int pages, startpages;

	object = zone->uz_keg->uk_obj;
	retkva = 0;

	/*
	 * This looks a little weird since we're getting one page at a time.
	 */
	VM_OBJECT_LOCK(object);
	p = TAILQ_LAST(&object->memq, pglist);
	pages = p != NULL ? p->pindex + 1 : 0;
	startpages = pages;
	zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
	for (; bytes > 0; bytes -= PAGE_SIZE) {
		p = vm_page_alloc(object, pages,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
		if (p == NULL) {
			if (pages != startpages)
				pmap_qremove(retkva, pages - startpages);
			while (pages != startpages) {
				pages--;
				p = TAILQ_LAST(&object->memq, pglist);
				vm_page_lock_queues();
				vm_page_unwire(p, 0);
				vm_page_free(p);
				vm_page_unlock_queues();
			}
			retkva = 0;
			goto done;
		}
		pmap_qenter(zkva, &p, 1);
		if (retkva == 0)
			retkva = zkva;
		zkva += PAGE_SIZE;
		pages += 1;
	}
done:
	VM_OBJECT_UNLOCK(object);
	*flags = UMA_SLAB_PRIV;

	return ((void *)retkva);
}

/*
 * Frees a number of pages to the system
 *
 * Arguments:
 *	mem    A pointer to the memory to be freed
 *	size   The size of the memory being freed
 *	flags  The original p->us_flags field
 *
 * Returns:
 *	Nothing
 */
static void
page_free(void *mem, int size, u_int8_t flags)
{
	vm_map_t map;

	if (flags & UMA_SLAB_KMEM)
		map = kmem_map;
	else
		panic("UMA: page_free used with invalid flags %d\n", flags);

	kmem_free(map, (vm_offset_t)mem, size);
}

/*
 * Zero fill initializer
 *
 * Arguments/Returns follow uma_init specifications
 */
static int
zero_init(void *mem, int size, int flags)
{
	bzero(mem, size);
	return (0);
}
/*
 * Finish creating a small uma zone.  This calculates ipers, and the zone size.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_small_init(uma_zone_t zone)
{
	uma_keg_t keg;
	u_int rsize;
	u_int memused;
	u_int wastedspace;
	u_int shsize;

	keg = zone->uz_keg;
	KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
	rsize = keg->uk_size;

	if (rsize < UMA_SMALLEST_UNIT)
		rsize = UMA_SMALLEST_UNIT;
	if (rsize & keg->uk_align)
		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);

	keg->uk_rsize = rsize;
	keg->uk_ppera = 1;

	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		rsize += UMA_FRITMREF_SZ;	/* linkage & refcnt */
		shsize = sizeof(struct uma_slab_refcnt);
	} else {
		rsize += UMA_FRITM_SZ;	/* Account for linkage */
		shsize = sizeof(struct uma_slab);
	}

	keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
	KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
	memused = keg->uk_ipers * rsize + shsize;
	wastedspace = UMA_SLAB_SIZE - memused;

	/*
	 * We can't do OFFPAGE if we're internal or if we've been
	 * asked to not go to the VM for buckets.  If we do this we
	 * may end up going to the VM (kmem_map) for slabs which we
	 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
	 * result of UMA_ZONE_VM, which clearly forbids it.
	 */
	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
		return;

	if ((wastedspace >= UMA_MAX_WASTE) &&
	    (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
		keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
		KASSERT(keg->uk_ipers <= 255,
		    ("zone_small_init: keg->uk_ipers too high!"));
#ifdef UMA_DEBUG
		printf("UMA decided we need offpage slab headers for "
		    "zone: %s, calculated wastedspace = %d, "
		    "maximum wasted space allowed = %d, "
		    "calculated ipers = %d, "
		    "new wasted space = %d\n", zone->uz_name, wastedspace,
		    UMA_MAX_WASTE, keg->uk_ipers,
		    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
#endif
		keg->uk_flags |= UMA_ZONE_OFFPAGE;
		if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
			keg->uk_flags |= UMA_ZONE_HASH;
	}
}

/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_large_init(uma_zone_t zone)
{
	uma_keg_t keg;
	int pages;

	keg = zone->uz_keg;

	KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
	    ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));

	pages = keg->uk_size / UMA_SLAB_SIZE;

	/* Account for remainder */
	if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
		pages++;

	keg->uk_ppera = pages;
	keg->uk_ipers = 1;

	keg->uk_flags |= UMA_ZONE_OFFPAGE;
	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
		keg->uk_flags |= UMA_ZONE_HASH;

	keg->uk_rsize = keg->uk_size;
}
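
/*
 * Illustrative note (not part of the original source): in zone_small_init()
 * keg->uk_rsize is the aligned item size without the free-list linkage; the
 * linkage (UMA_FRITM_SZ or UMA_FRITMREF_SZ) is added only to the local
 * rsize used for the division.  So for an on-page slab,
 * uk_ipers == (UMA_SLAB_SIZE - shsize) / (uk_rsize + linkage), and whatever
 * is left over is wastedspace.  Only when that waste reaches UMA_MAX_WASTE,
 * and packing could actually improve, does the keg switch to OFFPAGE
 * headers, where uk_ipers becomes UMA_SLAB_SIZE / uk_rsize.
 */
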
/*
 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
 * the keg onto the global keg list.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_kctor_args
 */
static int
keg_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_kctor_args *arg = udata;
	uma_keg_t keg = mem;
	uma_zone_t zone;

	bzero(keg, size);
	keg->uk_size = arg->size;
	keg->uk_init = arg->uminit;
	keg->uk_fini = arg->fini;
	keg->uk_align = arg->align;
	keg->uk_free = 0;
	keg->uk_pages = 0;
	keg->uk_flags = arg->flags;
	keg->uk_allocf = page_alloc;
	keg->uk_freef = page_free;
	keg->uk_recurse = 0;
	keg->uk_slabzone = NULL;

	/*
	 * The master zone is passed to us at keg-creation time.
	 */
	zone = arg->zone;
	zone->uz_keg = keg;

	if (arg->flags & UMA_ZONE_VM)
		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;

	if (arg->flags & UMA_ZONE_ZINIT)
		keg->uk_init = zero_init;

	/*
	 * The +UMA_FRITM_SZ added to uk_size is to account for the
	 * linkage that is added to the size in zone_small_init().  If
	 * we don't account for this here then we may end up in
	 * zone_small_init() with a calculated 'ipers' of 0.
	 */
	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		if ((keg->uk_size+UMA_FRITMREF_SZ) >
		    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
			zone_large_init(zone);
		else
			zone_small_init(zone);
	} else {
		if ((keg->uk_size+UMA_FRITM_SZ) >
		    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
			zone_large_init(zone);
		else
			zone_small_init(zone);
	}

	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		if (keg->uk_flags & UMA_ZONE_REFCNT)
			keg->uk_slabzone = slabrefzone;
		else
			keg->uk_slabzone = slabzone;
	}

	/*
	 * If we haven't booted yet we need allocations to go through the
	 * startup cache until the vm is ready.
	 */
	if (keg->uk_ppera == 1) {
#ifdef UMA_MD_SMALL_ALLOC
		keg->uk_allocf = uma_small_alloc;
		keg->uk_freef = uma_small_free;
#endif
		if (booted == 0)
			keg->uk_allocf = startup_alloc;
	}

	/*
	 * Initialize keg's lock (shared among zones) through
	 * Master zone
	 */
	zone->uz_lock = &keg->uk_lock;
	if (arg->flags & UMA_ZONE_MTXCLASS)
		ZONE_LOCK_INIT(zone, 1);
	else
		ZONE_LOCK_INIT(zone, 0);

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
		u_int totsize;

		/* Size of the slab struct and free list */
		if (keg->uk_flags & UMA_ZONE_REFCNT)
			totsize = sizeof(struct uma_slab_refcnt) +
			    keg->uk_ipers * UMA_FRITMREF_SZ;
		else
			totsize = sizeof(struct uma_slab) +
			    keg->uk_ipers * UMA_FRITM_SZ;

		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		keg->uk_pgoff = UMA_SLAB_SIZE - totsize;

		if (keg->uk_flags & UMA_ZONE_REFCNT)
			totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
			    + keg->uk_ipers * UMA_FRITMREF_SZ;
		else
			totsize = keg->uk_pgoff + sizeof(struct uma_slab)
			    + keg->uk_ipers * UMA_FRITM_SZ;

		/*
		 * The only way the following is possible is if with our
		 * UMA_ALIGN_PTR adjustments we are now bigger than
		 * UMA_SLAB_SIZE.  I haven't checked whether this is
		 * mathematically possible for all cases, so we make
		 * sure here anyway.
		 */
		if (totsize > UMA_SLAB_SIZE) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
			    keg->uk_size);
			panic("UMA slab won't fit.\n");
		}
	}

	if (keg->uk_flags & UMA_ZONE_HASH)
		hash_alloc(&keg->uk_hash);

#ifdef UMA_DEBUG
	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
	    zone->uz_name, zone,
	    keg->uk_size, keg->uk_ipers,
	    keg->uk_ppera, keg->uk_pgoff);
#endif

	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);

	mtx_lock(&uma_mtx);
	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
	mtx_unlock(&uma_mtx);
	return (0);
}
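
/*
 * Illustrative example (numbers are hypothetical, not from the original
 * source): for an on-page slab the header is right-justified.  If the slab
 * header plus its free list came to, say, 62 bytes, the UMA_ALIGN_PTR
 * rounding above would push totsize to 64, giving
 * uk_pgoff = UMA_SLAB_SIZE - 64; the items then occupy the bytes below
 * uk_pgoff and the header occupies the tail of the slab, which is what the
 * second totsize computation re-checks against UMA_SLAB_SIZE before
 * panicking.
 */
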
/*
 * Zone header ctor.  This initializes all fields, locks, etc.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_zctor_args
 */

static int
zone_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	uma_zone_t z;
	uma_keg_t keg;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_init = NULL;
	zone->uz_fini = NULL;
	zone->uz_allocs = 0;
	zone->uz_frees = 0;
	zone->uz_fails = 0;
	zone->uz_fills = zone->uz_count = 0;

	if (arg->flags & UMA_ZONE_SECONDARY) {
		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
		keg = arg->keg;
		zone->uz_keg = keg;
		zone->uz_init = arg->uminit;
		zone->uz_fini = arg->fini;
		zone->uz_lock = &keg->uk_lock;
		mtx_lock(&uma_mtx);
		ZONE_LOCK(zone);
		keg->uk_flags |= UMA_ZONE_SECONDARY;
		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
			if (LIST_NEXT(z, uz_link) == NULL) {
				LIST_INSERT_AFTER(z, zone, uz_link);
				break;
			}
		}
		ZONE_UNLOCK(zone);
		mtx_unlock(&uma_mtx);
	} else if (arg->keg == NULL) {
		if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
		    arg->align, arg->flags) == NULL)
			return (ENOMEM);
	} else {
		struct uma_kctor_args karg;
		int error;

		/* We should only be here from uma_startup() */
		karg.size = arg->size;
		karg.uminit = arg->uminit;
		karg.fini = arg->fini;
		karg.align = arg->align;
		karg.flags = arg->flags;
		karg.zone = zone;
		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
		    flags);
		if (error)
			return (error);
	}
	keg = zone->uz_keg;
	zone->uz_lock = &keg->uk_lock;

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
		KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
		return (0);
	}

	if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
		zone->uz_count = BUCKET_MAX;
	else if (keg->uk_ipers <= BUCKET_MAX)
		zone->uz_count = keg->uk_ipers;
	else
		zone->uz_count = BUCKET_MAX;
	return (0);
}

/*
 * Keg header dtor.  This frees all data, destroys locks, frees the hash
 * table and removes the keg from the global list.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */
static void
keg_dtor(void *arg, int size, void *udata)
{
	uma_keg_t keg;

	keg = (uma_keg_t)arg;
	mtx_lock(&keg->uk_lock);
	if (keg->uk_free != 0) {
		printf("Freed UMA keg was not empty (%d items). "
		    " Lost %d pages of memory.\n",
		    keg->uk_free, keg->uk_pages);
	}
	mtx_unlock(&keg->uk_lock);

	if (keg->uk_flags & UMA_ZONE_HASH)
		hash_free(&keg->uk_hash);

	mtx_destroy(&keg->uk_lock);
}

/*
 * Zone header dtor.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */
static void
zone_dtor(void *arg, int size, void *udata)
{
	uma_zone_t zone;
	uma_keg_t keg;

	zone = (uma_zone_t)arg;
	keg = zone->uz_keg;

	if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
		cache_drain(zone);

	mtx_lock(&uma_mtx);
	zone_drain(zone);
	if (keg->uk_flags & UMA_ZONE_SECONDARY) {
		LIST_REMOVE(zone, uz_link);
		/*
		 * XXX there are some races here where
		 * the zone can be drained but zone lock
		 * released and then refilled before we
		 * remove it... we don't care for now
		 */
		ZONE_LOCK(zone);
		if (LIST_EMPTY(&keg->uk_zones))
			keg->uk_flags &= ~UMA_ZONE_SECONDARY;
		ZONE_UNLOCK(zone);
		mtx_unlock(&uma_mtx);
	} else {
		LIST_REMOVE(keg, uk_link);
		LIST_REMOVE(zone, uz_link);
		mtx_unlock(&uma_mtx);
		uma_zfree_internal(kegs, keg, NULL, SKIP_NONE,
		    ZFREE_STATFREE);
	}
	zone->uz_keg = NULL;
}

/*
 * Traverses every zone in the system and calls a callback
 *
 * Arguments:
 *	zfunc  A pointer to a function which accepts a zone
 *		as an argument.
 *
 * Returns:
 *	Nothing
 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_keg_t keg;
	uma_zone_t zone;

	mtx_lock(&uma_mtx);
	LIST_FOREACH(keg, &uma_kegs, uk_link) {
		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
			zfunc(zone);
	}
	mtx_unlock(&uma_mtx);
}

/* Public functions */
/* See uma.h */
void
uma_startup(void *bootmem, int boot_pages)
{
	struct uma_zctor_args args;
	uma_slab_t slab;
	u_int slabsize;
	u_int objsize, totsize, wsize;
	int i;

#ifdef UMA_DEBUG
	printf("Creating uma keg headers zone and keg.\n");
#endif
	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);

	/*
	 * Figure out the maximum number of items-per-slab we'll have if
	 * we're using the OFFPAGE slab header to track free items, given
	 * all possible object sizes and the maximum desired wastage
	 * (UMA_MAX_WASTE).
	 *
	 * We iterate until we find an object size for
	 * which the calculated wastage in zone_small_init() will be
	 * enough to warrant OFFPAGE.  Since wastedspace versus objsize
	 * is an overall increasing see-saw function, we find the smallest
	 * objsize such that the wastage is always acceptable for objects
	 * with that objsize or smaller.  Since a smaller objsize always
	 * generates a larger possible uma_max_ipers, we use this computed
	 * objsize to calculate the largest ipers possible.  Since the
	 * ipers calculated for OFFPAGE slab headers is always larger than
	 * the ipers initially calculated in zone_small_init(), we use
	 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
	 * obtain the maximum ipers possible for offpage slab headers.
	 *
	 * It should be noted that ipers versus objsize is an inversely
	 * proportional function which drops off rather quickly so as
	 * long as our UMA_MAX_WASTE is such that the objsize we calculate
	 * falls into the portion of the inverse relation AFTER the steep
	 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
	 *
	 * Note that we have 8-bits (1 byte) to use as a freelist index
	 * inside the actual slab header itself and this is enough to
	 * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
	 * object with offpage slab header would have ipers =
	 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
	 * 1 greater than what our byte-integer freelist index can
	 * accommodate, but we know that this situation never occurs as
	 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
	 * that we need to go to offpage slab headers.  Or, if we do,
	 * then we trap that condition below and panic in the INVARIANTS case.
	 */
	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
		    (objsize + UMA_FRITM_SZ);
		totsize *= (UMA_FRITM_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers = UMA_SLAB_SIZE / objsize;

	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
		    (objsize + UMA_FRITMREF_SZ);
		totsize *= (UMA_FRITMREF_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;

	KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
	    ("uma_startup: calculated uma_max_ipers values too large!"));

#ifdef UMA_DEBUG
	printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
	printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
	    uma_max_ipers_ref);
#endif

	/* "manually" create the initial zone */
	args.name = "UMA Kegs";
	args.size = sizeof(struct uma_keg);
	args.ctor = keg_ctor;
	args.dtor = keg_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = &masterkeg;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	for (i = 0; i < boot_pages; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (u_int8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
	}
	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone and keg.\n");
#endif
	args.name = "UMA Zones";
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (mp_maxid + 1));
	args.ctor = zone_ctor;
	args.dtor = zone_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Initializing pcpu cache locks.\n");
#endif
#ifdef UMA_DEBUG
	printf("Creating slab and hash zones.\n");
#endif

	/*
	 * This is the max number of free list items we'll have with
	 * offpage slabs.
	 */
	slabsize = uma_max_ipers * UMA_FRITM_SZ;
	slabsize += sizeof(struct uma_slab);

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
	    slabsize,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	/*
	 * We also create a zone for the bigger slabs with reference
	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
	 */
	slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
	slabsize += sizeof(struct uma_slab_refcnt);
	slabrefzone = uma_zcreate("UMA RCntSlabs",
	    slabsize,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR,
	    UMA_ZFLAG_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	bucket_init();

#if defined(UMA_MD_SMALL_ALLOC) && !defined(UMA_MD_SMALL_ALLOC_NEEDS_VM)
	booted = 1;
#endif

#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}

/* see uma.h */
void
uma_startup2(void)
{
	booted = 1;
	bucket_enable();
#ifdef UMA_DEBUG
	printf("UMA startup2 complete.\n");
#endif
}

/*
 * Initialize our callout handle
 *
 */

static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
	printf("Starting callout.\n");
#endif
	callout_init(&uma_callout, CALLOUT_MPSAFE);
	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
	printf("UMA startup3 complete.\n");
#endif
}

static uma_zone_t
uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
    int align, u_int32_t flags)
{
	struct uma_kctor_args args;

	args.size = size;
	args.uminit = uminit;
	args.fini = fini;
	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
	args.flags = flags;
	args.zone = zone;
	return (uma_zalloc_internal(kegs, &args, M_WAITOK));
}

/* See uma.h */
void
uma_set_align(int align)
{

	if (align != UMA_ALIGN_CACHE)
		uma_align_cache = align;
}

/* See uma.h */
uma_zone_t
uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
    uma_init uminit, uma_fini fini, int align, u_int32_t flags)

{
	struct uma_zctor_args args;

	/* This stuff is essential for the zone ctor */
	args.name = name;
	args.size = size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;
	args.keg = NULL;

	return (uma_zalloc_internal(zones, &args, M_WAITOK));
}

/* See uma.h */
uma_zone_t
uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
    uma_init zinit, uma_fini zfini, uma_zone_t master)
{
	struct uma_zctor_args args;

	args.name = name;
	args.size = master->uz_keg->uk_size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = zinit;
	args.fini = zfini;
	args.align = master->uz_keg->uk_align;
	args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
	args.keg = master->uz_keg;

	return (uma_zalloc_internal(zones, &args, M_WAITOK));
}

/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
{

	uma_zfree_internal(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
}
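
/*
 * Illustrative usage sketch (not part of the original source; "foo" and the
 * local variables are hypothetical).  A typical consumer of the public API
 * above would do something like:
 *
 *	zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	item = uma_zalloc(zone, M_WAITOK);
 *	...
 *	uma_zfree(zone, item);
 *	uma_zdestroy(zone);
 *
 * uma_zsecond_create() instead layers a second zone (with its own ctor/dtor
 * and init/fini) over an existing zone's keg, so both zones share the same
 * backing slabs.
 */
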
/* See uma.h */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
	void *item;
	uma_cache_t cache;
	uma_bucket_t bucket;
	int cpu;

	/* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
	CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
	    zone->uz_name, flags);

	if (flags & M_WAITOK) {
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
	}

	/*
	 * If possible, allocate from the per-CPU cache.  There are two
	 * requirements for safe access to the per-CPU cache: (1) the thread
	 * accessing the cache must not be preempted or yield during access,
	 * and (2) the thread must not migrate CPUs without switching which
	 * cache it accesses.  We rely on a critical section to prevent
	 * preemption and migration.  We release the critical section in
	 * order to acquire the zone mutex if we are unable to allocate from
	 * the current cache; when we re-acquire the critical section, we
	 * must detect and handle migration if it has occurred.
	 */
zalloc_restart:
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

zalloc_start:
	bucket = cache->uc_allocbucket;

	if (bucket) {
		if (bucket->ub_cnt > 0) {
			bucket->ub_cnt--;
			item = bucket->ub_bucket[bucket->ub_cnt];
#ifdef INVARIANTS
			bucket->ub_bucket[bucket->ub_cnt] = NULL;
#endif
			KASSERT(item != NULL,
			    ("uma_zalloc: Bucket pointer mangled."));
			cache->uc_allocs++;
			critical_exit();
#ifdef INVARIANTS
			ZONE_LOCK(zone);
			uma_dbg_alloc(zone, NULL, item);
			ZONE_UNLOCK(zone);
#endif
			if (zone->uz_ctor != NULL) {
				if (zone->uz_ctor(item, zone->uz_keg->uk_size,
				    udata, flags) != 0) {
					uma_zfree_internal(zone, item, udata,
					    SKIP_DTOR, ZFREE_STATFAIL |
					    ZFREE_STATFREE);
					return (NULL);
				}
			}
			if (flags & M_ZERO)
				bzero(item, zone->uz_keg->uk_size);
			return (item);
		} else if (cache->uc_freebucket) {
			/*
			 * We have run out of items in our allocbucket.
			 * See if we can switch with our free bucket.
			 */
			if (cache->uc_freebucket->ub_cnt > 0) {
#ifdef UMA_DEBUG_ALLOC
				printf("uma_zalloc: Swapping empty with"
				    " alloc.\n");
#endif
				bucket = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = bucket;

				goto zalloc_start;
			}
		}
	}
	/*
	 * The attempt to retrieve the item from the per-CPU cache has failed,
	 * so we must go back to the zone.  This requires the zone lock, so we
	 * must drop the critical section, then re-acquire it when we go back
	 * to the cache.  Since the critical section is released, we may be
	 * preempted or migrate.  As such, make sure not to maintain any
	 * thread-local state specific to the cache from prior to releasing
	 * the critical section.
	 */
	critical_exit();
	ZONE_LOCK(zone);
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];
	bucket = cache->uc_allocbucket;
	if (bucket != NULL) {
		if (bucket->ub_cnt > 0) {
			ZONE_UNLOCK(zone);
			goto zalloc_start;
		}
		bucket = cache->uc_freebucket;
		if (bucket != NULL && bucket->ub_cnt > 0) {
			ZONE_UNLOCK(zone);
			goto zalloc_start;
		}
	}

	/* Since we have locked the zone we may as well send back our stats */
	zone->uz_allocs += cache->uc_allocs;
	cache->uc_allocs = 0;
	zone->uz_frees += cache->uc_frees;
	cache->uc_frees = 0;

	/* Our old one is now a free bucket */
	if (cache->uc_allocbucket) {
		KASSERT(cache->uc_allocbucket->ub_cnt == 0,
		    ("uma_zalloc_arg: Freeing a non free bucket."));
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    cache->uc_allocbucket, ub_link);
		cache->uc_allocbucket = NULL;
	}

	/* Check the free list for a new alloc bucket */
	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		KASSERT(bucket->ub_cnt != 0,
		    ("uma_zalloc_arg: Returning an empty bucket."));

		LIST_REMOVE(bucket, ub_link);
		cache->uc_allocbucket = bucket;
		ZONE_UNLOCK(zone);
		goto zalloc_start;
	}
	/* We are no longer associated with this CPU. */
	critical_exit();

	/* Bump up our uz_count so we get here less */
	if (zone->uz_count < BUCKET_MAX)
		zone->uz_count++;

	/*
	 * Now let's just fill a bucket and put it on the free list.  If that
	 * works we'll restart the allocation from the beginning.
	 */
	if (uma_zalloc_bucket(zone, flags)) {
		ZONE_UNLOCK(zone);
		goto zalloc_restart;
	}
	ZONE_UNLOCK(zone);
	/*
	 * We may not be able to get a bucket so return an actual item.
	 */
#ifdef UMA_DEBUG
	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

	return (uma_zalloc_internal(zone, udata, flags));
}
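
/*
 * Illustrative summary (not part of the original source): the allocation
 * path above tries, in order, the per-CPU allocbucket, a swap with the
 * per-CPU freebucket, a full bucket taken from the zone's uz_full_bucket
 * list, a freshly filled bucket via uma_zalloc_bucket() (restarting the
 * fast path on success), and finally a single item straight from the keg
 * through uma_zalloc_internal().
 */
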
2023 */ 2024 if (flags & M_NOWAIT) 2025 flags |= M_NOVM; 2026 } 2027 return (slab); 2028 } 2029 2030 static void * 2031 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab) 2032 { 2033 uma_keg_t keg; 2034 uma_slabrefcnt_t slabref; 2035 void *item; 2036 u_int8_t freei; 2037 2038 keg = zone->uz_keg; 2039 2040 freei = slab->us_firstfree; 2041 if (keg->uk_flags & UMA_ZONE_REFCNT) { 2042 slabref = (uma_slabrefcnt_t)slab; 2043 slab->us_firstfree = slabref->us_freelist[freei].us_item; 2044 } else { 2045 slab->us_firstfree = slab->us_freelist[freei].us_item; 2046 } 2047 item = slab->us_data + (keg->uk_rsize * freei); 2048 2049 slab->us_freecount--; 2050 keg->uk_free--; 2051 #ifdef INVARIANTS 2052 uma_dbg_alloc(zone, slab, item); 2053 #endif 2054 /* Move this slab to the full list */ 2055 if (slab->us_freecount == 0) { 2056 LIST_REMOVE(slab, us_link); 2057 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link); 2058 } 2059 2060 return (item); 2061 } 2062 2063 static int 2064 uma_zalloc_bucket(uma_zone_t zone, int flags) 2065 { 2066 uma_bucket_t bucket; 2067 uma_slab_t slab; 2068 int16_t saved; 2069 int max, origflags = flags; 2070 2071 /* 2072 * Try this zone's free list first so we don't allocate extra buckets. 2073 */ 2074 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 2075 KASSERT(bucket->ub_cnt == 0, 2076 ("uma_zalloc_bucket: Bucket on free list is not empty.")); 2077 LIST_REMOVE(bucket, ub_link); 2078 } else { 2079 int bflags; 2080 2081 bflags = (flags & ~M_ZERO); 2082 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY) 2083 bflags |= M_NOVM; 2084 2085 ZONE_UNLOCK(zone); 2086 bucket = bucket_alloc(zone->uz_count, bflags); 2087 ZONE_LOCK(zone); 2088 } 2089 2090 if (bucket == NULL) 2091 return (0); 2092 2093 #ifdef SMP 2094 /* 2095 * This code is here to limit the number of simultaneous bucket fills 2096 * for any given zone to the number of per cpu caches in this zone. This 2097 * is done so that we don't allocate more memory than we really need. 2098 */ 2099 if (zone->uz_fills >= mp_ncpus) 2100 goto done; 2101 2102 #endif 2103 zone->uz_fills++; 2104 2105 max = MIN(bucket->ub_entries, zone->uz_count); 2106 /* Try to keep the buckets totally full */ 2107 saved = bucket->ub_cnt; 2108 while (bucket->ub_cnt < max && 2109 (slab = uma_zone_slab(zone, flags)) != NULL) { 2110 while (slab->us_freecount && bucket->ub_cnt < max) { 2111 bucket->ub_bucket[bucket->ub_cnt++] = 2112 uma_slab_alloc(zone, slab); 2113 } 2114 2115 /* Don't block on the next fill */ 2116 flags |= M_NOWAIT; 2117 } 2118 2119 /* 2120 * We unlock here because we need to call the zone's init. 2121 * It should be safe to unlock because the slab dealt with 2122 * above is already on the appropriate list within the keg 2123 * and the bucket we filled is not yet on any list, so we 2124 * own it. 2125 */ 2126 if (zone->uz_init != NULL) { 2127 int i; 2128 2129 ZONE_UNLOCK(zone); 2130 for (i = saved; i < bucket->ub_cnt; i++) 2131 if (zone->uz_init(bucket->ub_bucket[i], 2132 zone->uz_keg->uk_size, origflags) != 0) 2133 break; 2134 /* 2135 * If we couldn't initialize the whole bucket, put the 2136 * rest back onto the freelist. 
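 * The bucket count is then trimmed to the number of items that were successfully initialized, so only initialized items remain in the bucket.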
2137 */ 2138 if (i != bucket->ub_cnt) { 2139 int j; 2140 2141 for (j = i; j < bucket->ub_cnt; j++) { 2142 uma_zfree_internal(zone, bucket->ub_bucket[j], 2143 NULL, SKIP_FINI, 0); 2144 #ifdef INVARIANTS 2145 bucket->ub_bucket[j] = NULL; 2146 #endif 2147 } 2148 bucket->ub_cnt = i; 2149 } 2150 ZONE_LOCK(zone); 2151 } 2152 2153 zone->uz_fills--; 2154 if (bucket->ub_cnt != 0) { 2155 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2156 bucket, ub_link); 2157 return (1); 2158 } 2159 #ifdef SMP 2160 done: 2161 #endif 2162 bucket_free(bucket); 2163 2164 return (0); 2165 } 2166 /* 2167 * Allocates an item for an internal zone 2168 * 2169 * Arguments 2170 * zone The zone to alloc for. 2171 * udata The data to be passed to the constructor. 2172 * flags M_WAITOK, M_NOWAIT, M_ZERO. 2173 * 2174 * Returns 2175 * NULL if there is no memory and M_NOWAIT is set 2176 * An item if successful 2177 */ 2178 2179 static void * 2180 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags) 2181 { 2182 uma_keg_t keg; 2183 uma_slab_t slab; 2184 void *item; 2185 2186 item = NULL; 2187 keg = zone->uz_keg; 2188 2189 #ifdef UMA_DEBUG_ALLOC 2190 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); 2191 #endif 2192 ZONE_LOCK(zone); 2193 2194 slab = uma_zone_slab(zone, flags); 2195 if (slab == NULL) { 2196 zone->uz_fails++; 2197 ZONE_UNLOCK(zone); 2198 return (NULL); 2199 } 2200 2201 item = uma_slab_alloc(zone, slab); 2202 2203 zone->uz_allocs++; 2204 2205 ZONE_UNLOCK(zone); 2206 2207 /* 2208 * We have to call both the zone's init (not the keg's init) 2209 * and the zone's ctor. This is because the item is going from 2210 * a keg slab directly to the user, and the user is expecting it 2211 * to be both zone-init'd as well as zone-ctor'd. 2212 */ 2213 if (zone->uz_init != NULL) { 2214 if (zone->uz_init(item, keg->uk_size, flags) != 0) { 2215 uma_zfree_internal(zone, item, udata, SKIP_FINI, 2216 ZFREE_STATFAIL | ZFREE_STATFREE); 2217 return (NULL); 2218 } 2219 } 2220 if (zone->uz_ctor != NULL) { 2221 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) { 2222 uma_zfree_internal(zone, item, udata, SKIP_DTOR, 2223 ZFREE_STATFAIL | ZFREE_STATFREE); 2224 return (NULL); 2225 } 2226 } 2227 if (flags & M_ZERO) 2228 bzero(item, keg->uk_size); 2229 2230 return (item); 2231 } 2232 2233 /* See uma.h */ 2234 void 2235 uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 2236 { 2237 uma_keg_t keg; 2238 uma_cache_t cache; 2239 uma_bucket_t bucket; 2240 int bflags; 2241 int cpu; 2242 2243 keg = zone->uz_keg; 2244 2245 #ifdef UMA_DEBUG_ALLOC_1 2246 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); 2247 #endif 2248 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread, 2249 zone->uz_name); 2250 2251 if (zone->uz_dtor) 2252 zone->uz_dtor(item, keg->uk_size, udata); 2253 #ifdef INVARIANTS 2254 ZONE_LOCK(zone); 2255 if (keg->uk_flags & UMA_ZONE_MALLOC) 2256 uma_dbg_free(zone, udata, item); 2257 else 2258 uma_dbg_free(zone, NULL, item); 2259 ZONE_UNLOCK(zone); 2260 #endif 2261 /* 2262 * The race here is acceptable. If we miss it we'll just have to wait 2263 * a little longer for the limits to be reset. 2264 */ 2265 if (keg->uk_flags & UMA_ZFLAG_FULL) 2266 goto zfree_internal; 2267 2268 /* 2269 * If possible, free to the per-CPU cache. There are two 2270 * requirements for safe access to the per-CPU cache: (1) the thread 2271 * accessing the cache must not be preempted or yield during access, 2272 * and (2) the thread must not migrate CPUs without switching which 2273 * cache it accesses. 
We rely on a critical section to prevent 2274 * preemption and migration. We release the critical section in 2275 * order to acquire the zone mutex if we are unable to free to the 2276 * current cache; when we re-acquire the critical section, we must 2277 * detect and handle migration if it has occurred. 2278 */ 2279 zfree_restart: 2280 critical_enter(); 2281 cpu = curcpu; 2282 cache = &zone->uz_cpu[cpu]; 2283 2284 zfree_start: 2285 bucket = cache->uc_freebucket; 2286 2287 if (bucket) { 2288 /* 2289 * Do we have room in our bucket? It is OK for this uz count 2290 * check to be slightly out of sync. 2291 */ 2292 2293 if (bucket->ub_cnt < bucket->ub_entries) { 2294 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL, 2295 ("uma_zfree: Freeing to non free bucket index.")); 2296 bucket->ub_bucket[bucket->ub_cnt] = item; 2297 bucket->ub_cnt++; 2298 cache->uc_frees++; 2299 critical_exit(); 2300 return; 2301 } else if (cache->uc_allocbucket) { 2302 #ifdef UMA_DEBUG_ALLOC 2303 printf("uma_zfree: Swapping buckets.\n"); 2304 #endif 2305 /* 2306 * We have run out of space in our freebucket. 2307 * See if we can switch with our alloc bucket. 2308 */ 2309 if (cache->uc_allocbucket->ub_cnt < 2310 cache->uc_freebucket->ub_cnt) { 2311 bucket = cache->uc_freebucket; 2312 cache->uc_freebucket = cache->uc_allocbucket; 2313 cache->uc_allocbucket = bucket; 2314 goto zfree_start; 2315 } 2316 } 2317 } 2318 /* 2319 * We can get here for two reasons: 2320 * 2321 * 1) The buckets are NULL 2322 * 2) The alloc and free buckets are both somewhat full. 2323 * 2324 * We must go back the zone, which requires acquiring the zone lock, 2325 * which in turn means we must release and re-acquire the critical 2326 * section. Since the critical section is released, we may be 2327 * preempted or migrate. As such, make sure not to maintain any 2328 * thread-local state specific to the cache from prior to releasing 2329 * the critical section. 2330 */ 2331 critical_exit(); 2332 ZONE_LOCK(zone); 2333 critical_enter(); 2334 cpu = curcpu; 2335 cache = &zone->uz_cpu[cpu]; 2336 if (cache->uc_freebucket != NULL) { 2337 if (cache->uc_freebucket->ub_cnt < 2338 cache->uc_freebucket->ub_entries) { 2339 ZONE_UNLOCK(zone); 2340 goto zfree_start; 2341 } 2342 if (cache->uc_allocbucket != NULL && 2343 (cache->uc_allocbucket->ub_cnt < 2344 cache->uc_freebucket->ub_cnt)) { 2345 ZONE_UNLOCK(zone); 2346 goto zfree_start; 2347 } 2348 } 2349 2350 /* Since we have locked the zone we may as well send back our stats */ 2351 zone->uz_allocs += cache->uc_allocs; 2352 cache->uc_allocs = 0; 2353 zone->uz_frees += cache->uc_frees; 2354 cache->uc_frees = 0; 2355 2356 bucket = cache->uc_freebucket; 2357 cache->uc_freebucket = NULL; 2358 2359 /* Can we throw this on the zone full list? */ 2360 if (bucket != NULL) { 2361 #ifdef UMA_DEBUG_ALLOC 2362 printf("uma_zfree: Putting old bucket on the free list.\n"); 2363 #endif 2364 /* ub_cnt is pointing to the last free item */ 2365 KASSERT(bucket->ub_cnt != 0, 2366 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); 2367 LIST_INSERT_HEAD(&zone->uz_full_bucket, 2368 bucket, ub_link); 2369 } 2370 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 2371 LIST_REMOVE(bucket, ub_link); 2372 ZONE_UNLOCK(zone); 2373 cache->uc_freebucket = bucket; 2374 goto zfree_start; 2375 } 2376 /* We are no longer associated with this CPU. */ 2377 critical_exit(); 2378 2379 /* And the zone.. 
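 * (the zone lock is dropped below as well, before we try to allocate a replacement free bucket with M_NOWAIT).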
*/ 2380 ZONE_UNLOCK(zone); 2381 2382 #ifdef UMA_DEBUG_ALLOC 2383 printf("uma_zfree: Allocating new free bucket.\n"); 2384 #endif 2385 bflags = M_NOWAIT; 2386 2387 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY) 2388 bflags |= M_NOVM; 2389 bucket = bucket_alloc(zone->uz_count, bflags); 2390 if (bucket) { 2391 ZONE_LOCK(zone); 2392 LIST_INSERT_HEAD(&zone->uz_free_bucket, 2393 bucket, ub_link); 2394 ZONE_UNLOCK(zone); 2395 goto zfree_restart; 2396 } 2397 2398 /* 2399 * If nothing else caught this, we'll just do an internal free. 2400 */ 2401 zfree_internal: 2402 uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE); 2403 2404 return; 2405 } 2406 2407 /* 2408 * Frees an item to an INTERNAL zone or allocates a free bucket 2409 * 2410 * Arguments: 2411 * zone The zone to free to 2412 * item The item we're freeing 2413 * udata User supplied data for the dtor 2414 * skip Skip dtors and finis 2415 */ 2416 static void 2417 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, 2418 enum zfreeskip skip, int flags) 2419 { 2420 uma_slab_t slab; 2421 uma_slabrefcnt_t slabref; 2422 uma_keg_t keg; 2423 u_int8_t *mem; 2424 u_int8_t freei; 2425 2426 keg = zone->uz_keg; 2427 2428 if (skip < SKIP_DTOR && zone->uz_dtor) 2429 zone->uz_dtor(item, keg->uk_size, udata); 2430 if (skip < SKIP_FINI && zone->uz_fini) 2431 zone->uz_fini(item, keg->uk_size); 2432 2433 ZONE_LOCK(zone); 2434 2435 if (flags & ZFREE_STATFAIL) 2436 zone->uz_fails++; 2437 if (flags & ZFREE_STATFREE) 2438 zone->uz_frees++; 2439 2440 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) { 2441 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); 2442 if (keg->uk_flags & UMA_ZONE_HASH) 2443 slab = hash_sfind(&keg->uk_hash, mem); 2444 else { 2445 mem += keg->uk_pgoff; 2446 slab = (uma_slab_t)mem; 2447 } 2448 } else { 2449 slab = (uma_slab_t)udata; 2450 } 2451 2452 /* Do we need to remove from any lists? */ 2453 if (slab->us_freecount+1 == keg->uk_ipers) { 2454 LIST_REMOVE(slab, us_link); 2455 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); 2456 } else if (slab->us_freecount == 0) { 2457 LIST_REMOVE(slab, us_link); 2458 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); 2459 } 2460 2461 /* Slab management stuff */ 2462 freei = ((unsigned long)item - (unsigned long)slab->us_data) 2463 / keg->uk_rsize; 2464 2465 #ifdef INVARIANTS 2466 if (!skip) 2467 uma_dbg_free(zone, slab, item); 2468 #endif 2469 2470 if (keg->uk_flags & UMA_ZONE_REFCNT) { 2471 slabref = (uma_slabrefcnt_t)slab; 2472 slabref->us_freelist[freei].us_item = slab->us_firstfree; 2473 } else { 2474 slab->us_freelist[freei].us_item = slab->us_firstfree; 2475 } 2476 slab->us_firstfree = freei; 2477 slab->us_freecount++; 2478 2479 /* Zone statistics */ 2480 keg->uk_free++; 2481 2482 if (keg->uk_flags & UMA_ZFLAG_FULL) { 2483 if (keg->uk_pages < keg->uk_maxpages) 2484 keg->uk_flags &= ~UMA_ZFLAG_FULL; 2485 2486 /* 2487 * We can handle one more allocation. Since we're clearing ZFLAG_FULL, 2488 * wake up all procs blocked on pages. This should be uncommon, so 2489 * keeping this simple for now (rather than adding count of blocked 2490 * threads etc). 
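 * The wakeup() below wakes every thread sleeping on the keg in uma_zone_slab()'s "zonelimit" msleep(); each woken thread re-checks the page limit with the keg lock reacquired by msleep() before proceeding.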
2491 */ 2492 wakeup(keg); 2493 } 2494 2495 ZONE_UNLOCK(zone); 2496 } 2497 2498 /* See uma.h */ 2499 void 2500 uma_zone_set_max(uma_zone_t zone, int nitems) 2501 { 2502 uma_keg_t keg; 2503 2504 keg = zone->uz_keg; 2505 ZONE_LOCK(zone); 2506 if (keg->uk_ppera > 1) 2507 keg->uk_maxpages = nitems * keg->uk_ppera; 2508 else 2509 keg->uk_maxpages = nitems / keg->uk_ipers; 2510 2511 if (keg->uk_maxpages * keg->uk_ipers < nitems) 2512 keg->uk_maxpages++; 2513 2514 ZONE_UNLOCK(zone); 2515 } 2516 2517 /* See uma.h */ 2518 void 2519 uma_zone_set_init(uma_zone_t zone, uma_init uminit) 2520 { 2521 ZONE_LOCK(zone); 2522 KASSERT(zone->uz_keg->uk_pages == 0, 2523 ("uma_zone_set_init on non-empty keg")); 2524 zone->uz_keg->uk_init = uminit; 2525 ZONE_UNLOCK(zone); 2526 } 2527 2528 /* See uma.h */ 2529 void 2530 uma_zone_set_fini(uma_zone_t zone, uma_fini fini) 2531 { 2532 ZONE_LOCK(zone); 2533 KASSERT(zone->uz_keg->uk_pages == 0, 2534 ("uma_zone_set_fini on non-empty keg")); 2535 zone->uz_keg->uk_fini = fini; 2536 ZONE_UNLOCK(zone); 2537 } 2538 2539 /* See uma.h */ 2540 void 2541 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) 2542 { 2543 ZONE_LOCK(zone); 2544 KASSERT(zone->uz_keg->uk_pages == 0, 2545 ("uma_zone_set_zinit on non-empty keg")); 2546 zone->uz_init = zinit; 2547 ZONE_UNLOCK(zone); 2548 } 2549 2550 /* See uma.h */ 2551 void 2552 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini) 2553 { 2554 ZONE_LOCK(zone); 2555 KASSERT(zone->uz_keg->uk_pages == 0, 2556 ("uma_zone_set_zfini on non-empty keg")); 2557 zone->uz_fini = zfini; 2558 ZONE_UNLOCK(zone); 2559 } 2560 2561 /* See uma.h */ 2562 /* XXX uk_freef is not actually used with the zone locked */ 2563 void 2564 uma_zone_set_freef(uma_zone_t zone, uma_free freef) 2565 { 2566 ZONE_LOCK(zone); 2567 zone->uz_keg->uk_freef = freef; 2568 ZONE_UNLOCK(zone); 2569 } 2570 2571 /* See uma.h */ 2572 /* XXX uk_allocf is not actually used with the zone locked */ 2573 void 2574 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) 2575 { 2576 ZONE_LOCK(zone); 2577 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC; 2578 zone->uz_keg->uk_allocf = allocf; 2579 ZONE_UNLOCK(zone); 2580 } 2581 2582 /* See uma.h */ 2583 int 2584 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) 2585 { 2586 uma_keg_t keg; 2587 vm_offset_t kva; 2588 int pages; 2589 2590 keg = zone->uz_keg; 2591 pages = count / keg->uk_ipers; 2592 2593 if (pages * keg->uk_ipers < count) 2594 pages++; 2595 2596 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE); 2597 2598 if (kva == 0) 2599 return (0); 2600 if (obj == NULL) { 2601 obj = vm_object_allocate(OBJT_DEFAULT, 2602 pages); 2603 } else { 2604 VM_OBJECT_LOCK_INIT(obj, "uma object"); 2605 _vm_object_allocate(OBJT_DEFAULT, 2606 pages, obj); 2607 } 2608 ZONE_LOCK(zone); 2609 keg->uk_kva = kva; 2610 keg->uk_obj = obj; 2611 keg->uk_maxpages = pages; 2612 keg->uk_allocf = obj_alloc; 2613 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC; 2614 ZONE_UNLOCK(zone); 2615 return (1); 2616 } 2617 2618 /* See uma.h */ 2619 void 2620 uma_prealloc(uma_zone_t zone, int items) 2621 { 2622 int slabs; 2623 uma_slab_t slab; 2624 uma_keg_t keg; 2625 2626 keg = zone->uz_keg; 2627 ZONE_LOCK(zone); 2628 slabs = items / keg->uk_ipers; 2629 if (slabs * keg->uk_ipers < items) 2630 slabs++; 2631 while (slabs > 0) { 2632 slab = slab_zalloc(zone, M_WAITOK); 2633 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); 2634 slabs--; 2635 } 2636 ZONE_UNLOCK(zone); 2637 } 2638 2639 /* See uma.h */ 2640 u_int32_t * 2641 uma_find_refcnt(uma_zone_t zone, 
void *item) 2642 { 2643 uma_slabrefcnt_t slabref; 2644 uma_keg_t keg; 2645 u_int32_t *refcnt; 2646 int idx; 2647 2648 keg = zone->uz_keg; 2649 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & 2650 (~UMA_SLAB_MASK)); 2651 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT, 2652 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT")); 2653 idx = ((unsigned long)item - (unsigned long)slabref->us_data) 2654 / keg->uk_rsize; 2655 refcnt = &slabref->us_freelist[idx].us_refcnt; 2656 return refcnt; 2657 } 2658 2659 /* See uma.h */ 2660 void 2661 uma_reclaim(void) 2662 { 2663 #ifdef UMA_DEBUG 2664 printf("UMA: vm asked us to release pages!\n"); 2665 #endif 2666 bucket_enable(); 2667 zone_foreach(zone_drain); 2668 /* 2669 * Some slabs may have been freed but this zone will be visited early; 2670 * we visit it again so that we can free pages that are empty once other 2671 * zones are drained. We have to do the same for buckets. 2672 */ 2673 zone_drain(slabzone); 2674 zone_drain(slabrefzone); 2675 bucket_zone_drain(); 2676 } 2677 2678 /* See uma.h */ 2679 int 2680 uma_zone_exhausted(uma_zone_t zone) 2681 { 2682 int full; 2683 2684 ZONE_LOCK(zone); 2685 full = (zone->uz_keg->uk_flags & UMA_ZFLAG_FULL); 2686 ZONE_UNLOCK(zone); 2687 return (full); 2688 } 2689 2690 int 2691 uma_zone_exhausted_nolock(uma_zone_t zone) 2692 { 2693 return (zone->uz_keg->uk_flags & UMA_ZFLAG_FULL); 2694 } 2695 2696 void * 2697 uma_large_malloc(int size, int wait) 2698 { 2699 void *mem; 2700 uma_slab_t slab; 2701 u_int8_t flags; 2702 2703 slab = uma_zalloc_internal(slabzone, NULL, wait); 2704 if (slab == NULL) 2705 return (NULL); 2706 mem = page_alloc(NULL, size, &flags, wait); 2707 if (mem) { 2708 vsetslab((vm_offset_t)mem, slab); 2709 slab->us_data = mem; 2710 slab->us_flags = flags | UMA_SLAB_MALLOC; 2711 slab->us_size = size; 2712 } else { 2713 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, 2714 ZFREE_STATFAIL | ZFREE_STATFREE); 2715 } 2716 2717 return (mem); 2718 } 2719 2720 void 2721 uma_large_free(uma_slab_t slab) 2722 { 2723 vsetobj((vm_offset_t)slab->us_data, kmem_object); 2724 page_free(slab->us_data, slab->us_size, slab->us_flags); 2725 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE); 2726 } 2727 2728 void 2729 uma_print_stats(void) 2730 { 2731 zone_foreach(uma_print_zone); 2732 } 2733 2734 static void 2735 slab_print(uma_slab_t slab) 2736 { 2737 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n", 2738 slab->us_keg, slab->us_data, slab->us_freecount, 2739 slab->us_firstfree); 2740 } 2741 2742 static void 2743 cache_print(uma_cache_t cache) 2744 { 2745 printf("alloc: %p(%d), free: %p(%d)\n", 2746 cache->uc_allocbucket, 2747 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0, 2748 cache->uc_freebucket, 2749 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0); 2750 } 2751 2752 void 2753 uma_print_zone(uma_zone_t zone) 2754 { 2755 uma_cache_t cache; 2756 uma_keg_t keg; 2757 uma_slab_t slab; 2758 int i; 2759 2760 keg = zone->uz_keg; 2761 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", 2762 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags, 2763 keg->uk_ipers, keg->uk_ppera, 2764 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free); 2765 printf("Part slabs:\n"); 2766 LIST_FOREACH(slab, &keg->uk_part_slab, us_link) 2767 slab_print(slab); 2768 printf("Free slabs:\n"); 2769 LIST_FOREACH(slab, &keg->uk_free_slab, us_link) 2770 slab_print(slab); 2771 printf("Full slabs:\n"); 2772 LIST_FOREACH(slab, &keg->uk_full_slab,
us_link) 2773 slab_print(slab); 2774 for (i = 0; i <= mp_maxid; i++) { 2775 if (CPU_ABSENT(i)) 2776 continue; 2777 cache = &zone->uz_cpu[i]; 2778 printf("CPU %d Cache:\n", i); 2779 cache_print(cache); 2780 } 2781 } 2782 2783 #ifdef DDB 2784 /* 2785 * Generate statistics across both the zone and its per-CPU caches. Return 2786 * the desired statistics through any pointer argument that is non-NULL. 2787 * 2788 * Note: does not update the zone statistics, as it can't safely clear the 2789 * per-CPU cache statistic. 2790 * 2791 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't 2792 * safe from off-CPU; we should modify the caches to track this information 2793 * directly so that we don't have to. 2794 */ 2795 static void 2796 uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp, 2797 u_int64_t *freesp) 2798 { 2799 uma_cache_t cache; 2800 u_int64_t allocs, frees; 2801 int cachefree, cpu; 2802 2803 allocs = frees = 0; 2804 cachefree = 0; 2805 for (cpu = 0; cpu <= mp_maxid; cpu++) { 2806 if (CPU_ABSENT(cpu)) 2807 continue; 2808 cache = &z->uz_cpu[cpu]; 2809 if (cache->uc_allocbucket != NULL) 2810 cachefree += cache->uc_allocbucket->ub_cnt; 2811 if (cache->uc_freebucket != NULL) 2812 cachefree += cache->uc_freebucket->ub_cnt; 2813 allocs += cache->uc_allocs; 2814 frees += cache->uc_frees; 2815 } 2816 allocs += z->uz_allocs; 2817 frees += z->uz_frees; 2818 if (cachefreep != NULL) 2819 *cachefreep = cachefree; 2820 if (allocsp != NULL) 2821 *allocsp = allocs; 2822 if (freesp != NULL) 2823 *freesp = frees; 2824 } 2825 #endif /* DDB */ 2826 2827 static int 2828 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS) 2829 { 2830 uma_keg_t kz; 2831 uma_zone_t z; 2832 int count; 2833 2834 count = 0; 2835 mtx_lock(&uma_mtx); 2836 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2837 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2838 count++; 2839 } 2840 mtx_unlock(&uma_mtx); 2841 return (sysctl_handle_int(oidp, &count, 0, req)); 2842 } 2843 2844 static int 2845 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) 2846 { 2847 struct uma_stream_header ush; 2848 struct uma_type_header uth; 2849 struct uma_percpu_stat ups; 2850 uma_bucket_t bucket; 2851 struct sbuf sbuf; 2852 uma_cache_t cache; 2853 uma_keg_t kz; 2854 uma_zone_t z; 2855 char *buffer; 2856 int buflen, count, error, i; 2857 2858 mtx_lock(&uma_mtx); 2859 restart: 2860 mtx_assert(&uma_mtx, MA_OWNED); 2861 count = 0; 2862 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2863 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2864 count++; 2865 } 2866 mtx_unlock(&uma_mtx); 2867 2868 buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) * 2869 (mp_maxid + 1)) + 1; 2870 buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); 2871 2872 mtx_lock(&uma_mtx); 2873 i = 0; 2874 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2875 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2876 i++; 2877 } 2878 if (i > count) { 2879 free(buffer, M_TEMP); 2880 goto restart; 2881 } 2882 count = i; 2883 2884 sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN); 2885 2886 /* 2887 * Insert stream header.
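 * The exported stream is a single struct uma_stream_header followed by one struct uma_type_header per zone, each of which is itself followed by ush_maxcpus struct uma_percpu_stat records (left zeroed for internal zones and absent CPUs), matching the buffer length computed above.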
2888 */ 2889 bzero(&ush, sizeof(ush)); 2890 ush.ush_version = UMA_STREAM_VERSION; 2891 ush.ush_maxcpus = (mp_maxid + 1); 2892 ush.ush_count = count; 2893 if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) { 2894 mtx_unlock(&uma_mtx); 2895 error = ENOMEM; 2896 goto out; 2897 } 2898 2899 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2900 LIST_FOREACH(z, &kz->uk_zones, uz_link) { 2901 bzero(&uth, sizeof(uth)); 2902 ZONE_LOCK(z); 2903 strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME); 2904 uth.uth_align = kz->uk_align; 2905 uth.uth_pages = kz->uk_pages; 2906 uth.uth_keg_free = kz->uk_free; 2907 uth.uth_size = kz->uk_size; 2908 uth.uth_rsize = kz->uk_rsize; 2909 uth.uth_maxpages = kz->uk_maxpages; 2910 if (kz->uk_ppera > 1) 2911 uth.uth_limit = kz->uk_maxpages / 2912 kz->uk_ppera; 2913 else 2914 uth.uth_limit = kz->uk_maxpages * 2915 kz->uk_ipers; 2916 2917 /* 2918 * A zone is secondary if it is not the first entry 2919 * on the keg's zone list. 2920 */ 2921 if ((kz->uk_flags & UMA_ZONE_SECONDARY) && 2922 (LIST_FIRST(&kz->uk_zones) != z)) 2923 uth.uth_zone_flags = UTH_ZONE_SECONDARY; 2924 2925 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) 2926 uth.uth_zone_free += bucket->ub_cnt; 2927 uth.uth_allocs = z->uz_allocs; 2928 uth.uth_frees = z->uz_frees; 2929 uth.uth_fails = z->uz_fails; 2930 if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) { 2931 ZONE_UNLOCK(z); 2932 mtx_unlock(&uma_mtx); 2933 error = ENOMEM; 2934 goto out; 2935 } 2936 /* 2937 * While it is not normally safe to access the cache 2938 * bucket pointers while not on the CPU that owns the 2939 * cache, we only allow the pointers to be exchanged 2940 * without the zone lock held, not invalidated, so 2941 * accept the possible race associated with bucket 2942 * exchange during monitoring. 2943 */ 2944 for (i = 0; i < (mp_maxid + 1); i++) { 2945 bzero(&ups, sizeof(ups)); 2946 if (kz->uk_flags & UMA_ZFLAG_INTERNAL) 2947 goto skip; 2948 if (CPU_ABSENT(i)) 2949 goto skip; 2950 cache = &z->uz_cpu[i]; 2951 if (cache->uc_allocbucket != NULL) 2952 ups.ups_cache_free += 2953 cache->uc_allocbucket->ub_cnt; 2954 if (cache->uc_freebucket != NULL) 2955 ups.ups_cache_free += 2956 cache->uc_freebucket->ub_cnt; 2957 ups.ups_allocs = cache->uc_allocs; 2958 ups.ups_frees = cache->uc_frees; 2959 skip: 2960 if (sbuf_bcat(&sbuf, &ups, sizeof(ups)) < 0) { 2961 ZONE_UNLOCK(z); 2962 mtx_unlock(&uma_mtx); 2963 error = ENOMEM; 2964 goto out; 2965 } 2966 } 2967 ZONE_UNLOCK(z); 2968 } 2969 } 2970 mtx_unlock(&uma_mtx); 2971 sbuf_finish(&sbuf); 2972 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); 2973 out: 2974 free(buffer, M_TEMP); 2975 return (error); 2976 } 2977 2978 #ifdef DDB 2979 DB_SHOW_COMMAND(uma, db_show_uma) 2980 { 2981 u_int64_t allocs, frees; 2982 uma_bucket_t bucket; 2983 uma_keg_t kz; 2984 uma_zone_t z; 2985 int cachefree; 2986 2987 db_printf("%18s %8s %8s %8s %12s\n", "Zone", "Size", "Used", "Free", 2988 "Requests"); 2989 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2990 LIST_FOREACH(z, &kz->uk_zones, uz_link) { 2991 if (kz->uk_flags & UMA_ZFLAG_INTERNAL) { 2992 allocs = z->uz_allocs; 2993 frees = z->uz_frees; 2994 cachefree = 0; 2995 } else 2996 uma_zone_sumstat(z, &cachefree, &allocs, 2997 &frees); 2998 if (!((kz->uk_flags & UMA_ZONE_SECONDARY) && 2999 (LIST_FIRST(&kz->uk_zones) != z))) 3000 cachefree += kz->uk_free; 3001 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) 3002 cachefree += bucket->ub_cnt; 3003 db_printf("%18s %8ju %8jd %8d %12ju\n", z->uz_name, 3004 (uintmax_t)kz->uk_size, 3005 (intmax_t)(allocs - frees), cachefree, 3006 (uintmax_t)allocs);
3007 } 3008 } 3009 } 3010 #endif 3011
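/*
 * Editor's illustrative sketch (not part of the allocator itself): typical
 * consumer usage of the zone interface declared in <vm/uma.h>, included only
 * to clarify how the allocation and free paths above are normally reached.
 * The "foo" type, the zone name, and the item limit below are hypothetical.
 *
 *	static uma_zone_t foo_zone;
 *
 *	struct foo {
 *		int	f_state;
 *	};
 *
 *	static void
 *	foo_zone_init(void)
 *	{
 *
 *		foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *		uma_zone_set_max(foo_zone, 1024);
 *	}
 *
 *	static struct foo *
 *	foo_alloc(void)
 *	{
 *
 *		return (uma_zalloc(foo_zone, M_WAITOK | M_ZERO));
 *	}
 *
 *	static void
 *	foo_free(struct foo *fp)
 *	{
 *
 *		uma_zfree(foo_zone, fp);
 *	}
 *
 * uma_zalloc() and uma_zfree() are thin wrappers around uma_zalloc_arg() and
 * uma_zfree_arg() with a NULL udata argument, so they enter the per-CPU cache
 * fast paths implemented above.
 */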