/*-
 * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff@FreeBSD.org>
 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
 * Copyright (c) 2004-2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uma_core.c  Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.
This will make the system as a whole more flexible due to the 36 * ability to move memory to subsystems which most need it instead of leaving 37 * pools of reserved memory unused. 38 * 39 * The basic ideas stem from similar slab/zone based allocators whose algorithms 40 * are well known. 41 * 42 */ 43 44 /* 45 * TODO: 46 * - Improve memory usage for large allocations 47 * - Investigate cache size adjustments 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 /* I should really use ktr.. */ 54 /* 55 #define UMA_DEBUG 1 56 #define UMA_DEBUG_ALLOC 1 57 #define UMA_DEBUG_ALLOC_1 1 58 */ 59 60 #include "opt_param.h" 61 #include <sys/param.h> 62 #include <sys/systm.h> 63 #include <sys/kernel.h> 64 #include <sys/types.h> 65 #include <sys/queue.h> 66 #include <sys/malloc.h> 67 #include <sys/ktr.h> 68 #include <sys/lock.h> 69 #include <sys/sysctl.h> 70 #include <sys/mutex.h> 71 #include <sys/proc.h> 72 #include <sys/sbuf.h> 73 #include <sys/smp.h> 74 #include <sys/vmmeter.h> 75 76 #include <vm/vm.h> 77 #include <vm/vm_object.h> 78 #include <vm/vm_page.h> 79 #include <vm/vm_param.h> 80 #include <vm/vm_map.h> 81 #include <vm/vm_kern.h> 82 #include <vm/vm_extern.h> 83 #include <vm/uma.h> 84 #include <vm/uma_int.h> 85 #include <vm/uma_dbg.h> 86 87 #include <machine/vmparam.h> 88 89 /* 90 * This is the zone and keg from which all zones are spawned. The idea is that 91 * even the zone & keg heads are allocated from the allocator, so we use the 92 * bss section to bootstrap us. 93 */ 94 static struct uma_keg masterkeg; 95 static struct uma_zone masterzone_k; 96 static struct uma_zone masterzone_z; 97 static uma_zone_t kegs = &masterzone_k; 98 static uma_zone_t zones = &masterzone_z; 99 100 /* This is the zone from which all of uma_slab_t's are allocated. 
*/
static uma_zone_t slabzone;
static uma_zone_t slabrefzone;	/* With refcounters (for UMA_ZONE_REFCNT) */

/*
 * The initial hash tables come out of this zone so they can be allocated
 * prior to malloc coming up.
 */
static uma_zone_t hashzone;

static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");

/*
 * Are we allowed to allocate buckets?
 */
static int bucketdisable = 1;

/* Linked list of all kegs in the system */
static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);

/* This mutex protects the keg list */
static struct mtx uma_mtx;

/* Linked list of boot time pages */
static LIST_HEAD(,uma_slab) uma_boot_pages =
    LIST_HEAD_INITIALIZER(&uma_boot_pages);

/* This mutex protects the boot time pages list */
static struct mtx uma_boot_pages_mtx;

/* Is the VM done starting up? */
static int booted = 0;

/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
static u_int uma_max_ipers;
static u_int uma_max_ipers_ref;

/*
 * This is the handle used to schedule events that need to happen
 * outside of the allocation fast path.
 */
static struct callout uma_callout;
#define	UMA_TIMEOUT	20		/* Seconds for callout interval. */

/*
 * This structure is passed as the zone ctor arg so that I don't have to create
 * a special allocation function just for zones.
 */
struct uma_zctor_args {
	char *name;
	size_t size;
	uma_ctor ctor;
	uma_dtor dtor;
	uma_init uminit;
	uma_fini fini;
	uma_keg_t keg;
	int align;
	u_int32_t flags;
};

/*
 * Argument bundle for the keg ctor: keg_ctor() receives a pointer to one of
 * these as its udata argument and copies size, uminit, fini, align and flags
 * into the new keg; 'zone' is the zone that gets pointed at the new keg.
 */
struct uma_kctor_args {
	uma_zone_t zone;
	size_t size;
	uma_init uminit;
	uma_fini fini;
	int align;
	u_int32_t flags;
};

/*
 * One entry per bucket zone.  bucket_init() creates ubz_zone under the name
 * ubz_name, sizing each item as a bucket header plus ubz_entries pointers.
 */
struct uma_bucket_zone {
	uma_zone_t	ubz_zone;
	char		*ubz_name;
	int		ubz_entries;
};

/* Largest bucket size (in entries) present in bucket_zones[] below. */
#define	BUCKET_MAX	128

/*
 * Table of bucket zones in increasing size order.  The entry with
 * ubz_entries == 0 terminates the table; the loops in bucket_init() and
 * bucket_zone_drain() stop there.
 */
struct uma_bucket_zone bucket_zones[] = {
	{ NULL, "16 Bucket", 16 },
	{ NULL, "32 Bucket", 32 },
	{ NULL, "64 Bucket", 64 },
	{ NULL, "128 Bucket", 128 },
	{ NULL, NULL, 0}
};

/*
 * bucket_size[] is indexed in units of (1 << BUCKET_SHIFT) entries, so it
 * needs one slot for each multiple of 16 up to BUCKET_MAX plus slot 0.
 */
#define	BUCKET_SHIFT	4
#define	BUCKET_ZONES	((BUCKET_MAX >> BUCKET_SHIFT) + 1)

/*
 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
 * of approximately the right size.
 */
static uint8_t bucket_size[BUCKET_ZONES];

/*
 * Flags and enumerations to be passed to internal functions.
 */
enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };

#define	ZFREE_STATFAIL	0x00000001	/* Update zone failure statistic. */
#define	ZFREE_STATFREE	0x00000002	/* Update zone free statistic. */

/* Prototypes.. */

static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
static void page_free(void *, int, u_int8_t);
static uma_slab_t slab_zalloc(uma_zone_t, int);
static void cache_drain(uma_zone_t);
static void bucket_drain(uma_zone_t, uma_bucket_t);
static void bucket_cache_drain(uma_zone_t zone);
static int keg_ctor(void *, int, void *, int);
static void keg_dtor(void *, int, void *);
static int zone_ctor(void *, int, void *, int);
static void zone_dtor(void *, int, void *);
static int zero_init(void *, int, int);
static void zone_small_init(uma_zone_t zone);
static void zone_large_init(uma_zone_t zone);
static void zone_foreach(void (*zfunc)(uma_zone_t));
static void zone_timeout(uma_zone_t zone);
static int hash_alloc(struct uma_hash *);
static int hash_expand(struct uma_hash *, struct uma_hash *);
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
static void *uma_zalloc_internal(uma_zone_t, void *, int);
static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
    int);
static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(int, int);
static void bucket_free(uma_bucket_t);
static void bucket_zone_drain(void);
static int uma_zalloc_bucket(uma_zone_t zone, int flags);
static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
static void zone_drain(uma_zone_t);
static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
    uma_fini fini, int align, u_int32_t flags);

/* Debugging aids; these two are not static. */
void uma_print_zone(uma_zone_t);
void uma_print_stats(void);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
static int
sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS); 245 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS); 246 247 #ifdef WITNESS 248 static int nosleepwithlocks = 1; 249 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks, 250 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths"); 251 #else 252 static int nosleepwithlocks = 0; 253 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks, 254 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths"); 255 #endif 256 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, 257 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 258 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); 259 260 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT, 261 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones"); 262 263 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT, 264 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats"); 265 266 /* 267 * This routine checks to see whether or not it's safe to enable buckets. 268 */ 269 270 static void 271 bucket_enable(void) 272 { 273 if (cnt.v_free_count < cnt.v_free_min) 274 bucketdisable = 1; 275 else 276 bucketdisable = 0; 277 } 278 279 /* 280 * Initialize bucket_zones, the array of zones of buckets of various sizes. 281 * 282 * For each zone, calculate the memory required for each bucket, consisting 283 * of the header and an array of pointers. Initialize bucket_size[] to point 284 * the range of appropriate bucket sizes at the zone. 
285 */ 286 static void 287 bucket_init(void) 288 { 289 struct uma_bucket_zone *ubz; 290 int i; 291 int j; 292 293 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) { 294 int size; 295 296 ubz = &bucket_zones[j]; 297 size = roundup(sizeof(struct uma_bucket), sizeof(void *)); 298 size += sizeof(void *) * ubz->ubz_entries; 299 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, 300 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); 301 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT)) 302 bucket_size[i >> BUCKET_SHIFT] = j; 303 } 304 } 305 306 /* 307 * Given a desired number of entries for a bucket, return the zone from which 308 * to allocate the bucket. 309 */ 310 static struct uma_bucket_zone * 311 bucket_zone_lookup(int entries) 312 { 313 int idx; 314 315 idx = howmany(entries, 1 << BUCKET_SHIFT); 316 return (&bucket_zones[bucket_size[idx]]); 317 } 318 319 static uma_bucket_t 320 bucket_alloc(int entries, int bflags) 321 { 322 struct uma_bucket_zone *ubz; 323 uma_bucket_t bucket; 324 325 /* 326 * This is to stop us from allocating per cpu buckets while we're 327 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the 328 * boot pages. This also prevents us from allocating buckets in 329 * low memory situations. 
330 */ 331 if (bucketdisable) 332 return (NULL); 333 334 ubz = bucket_zone_lookup(entries); 335 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags); 336 if (bucket) { 337 #ifdef INVARIANTS 338 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries); 339 #endif 340 bucket->ub_cnt = 0; 341 bucket->ub_entries = ubz->ubz_entries; 342 } 343 344 return (bucket); 345 } 346 347 static void 348 bucket_free(uma_bucket_t bucket) 349 { 350 struct uma_bucket_zone *ubz; 351 352 ubz = bucket_zone_lookup(bucket->ub_entries); 353 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE, 354 ZFREE_STATFREE); 355 } 356 357 static void 358 bucket_zone_drain(void) 359 { 360 struct uma_bucket_zone *ubz; 361 362 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) 363 zone_drain(ubz->ubz_zone); 364 } 365 366 367 /* 368 * Routine called by timeout which is used to fire off some time interval 369 * based calculations. (stats, hash size, etc.) 370 * 371 * Arguments: 372 * arg Unused 373 * 374 * Returns: 375 * Nothing 376 */ 377 static void 378 uma_timeout(void *unused) 379 { 380 bucket_enable(); 381 zone_foreach(zone_timeout); 382 383 /* Reschedule this event */ 384 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); 385 } 386 387 /* 388 * Routine to perform timeout driven calculations. This expands the 389 * hashes and does per cpu statistics aggregation. 390 * 391 * Arguments: 392 * zone The zone to operate on 393 * 394 * Returns: 395 * Nothing 396 */ 397 static void 398 zone_timeout(uma_zone_t zone) 399 { 400 uma_keg_t keg; 401 u_int64_t alloc; 402 403 keg = zone->uz_keg; 404 alloc = 0; 405 406 /* 407 * Expand the zone hash table. 408 * 409 * This is done if the number of slabs is larger than the hash size. 410 * What I'm trying to do here is completely reduce collisions. This 411 * may be a little aggressive. Should I allow for two collisions max? 
412 */ 413 ZONE_LOCK(zone); 414 if (keg->uk_flags & UMA_ZONE_HASH && 415 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) { 416 struct uma_hash newhash; 417 struct uma_hash oldhash; 418 int ret; 419 420 /* 421 * This is so involved because allocating and freeing 422 * while the zone lock is held will lead to deadlock. 423 * I have to do everything in stages and check for 424 * races. 425 */ 426 newhash = keg->uk_hash; 427 ZONE_UNLOCK(zone); 428 ret = hash_alloc(&newhash); 429 ZONE_LOCK(zone); 430 if (ret) { 431 if (hash_expand(&keg->uk_hash, &newhash)) { 432 oldhash = keg->uk_hash; 433 keg->uk_hash = newhash; 434 } else 435 oldhash = newhash; 436 437 ZONE_UNLOCK(zone); 438 hash_free(&oldhash); 439 ZONE_LOCK(zone); 440 } 441 } 442 ZONE_UNLOCK(zone); 443 } 444 445 /* 446 * Allocate and zero fill the next sized hash table from the appropriate 447 * backing store. 448 * 449 * Arguments: 450 * hash A new hash structure with the old hash size in uh_hashsize 451 * 452 * Returns: 453 * 1 on sucess and 0 on failure. 454 */ 455 static int 456 hash_alloc(struct uma_hash *hash) 457 { 458 int oldsize; 459 int alloc; 460 461 oldsize = hash->uh_hashsize; 462 463 /* We're just going to go to a power of two greater */ 464 if (oldsize) { 465 hash->uh_hashsize = oldsize * 2; 466 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize; 467 hash->uh_slab_hash = (struct slabhead *)malloc(alloc, 468 M_UMAHASH, M_NOWAIT); 469 } else { 470 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; 471 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL, 472 M_WAITOK); 473 hash->uh_hashsize = UMA_HASH_SIZE_INIT; 474 } 475 if (hash->uh_slab_hash) { 476 bzero(hash->uh_slab_hash, alloc); 477 hash->uh_hashmask = hash->uh_hashsize - 1; 478 return (1); 479 } 480 481 return (0); 482 } 483 484 /* 485 * Expands the hash table for HASH zones. This is done from zone_timeout 486 * to reduce collisions. 
This must not be done in the regular allocation 487 * path, otherwise, we can recurse on the vm while allocating pages. 488 * 489 * Arguments: 490 * oldhash The hash you want to expand 491 * newhash The hash structure for the new table 492 * 493 * Returns: 494 * Nothing 495 * 496 * Discussion: 497 */ 498 static int 499 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash) 500 { 501 uma_slab_t slab; 502 int hval; 503 int i; 504 505 if (!newhash->uh_slab_hash) 506 return (0); 507 508 if (oldhash->uh_hashsize >= newhash->uh_hashsize) 509 return (0); 510 511 /* 512 * I need to investigate hash algorithms for resizing without a 513 * full rehash. 514 */ 515 516 for (i = 0; i < oldhash->uh_hashsize; i++) 517 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) { 518 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]); 519 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink); 520 hval = UMA_HASH(newhash, slab->us_data); 521 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval], 522 slab, us_hlink); 523 } 524 525 return (1); 526 } 527 528 /* 529 * Free the hash bucket to the appropriate backing store. 530 * 531 * Arguments: 532 * slab_hash The hash bucket we're freeing 533 * hashsize The number of entries in that hash bucket 534 * 535 * Returns: 536 * Nothing 537 */ 538 static void 539 hash_free(struct uma_hash *hash) 540 { 541 if (hash->uh_slab_hash == NULL) 542 return; 543 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT) 544 uma_zfree_internal(hashzone, 545 hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE); 546 else 547 free(hash->uh_slab_hash, M_UMAHASH); 548 } 549 550 /* 551 * Frees all outstanding items in a bucket 552 * 553 * Arguments: 554 * zone The zone to free to, must be unlocked. 555 * bucket The free/alloc bucket with items, cpu queue must be locked. 
556 * 557 * Returns: 558 * Nothing 559 */ 560 561 static void 562 bucket_drain(uma_zone_t zone, uma_bucket_t bucket) 563 { 564 uma_slab_t slab; 565 int mzone; 566 void *item; 567 568 if (bucket == NULL) 569 return; 570 571 slab = NULL; 572 mzone = 0; 573 574 /* We have to lookup the slab again for malloc.. */ 575 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC) 576 mzone = 1; 577 578 while (bucket->ub_cnt > 0) { 579 bucket->ub_cnt--; 580 item = bucket->ub_bucket[bucket->ub_cnt]; 581 #ifdef INVARIANTS 582 bucket->ub_bucket[bucket->ub_cnt] = NULL; 583 KASSERT(item != NULL, 584 ("bucket_drain: botched ptr, item is NULL")); 585 #endif 586 /* 587 * This is extremely inefficient. The slab pointer was passed 588 * to uma_zfree_arg, but we lost it because the buckets don't 589 * hold them. This will go away when free() gets a size passed 590 * to it. 591 */ 592 if (mzone) 593 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK)); 594 uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0); 595 } 596 } 597 598 /* 599 * Drains the per cpu caches for a zone. 600 * 601 * NOTE: This may only be called while the zone is being turn down, and not 602 * during normal operation. This is necessary in order that we do not have 603 * to migrate CPUs to drain the per-CPU caches. 604 * 605 * Arguments: 606 * zone The zone to drain, must be unlocked. 607 * 608 * Returns: 609 * Nothing 610 */ 611 static void 612 cache_drain(uma_zone_t zone) 613 { 614 uma_cache_t cache; 615 int cpu; 616 617 /* 618 * XXX: It is safe to not lock the per-CPU caches, because we're 619 * tearing down the zone anyway. I.e., there will be no further use 620 * of the caches at this point. 621 * 622 * XXX: It would good to be able to assert that the zone is being 623 * torn down to prevent improper use of cache_drain(). 624 * 625 * XXX: We lock the zone before passing into bucket_cache_drain() as 626 * it is used elsewhere. Should the tear-down path be made special 627 * there in some form? 
628 */ 629 for (cpu = 0; cpu <= mp_maxid; cpu++) { 630 if (CPU_ABSENT(cpu)) 631 continue; 632 cache = &zone->uz_cpu[cpu]; 633 bucket_drain(zone, cache->uc_allocbucket); 634 bucket_drain(zone, cache->uc_freebucket); 635 if (cache->uc_allocbucket != NULL) 636 bucket_free(cache->uc_allocbucket); 637 if (cache->uc_freebucket != NULL) 638 bucket_free(cache->uc_freebucket); 639 cache->uc_allocbucket = cache->uc_freebucket = NULL; 640 } 641 ZONE_LOCK(zone); 642 bucket_cache_drain(zone); 643 ZONE_UNLOCK(zone); 644 } 645 646 /* 647 * Drain the cached buckets from a zone. Expects a locked zone on entry. 648 */ 649 static void 650 bucket_cache_drain(uma_zone_t zone) 651 { 652 uma_bucket_t bucket; 653 654 /* 655 * Drain the bucket queues and free the buckets, we just keep two per 656 * cpu (alloc/free). 657 */ 658 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { 659 LIST_REMOVE(bucket, ub_link); 660 ZONE_UNLOCK(zone); 661 bucket_drain(zone, bucket); 662 bucket_free(bucket); 663 ZONE_LOCK(zone); 664 } 665 666 /* Now we do the free queue.. */ 667 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { 668 LIST_REMOVE(bucket, ub_link); 669 bucket_free(bucket); 670 } 671 } 672 673 /* 674 * Frees pages from a zone back to the system. This is done on demand from 675 * the pageout daemon. 676 * 677 * Arguments: 678 * zone The zone to free pages from 679 * all Should we drain all items? 680 * 681 * Returns: 682 * Nothing. 
*/
static void
zone_drain(uma_zone_t zone)
{
	struct slabhead freeslabs = { 0 };
	uma_keg_t keg;
	uma_slab_t slab;
	uma_slab_t n;
	u_int8_t flags;
	u_int8_t *mem;
	int i;

	keg = zone->uz_keg;

	/*
	 * We don't want to take pages from statically allocated zones at this
	 * time
	 */
	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
		return;

	ZONE_LOCK(zone);

#ifdef UMA_DEBUG
	printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
#endif
	bucket_cache_drain(zone);
	if (keg->uk_free == 0)
		goto finished;

	/*
	 * Phase 1 (zone locked): unlink every completely free slab, fix up
	 * the keg accounting and collect the slabs on the private freeslabs
	 * list.  The next pointer is fetched up front because the current
	 * slab may be removed from the list.
	 */
	slab = LIST_FIRST(&keg->uk_free_slab);
	while (slab) {
		n = LIST_NEXT(slab, us_link);

		/* We have no where to free these to */
		if (slab->us_flags & UMA_SLAB_BOOT) {
			slab = n;
			continue;
		}

		LIST_REMOVE(slab, us_link);
		keg->uk_pages -= keg->uk_ppera;
		keg->uk_free -= keg->uk_ipers;

		if (keg->uk_flags & UMA_ZONE_HASH)
			UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);

		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);

		slab = n;
	}
finished:
	ZONE_UNLOCK(zone);

	/*
	 * Phase 2 (zone unlocked): run the keg fini on every item of each
	 * collected slab and hand the memory back through uk_freef.
	 */
	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
		if (keg->uk_fini)
			for (i = 0; i < keg->uk_ipers; i++)
				keg->uk_fini(
				    slab->us_data + (keg->uk_rsize * i),
				    keg->uk_size);
		/*
		 * Save what uk_freef needs before the slab header itself may
		 * be released below.
		 */
		flags = slab->us_flags;
		mem = slab->us_data;

		/* Undo the per-page slab association made in slab_zalloc(). */
		if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
		    (keg->uk_flags & UMA_ZONE_REFCNT)) {
			vm_object_t obj;

			if (flags & UMA_SLAB_KMEM)
				obj = kmem_object;
			else
				obj = NULL;
			for (i = 0; i < keg->uk_ppera; i++)
				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
				    obj);
		}
		/* Off-page slab headers were allocated from uk_slabzone. */
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
			    SKIP_NONE, ZFREE_STATFREE);
#ifdef UMA_DEBUG
		printf("%s: Returning %d bytes.\n",
		    zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
#endif
		keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
	}
}

/*
 * Allocate a new slab for a zone.  This does not insert the slab onto a list.
 *
 * The zone lock is released on entry and re-acquired before every return,
 * including the failure returns.
 *
 * Arguments:
 *	zone  The zone to allocate slabs for
 *	wait  Shall we wait?
 *
 * Returns:
 *	The slab that was allocated or NULL if there is no memory and the
 *	caller specified M_NOWAIT.  NULL is also returned if the keg's init
 *	routine fails on any item.
 */
static uma_slab_t
slab_zalloc(uma_zone_t zone, int wait)
{
	uma_slabrefcnt_t slabref;
	uma_slab_t slab;
	uma_keg_t keg;
	u_int8_t *mem;
	u_int8_t flags;
	int i;

	slab = NULL;
	keg = zone->uz_keg;

#ifdef UMA_DEBUG
	printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
#endif
	ZONE_UNLOCK(zone);

	/* Off-page slab headers are allocated separately from the slab zone. */
	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
		if (slab == NULL) {
			ZONE_LOCK(zone);
			return NULL;
		}
	}

	/*
	 * This reproduces the old vm_zone behavior of zero filling pages the
	 * first time they are added to a zone.
	 *
	 * Malloced items are zeroed in uma_zalloc.
	 */

	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
		wait |= M_ZERO;
	else
		wait &= ~M_ZERO;

	mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
	    &flags, wait);
	if (mem == NULL) {
		/* Give back the off-page header allocated above. */
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			uma_zfree_internal(keg->uk_slabzone, slab, NULL,
			    SKIP_NONE, ZFREE_STATFREE);
		ZONE_LOCK(zone);
		return (NULL);
	}

	/* Point the slab into the allocated memory */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
		slab = (uma_slab_t )(mem + keg->uk_pgoff);

	/* Record the owning slab on every page of the allocation. */
	if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
	    (keg->uk_flags & UMA_ZONE_REFCNT))
		for (i = 0; i < keg->uk_ppera; i++)
			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);

	slab->us_keg = keg;
	slab->us_data = mem;
	slab->us_freecount = keg->uk_ipers;
	slab->us_firstfree = 0;
	slab->us_flags = flags;

	/* Chain the free list: entry i links to item i + 1. */
	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		slabref = (uma_slabrefcnt_t)slab;
		for (i = 0; i < keg->uk_ipers; i++) {
			slabref->us_freelist[i].us_refcnt = 0;
			slabref->us_freelist[i].us_item = i+1;
		}
	} else {
		for (i = 0; i < keg->uk_ipers; i++)
			slab->us_freelist[i].us_item = i+1;
	}

	if (keg->uk_init != NULL) {
		for (i = 0; i < keg->uk_ipers; i++)
			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
			    keg->uk_size, wait) != 0)
				break;
		if (i != keg->uk_ipers) {
			/*
			 * Item i failed to initialize: fini the items that
			 * were initialized (i - 1 down to 0), then undo the
			 * page association and release header and memory.
			 */
			if (keg->uk_fini != NULL) {
				for (i--; i > -1; i--)
					keg->uk_fini(slab->us_data +
					    (keg->uk_rsize * i),
					    keg->uk_size);
			}
			if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
			    (keg->uk_flags & UMA_ZONE_REFCNT)) {
				vm_object_t obj;

				if (flags & UMA_SLAB_KMEM)
					obj = kmem_object;
				else
					obj = NULL;
				for (i = 0; i < keg->uk_ppera; i++)
					vsetobj((vm_offset_t)mem +
					    (i * PAGE_SIZE), obj);
			}
			if (keg->uk_flags & UMA_ZONE_OFFPAGE)
				uma_zfree_internal(keg->uk_slabzone, slab,
				    NULL, SKIP_NONE, ZFREE_STATFREE);
			keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
			    flags);
			ZONE_LOCK(zone);
			return (NULL);
		}
	}
	ZONE_LOCK(zone);

	/* Keg bookkeeping is only touched with the zone lock held again. */
	if (keg->uk_flags & UMA_ZONE_HASH)
		UMA_HASH_INSERT(&keg->uk_hash, slab, mem);

	keg->uk_pages += keg->uk_ppera;
	keg->uk_free += keg->uk_ipers;

	return (slab);
}

/*
 * This function is intended to be used early on in place of page_alloc() so
 * that we may use the boot time page cache to satisfy allocations before
 * the VM is ready.
 *
 * Arguments:
 *	zone   The zone whose keg is allocating
 *	bytes  The number of bytes requested; only used once the boot page
 *	       list is empty and the request is forwarded
 *	pflag  Set to the boot page's us_flags, or by the real allocator
 *	wait   Passed through to the real allocator
 *
 * Returns:
 *	A boot page if one is left; otherwise whatever the keg's real
 *	allocator returns.  Panics if the boot pages run out before 'booted'
 *	is set.
 */
static void *
startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	uma_keg_t keg;
	uma_slab_t tmps;

	keg = zone->uz_keg;

	/*
	 * Check our small startup cache to see if it has pages remaining.
	 */
	mtx_lock(&uma_boot_pages_mtx);
	if ((tmps = LIST_FIRST(&uma_boot_pages)) != NULL) {
		LIST_REMOVE(tmps, us_link);
		mtx_unlock(&uma_boot_pages_mtx);
		*pflag = tmps->us_flags;
		return (tmps->us_data);
	}
	mtx_unlock(&uma_boot_pages_mtx);
	if (booted == 0)
		panic("UMA: Increase UMA_BOOT_PAGES");
	/*
	 * Now that we've booted reset these users to their real allocator.
	 */
#ifdef UMA_MD_SMALL_ALLOC
	keg->uk_allocf = uma_small_alloc;
#else
	keg->uk_allocf = page_alloc;
#endif
	return keg->uk_allocf(zone, bytes, pflag, wait);
}

/*
 * Allocates a number of pages from the system
 *
 * Arguments:
 *	zone   Unused
 *	bytes  The number of bytes requested
 *	pflag  Set to UMA_SLAB_KMEM
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the alloced memory or possibly
 *	NULL if M_NOWAIT is set.
*/
static void *
page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	void *p;	/* Returned page */

	/* Tag the slab so page_free() knows it came from kmem_map. */
	*pflag = UMA_SLAB_KMEM;
	p = (void *) kmem_malloc(kmem_map, bytes, wait);

	return (p);
}

/*
 * Allocates a number of pages from within an object
 *
 * Arguments:
 *	zone   The zone whose keg supplies the object (uk_obj) and the base
 *	       kernel virtual address (uk_kva)
 *	bytes  The number of bytes requested
 *	flags  Set to UMA_SLAB_PRIV
 *	wait   Unused
 *
 * Returns:
 *	A pointer to the alloced memory, or NULL if a page could not be
 *	allocated.
 */
static void *
obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	vm_object_t object;
	vm_offset_t retkva, zkva;
	vm_page_t p;
	int pages, startpages;

	object = zone->uz_keg->uk_obj;
	retkva = 0;

	/*
	 * This looks a little weird since we're getting one page at a time.
	 */
	VM_OBJECT_LOCK(object);
	/* New pages go right after the object's current last page index. */
	p = TAILQ_LAST(&object->memq, pglist);
	pages = p != NULL ? p->pindex + 1 : 0;
	startpages = pages;
	zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
	for (; bytes > 0; bytes -= PAGE_SIZE) {
		p = vm_page_alloc(object, pages,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
		if (p == NULL) {
			/*
			 * Out of pages: unmap and free everything this call
			 * added (the pages at the tail of the object), then
			 * fail the whole request.
			 */
			if (pages != startpages)
				pmap_qremove(retkva, pages - startpages);
			while (pages != startpages) {
				pages--;
				p = TAILQ_LAST(&object->memq, pglist);
				vm_page_lock_queues();
				vm_page_unwire(p, 0);
				vm_page_free(p);
				vm_page_unlock_queues();
			}
			retkva = 0;
			goto done;
		}
		pmap_qenter(zkva, &p, 1);
		/* The first mapped address is what the caller gets back. */
		if (retkva == 0)
			retkva = zkva;
		zkva += PAGE_SIZE;
		pages += 1;
	}
done:
	VM_OBJECT_UNLOCK(object);
	*flags = UMA_SLAB_PRIV;

	return ((void *)retkva);
}

/*
 * Frees a number of pages to the system
 *
 * Arguments:
 *	mem   A pointer to the memory to be freed
 *	size  The size of the memory being freed
 *	flags The original p->us_flags field
 *
 * Returns:
1029 * Nothing 1030 */ 1031 static void 1032 page_free(void *mem, int size, u_int8_t flags) 1033 { 1034 vm_map_t map; 1035 1036 if (flags & UMA_SLAB_KMEM) 1037 map = kmem_map; 1038 else 1039 panic("UMA: page_free used with invalid flags %d\n", flags); 1040 1041 kmem_free(map, (vm_offset_t)mem, size); 1042 } 1043 1044 /* 1045 * Zero fill initializer 1046 * 1047 * Arguments/Returns follow uma_init specifications 1048 */ 1049 static int 1050 zero_init(void *mem, int size, int flags) 1051 { 1052 bzero(mem, size); 1053 return (0); 1054 } 1055 1056 /* 1057 * Finish creating a small uma zone. This calculates ipers, and the zone size. 1058 * 1059 * Arguments 1060 * zone The zone we should initialize 1061 * 1062 * Returns 1063 * Nothing 1064 */ 1065 static void 1066 zone_small_init(uma_zone_t zone) 1067 { 1068 uma_keg_t keg; 1069 u_int rsize; 1070 u_int memused; 1071 u_int wastedspace; 1072 u_int shsize; 1073 1074 keg = zone->uz_keg; 1075 KASSERT(keg != NULL, ("Keg is null in zone_small_init")); 1076 rsize = keg->uk_size; 1077 1078 if (rsize < UMA_SMALLEST_UNIT) 1079 rsize = UMA_SMALLEST_UNIT; 1080 if (rsize & keg->uk_align) 1081 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); 1082 1083 keg->uk_rsize = rsize; 1084 keg->uk_ppera = 1; 1085 1086 if (keg->uk_flags & UMA_ZONE_REFCNT) { 1087 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */ 1088 shsize = sizeof(struct uma_slab_refcnt); 1089 } else { 1090 rsize += UMA_FRITM_SZ; /* Account for linkage */ 1091 shsize = sizeof(struct uma_slab); 1092 } 1093 1094 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize; 1095 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0")); 1096 memused = keg->uk_ipers * rsize + shsize; 1097 wastedspace = UMA_SLAB_SIZE - memused; 1098 1099 /* 1100 * We can't do OFFPAGE if we're internal or if we've been 1101 * asked to not go to the VM for buckets. 
If we do this we 1102 * may end up going to the VM (kmem_map) for slabs which we 1103 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a 1104 * result of UMA_ZONE_VM, which clearly forbids it. 1105 */ 1106 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) || 1107 (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) 1108 return; 1109 1110 if ((wastedspace >= UMA_MAX_WASTE) && 1111 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) { 1112 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize; 1113 KASSERT(keg->uk_ipers <= 255, 1114 ("zone_small_init: keg->uk_ipers too high!")); 1115 #ifdef UMA_DEBUG 1116 printf("UMA decided we need offpage slab headers for " 1117 "zone: %s, calculated wastedspace = %d, " 1118 "maximum wasted space allowed = %d, " 1119 "calculated ipers = %d, " 1120 "new wasted space = %d\n", zone->uz_name, wastedspace, 1121 UMA_MAX_WASTE, keg->uk_ipers, 1122 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize); 1123 #endif 1124 keg->uk_flags |= UMA_ZONE_OFFPAGE; 1125 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) 1126 keg->uk_flags |= UMA_ZONE_HASH; 1127 } 1128 } 1129 1130 /* 1131 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do 1132 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be 1133 * more complicated. 
 *
 * Arguments
 *	zone  The zone we should initialize
 *
 * Returns
 *	Nothing
 */
static void
zone_large_init(uma_zone_t zone)
{
	uma_keg_t keg;
	int pages;

	keg = zone->uz_keg;

	KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
	    ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));

	/* Number of slab-sized pages needed to hold one item, rounded up. */
	pages = keg->uk_size / UMA_SLAB_SIZE;

	/* Account for remainder */
	if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
		pages++;

	/* One item per allocation, each allocation spanning 'pages' pages. */
	keg->uk_ppera = pages;
	keg->uk_ipers = 1;

	/* Large kegs always use an off-page header; non-malloc ones hash. */
	keg->uk_flags |= UMA_ZONE_OFFPAGE;
	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
		keg->uk_flags |= UMA_ZONE_HASH;

	/* No alignment rounding is applied on this path. */
	keg->uk_rsize = keg->uk_size;
}

/*
 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
 * the keg onto the global keg list.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_kctor_args
 *
 * This implementation always returns 0.
 */
static int
keg_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_kctor_args *arg = udata;
	uma_keg_t keg = mem;
	uma_zone_t zone;

	/* Start from a clean keg, then copy in the caller's parameters. */
	bzero(keg, size);
	keg->uk_size = arg->size;
	keg->uk_init = arg->uminit;
	keg->uk_fini = arg->fini;
	keg->uk_align = arg->align;
	keg->uk_free = 0;
	keg->uk_pages = 0;
	keg->uk_flags = arg->flags;
	keg->uk_allocf = page_alloc;
	keg->uk_freef = page_free;
	keg->uk_recurse = 0;
	keg->uk_slabzone = NULL;

	/*
	 * The master zone is passed to us at keg-creation time.
	 */
	zone = arg->zone;
	zone->uz_keg = keg;

	/* UMA_ZONE_VM kegs must not go to the VM for buckets. */
	if (arg->flags & UMA_ZONE_VM)
		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;

	/* UMA_ZONE_ZINIT overrides any caller-supplied init. */
	if (arg->flags & UMA_ZONE_ZINIT)
		keg->uk_init = zero_init;

	/*
	 * The +UMA_FRITM_SZ added to uk_size is to account for the
	 * linkage that is added to the size in zone_small_init().  If
	 * we don't account for this here then we may end up in
	 * zone_small_init() with a calculated 'ipers' of 0.
	 */
	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		if ((keg->uk_size+UMA_FRITMREF_SZ) >
		    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
			zone_large_init(zone);
		else
			zone_small_init(zone);
	} else {
		if ((keg->uk_size+UMA_FRITM_SZ) >
		    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
			zone_large_init(zone);
		else
			zone_small_init(zone);
	}

	/*
	 * The sizing routines above may have set UMA_ZONE_OFFPAGE; if so,
	 * pick the zone that off-page slab headers are allocated from.
	 */
	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		if (keg->uk_flags & UMA_ZONE_REFCNT)
			keg->uk_slabzone = slabrefzone;
		else
			keg->uk_slabzone = slabzone;
	}

	/*
	 * If we haven't booted yet we need allocations to go through the
	 * startup cache until the vm is ready.
	 */
	if (keg->uk_ppera == 1) {
#ifdef UMA_MD_SMALL_ALLOC
		keg->uk_allocf = uma_small_alloc;
		keg->uk_freef = uma_small_free;
#endif
		if (booted == 0)
			keg->uk_allocf = startup_alloc;
	}

	/*
	 * Initialize keg's lock (shared among zones) through
	 * Master zone
	 */
	zone->uz_lock = &keg->uk_lock;
	if (arg->flags & UMA_ZONE_MTXCLASS)
		ZONE_LOCK_INIT(zone, 1);
	else
		ZONE_LOCK_INIT(zone, 0);

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
		u_int totsize;

		/* Size of the slab struct and free list */
		if (keg->uk_flags & UMA_ZONE_REFCNT)
			totsize = sizeof(struct uma_slab_refcnt) +
			    keg->uk_ipers * UMA_FRITMREF_SZ;
		else
			totsize = sizeof(struct uma_slab) +
			    keg->uk_ipers * UMA_FRITM_SZ;

		/* Round the header size up to pointer alignment. */
		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		keg->uk_pgoff = UMA_SLAB_SIZE - totsize;

		/* Recompute the end of the header from the chosen offset. */
		if (keg->uk_flags & UMA_ZONE_REFCNT)
			totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
			    + keg->uk_ipers * UMA_FRITMREF_SZ;
		else
			totsize = keg->uk_pgoff + sizeof(struct uma_slab)
			    + keg->uk_ipers * UMA_FRITM_SZ;

		/*
		 * The only way the following is possible is if with our
		 * UMA_ALIGN_PTR adjustments we are now bigger than
		 * UMA_SLAB_SIZE.  I haven't checked whether this is
		 * mathematically possible for all cases, so we make
		 * sure here anyway.
		 */
		if (totsize > UMA_SLAB_SIZE) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
			    keg->uk_size);
			panic("UMA slab won't fit.\n");
		}
	}

	/* NOTE(review): the result of hash_alloc() is not checked here. */
	if (keg->uk_flags & UMA_ZONE_HASH)
		hash_alloc(&keg->uk_hash);

#ifdef UMA_DEBUG
	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
	    zone->uz_name, zone,
	    keg->uk_size, keg->uk_ipers,
	    keg->uk_ppera, keg->uk_pgoff);
#endif

	/* The creating zone becomes the first zone on the keg's list. */
	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);

	/* Publish the keg on the global list under uma_mtx. */
	mtx_lock(&uma_mtx);
	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
	mtx_unlock(&uma_mtx);
	return (0);
}

/*
 * Zone header ctor.  This initializes all fields, locks, etc.
 *
 * Arguments/Returns follow uma_ctor specifications
 *	udata  Actually uma_zctor_args
 *
 * Returns 0 on success, ENOMEM if a keg could not be created, or the
 * error reported by keg_ctor().
 */

static int
zone_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	uma_zone_t z;
	uma_keg_t keg;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_init = NULL;
	zone->uz_fini = NULL;
	zone->uz_allocs = 0;
	zone->uz_frees = 0;
	zone->uz_fails = 0;
	zone->uz_fills = zone->uz_count = 0;

	if (arg->flags & UMA_ZONE_SECONDARY) {
		/*
		 * Secondary zone: attach to an existing keg.  The init/fini
		 * supplied by the caller are kept at the zone level.
		 */
		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
		keg = arg->keg;
		zone->uz_keg = keg;
		zone->uz_init = arg->uminit;
		zone->uz_fini = arg->fini;
		zone->uz_lock = &keg->uk_lock;
		mtx_lock(&uma_mtx);
		ZONE_LOCK(zone);
		keg->uk_flags |= UMA_ZONE_SECONDARY;
		/* Walk to the last zone on the keg and append after it. */
		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
			if (LIST_NEXT(z, uz_link) == NULL) {
				LIST_INSERT_AFTER(z, zone, uz_link);
				break;
			}
		}
		ZONE_UNLOCK(zone);
		mtx_unlock(&uma_mtx);
	} else if (arg->keg == NULL) {
		/* Ordinary zone: allocate and construct a fresh keg. */
		if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
		    arg->align, arg->flags) == NULL)
			return (ENOMEM);
	} else {
		struct uma_kctor_args karg;
		int error;

		/* We should only be here from uma_startup() */
		karg.size = arg->size;
		karg.uminit = arg->uminit;
		karg.fini = arg->fini;
		karg.align = arg->align;
		karg.flags = arg->flags;
		karg.zone = zone;
		/* Construct the caller-provided keg storage in place. */
		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
		    flags);
		if (error)
			return (error);
	}
	keg = zone->uz_keg;
	zone->uz_lock = &keg->uk_lock;

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
		KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
		return (0);
	}

	/* Initial bucket size: items per slab, capped at BUCKET_MAX. */
	if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
		zone->uz_count = BUCKET_MAX;
	else if (keg->uk_ipers <= BUCKET_MAX)
		zone->uz_count = keg->uk_ipers;
	else
		zone->uz_count = BUCKET_MAX;
	return (0);
}

/*
 * Keg header dtor.  This reports items still held by the keg, frees the
 * hash table (if any) and destroys the keg lock.  Removal from the global
 * keg list is not done here; in this file zone_dtor() unlinks the keg
 * before freeing it.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */
static void
keg_dtor(void *arg, int size, void *udata)
{
	uma_keg_t keg;

	keg = (uma_keg_t)arg;
	mtx_lock(&keg->uk_lock);
	/* Items still accounted to the keg are reported, not reclaimed. */
	if (keg->uk_free != 0) {
		printf("Freed UMA keg was not empty (%d items). "
		    " Lost %d pages of memory.\n",
		    keg->uk_free, keg->uk_pages);
	}
	mtx_unlock(&keg->uk_lock);

	if (keg->uk_flags & UMA_ZONE_HASH)
		hash_free(&keg->uk_hash);

	mtx_destroy(&keg->uk_lock);
}

/*
 * Zone header dtor.  Drains the zone, unlinks it from its keg and, for a
 * keg that is not marked UMA_ZONE_SECONDARY, unlinks and frees the keg too.
 *
 * Arguments/Returns follow uma_dtor specifications
 *	udata  unused
 */
static void
zone_dtor(void *arg, int size, void *udata)
{
	uma_zone_t zone;
	uma_keg_t keg;

	zone = (uma_zone_t)arg;
	keg = zone->uz_keg;

	/* Internal zones return early from zone_ctor() without caches. */
	if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
		cache_drain(zone);

	mtx_lock(&uma_mtx);
	zone_drain(zone);
	if (keg->uk_flags & UMA_ZONE_SECONDARY) {
		LIST_REMOVE(zone, uz_link);
		/*
		 * XXX there are some races here where
		 * the zone can be drained but zone lock
		 * released and then refilled before we
		 * remove it... we don't care for now
		 */
		ZONE_LOCK(zone);
		if (LIST_EMPTY(&keg->uk_zones))
			keg->uk_flags &= ~UMA_ZONE_SECONDARY;
		ZONE_UNLOCK(zone);
		mtx_unlock(&uma_mtx);
	} else {
		/* Sole owner of the keg: unlink both, then free the keg. */
		LIST_REMOVE(keg, uk_link);
		LIST_REMOVE(zone, uz_link);
		mtx_unlock(&uma_mtx);
		uma_zfree_internal(kegs, keg, NULL, SKIP_NONE,
		    ZFREE_STATFREE);
	}
	zone->uz_keg = NULL;
}

/*
 * Traverses every zone in the system and calls a callback
 *
 * Arguments:
 *	zfunc  A pointer to a function which accepts a zone
 *		as an argument.
 *
 * Returns:
 *	Nothing
 *
 * The callback is invoked with uma_mtx held.
 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_keg_t keg;
	uma_zone_t zone;

	mtx_lock(&uma_mtx);
	LIST_FOREACH(keg, &uma_kegs, uk_link) {
		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
			zfunc(zone);
	}
	mtx_unlock(&uma_mtx);
}

/* Public functions */
/* See uma.h */
void
uma_startup(void *bootmem)
{
	struct uma_zctor_args args;
	uma_slab_t slab;
	u_int slabsize;
	u_int objsize, totsize, wsize;
	int i;

#ifdef UMA_DEBUG
	printf("Creating uma keg headers zone and keg.\n");
#endif
	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);

	/*
	 * Figure out the maximum number of items-per-slab we'll have if
	 * we're using the OFFPAGE slab header to track free items, given
	 * all possible object sizes and the maximum desired wastage
	 * (UMA_MAX_WASTE).
	 *
	 * We iterate until we find an object size for
	 * which the calculated wastage in zone_small_init() will be
	 * enough to warrant OFFPAGE.  Since wastedspace versus objsize
	 * is an overall increasing see-saw function, we find the smallest
	 * objsize such that the wastage is always acceptable for objects
	 * with that objsize or smaller.  Since a smaller objsize always
	 * generates a larger possible uma_max_ipers, we use this computed
	 * objsize to calculate the largest ipers possible.  Since the
	 * ipers calculated for OFFPAGE slab headers is always larger than
	 * the ipers initially calculated in zone_small_init(), we use
	 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
	 * obtain the maximum ipers possible for offpage slab headers.
	 *
	 * It should be noted that ipers versus objsize is an inversely
	 * proportional function which drops off rather quickly so as
	 * long as our UMA_MAX_WASTE is such that the objsize we calculate
	 * falls into the portion of the inverse relation AFTER the steep
	 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
	 *
	 * Note that we have 8-bits (1 byte) to use as a freelist index
	 * inside the actual slab header itself and this is enough to
	 * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
	 * object with offpage slab header would have ipers =
	 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
	 * 1 greater than what our byte-integer freelist index can
	 * accommodate, but we know that this situation never occurs as
	 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
	 * that we need to go to offpage slab headers.  Or, if we do,
	 * then we trap that condition below and panic in the INVARIANTS case.
	 */
	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
		    (objsize + UMA_FRITM_SZ);
		totsize *= (UMA_FRITM_SZ + objsize);
		objsize++;
	}
	/* The loop post-increments once past the size it tested last. */
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers = UMA_SLAB_SIZE / objsize;

	/* Same computation for slabs carrying per-item reference counts. */
	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
	totsize = wsize;
	objsize = UMA_SMALLEST_UNIT;
	while (totsize >= wsize) {
		totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
		    (objsize + UMA_FRITMREF_SZ);
		totsize *= (UMA_FRITMREF_SZ + objsize);
		objsize++;
	}
	if (objsize > UMA_SMALLEST_UNIT)
		objsize--;
	uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;

	KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
	    ("uma_startup: calculated uma_max_ipers values too large!"));

#ifdef UMA_DEBUG
	/* The second message prints uma_max_ipers_ref despite its label. */
	printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
	printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
	    uma_max_ipers_ref);
#endif

	/* "manually" create the initial zone */
	args.name = "UMA Kegs";
	args.size = sizeof(struct uma_keg);
	args.ctor = keg_ctor;
	args.dtor = keg_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = &masterkeg;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	/* Carve bootmem into UMA_BOOT_PAGES slabs for early allocations. */
	for (i = 0; i < UMA_BOOT_PAGES; i++) {
		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (u_int8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
	}
	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone and keg.\n");
#endif
	args.name = "UMA Zones";
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (mp_maxid + 1));
	args.ctor = zone_ctor;
	args.dtor = zone_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Initializing pcpu cache locks.\n");
#endif
#ifdef UMA_DEBUG
	printf("Creating slab and hash zones.\n");
#endif

	/*
	 * This is the max number of free list items we'll have with
	 * offpage slabs.
	 */
	slabsize = uma_max_ipers * UMA_FRITM_SZ;
	slabsize += sizeof(struct uma_slab);

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
				slabsize,
				NULL, NULL, NULL, NULL,
				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	/*
	 * We also create a zone for the bigger slabs with reference
	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
	 */
	slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
	slabsize += sizeof(struct uma_slab_refcnt);
	slabrefzone = uma_zcreate("UMA RCntSlabs",
				  slabsize,
				  NULL, NULL, NULL, NULL,
				  UMA_ALIGN_PTR,
				  UMA_ZFLAG_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	bucket_init();

#ifdef UMA_MD_SMALL_ALLOC
	booted = 1;
#endif

#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}

/* see uma.h */
/* Marks UMA as booted and calls bucket_enable(). */
void
uma_startup2(void)
{
	booted = 1;
	bucket_enable();
#ifdef UMA_DEBUG
	printf("UMA startup2 complete.\n");
#endif
}

/*
 * Initialize our callout handle and schedule the first uma_timeout() run
 * UMA_TIMEOUT * hz ticks from now.
 */

static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
	printf("Starting callout.\n");
#endif
	callout_init(&uma_callout, CALLOUT_MPSAFE);
	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
	printf("UMA startup3 complete.\n");
#endif
}

/*
 * Allocate a keg for 'zone' from the kegs zone, passing the packed
 * arguments as the constructor's udata.
 *
 * Returns the result of uma_zalloc_internal() on the kegs zone, NULL on
 * failure.
 * NOTE(review): the declared return type is uma_zone_t although the item
 * comes from the kegs zone; the only caller in view (zone_ctor) merely
 * compares the result against NULL.
 */
static uma_zone_t
uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
		int align, u_int32_t flags)
{
	struct uma_kctor_args args;

	args.size = size;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;
	args.zone = zone;
	return (uma_zalloc_internal(kegs, &args, M_WAITOK));
}

/* See uma.h */
uma_zone_t
uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
		uma_init uminit, uma_fini fini, int align, u_int32_t flags)

{
	struct uma_zctor_args args;

	/* This stuff is essential for the zone ctor */
	args.name = name;
	args.size = size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = uminit;
	args.fini = fini;
	args.align = align;
	args.flags = flags;
	/* A NULL keg tells zone_ctor() to create a fresh keg for the zone. */
	args.keg = NULL;

	return (uma_zalloc_internal(zones, &args, M_WAITOK));
}

/* See uma.h */
uma_zone_t
uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
		    uma_init zinit, uma_fini zfini, uma_zone_t master)
{
	struct uma_zctor_args args;

	/* Size, alignment and flags are inherited from the master's keg. */
	args.name = name;
	args.size = master->uz_keg->uk_size;
	args.ctor = ctor;
	args.dtor = dtor;
	args.uminit = zinit;
	args.fini = zfini;
	args.align = master->uz_keg->uk_align;
	args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
	args.keg = master->uz_keg;

	return (uma_zalloc_internal(zones, &args, M_WAITOK));
}

/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
{

	/* Freeing the header to the zones zone; zone_dtor is its dtor. */
	uma_zfree_internal(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
}

/* See uma.h */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
	void *item;
	uma_cache_t cache;
	uma_bucket_t bucket;
	int cpu;
	int badness;

	/* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
	CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
	    zone->uz_name, flags);

	/*
	 * Sleeping allocations are checked against the current context and
	 * may be downgraded to M_NOWAIT when 'badness' is reported.
	 */
	if (!(flags & M_NOWAIT)) {
		KASSERT(curthread->td_intr_nesting_level == 0,
		   ("malloc(M_WAITOK) in interrupt context"));
		if (nosleepwithlocks) {
#ifdef WITNESS
			badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
			    NULL,
			    "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
			    zone->uz_name);
#else
			badness = 1;
#endif
		} else {
			badness = 0;
#ifdef WITNESS
			WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
			    "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
#endif
		}
		if (badness) {
			flags &= ~M_WAITOK;
			flags |= M_NOWAIT;
		}
	}

	/*
	 * If possible, allocate from the per-CPU cache.  There are two
	 * requirements for safe access to the per-CPU cache: (1) the thread
	 * accessing the cache must not be preempted or yield during access,
	 * and (2) the thread must not migrate CPUs without switching which
	 * cache it accesses.  We rely on a critical section to prevent
	 * preemption and migration.  We release the critical section in
	 * order to acquire the zone mutex if we are unable to allocate from
	 * the current cache; when we re-acquire the critical section, we
	 * must detect and handle migration if it has occurred.
	 */
zalloc_restart:
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

zalloc_start:
	bucket = cache->uc_allocbucket;

	if (bucket) {
		if (bucket->ub_cnt > 0) {
			/* Pop the most recently cached item. */
			bucket->ub_cnt--;
			item = bucket->ub_bucket[bucket->ub_cnt];
#ifdef INVARIANTS
			bucket->ub_bucket[bucket->ub_cnt] = NULL;
#endif
			KASSERT(item != NULL,
			    ("uma_zalloc: Bucket pointer mangled."));
			cache->uc_allocs++;
			critical_exit();
#ifdef INVARIANTS
			ZONE_LOCK(zone);
			uma_dbg_alloc(zone, NULL, item);
			ZONE_UNLOCK(zone);
#endif
			/* The ctor runs outside the critical section. */
			if (zone->uz_ctor != NULL) {
				if (zone->uz_ctor(item, zone->uz_keg->uk_size,
				    udata, flags) != 0) {
					uma_zfree_internal(zone, item, udata,
					    SKIP_DTOR, ZFREE_STATFAIL |
					    ZFREE_STATFREE);
					return (NULL);
				}
			}
			if (flags & M_ZERO)
				bzero(item, zone->uz_keg->uk_size);
			return (item);
		} else if (cache->uc_freebucket) {
			/*
			 * We have run out of items in our allocbucket.
			 * See if we can switch with our free bucket.
			 */
			if (cache->uc_freebucket->ub_cnt > 0) {
#ifdef UMA_DEBUG_ALLOC
				printf("uma_zalloc: Swapping empty with"
				    " alloc.\n");
#endif
				bucket = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = bucket;

				goto zalloc_start;
			}
		}
	}
	/*
	 * Attempt to retrieve the item from the per-CPU cache has failed, so
	 * we must go back to the zone.  This requires the zone lock, so we
	 * must drop the critical section, then re-acquire it when we go back
	 * to the cache.  Since the critical section is released, we may be
	 * preempted or migrate.  As such, make sure not to maintain any
	 * thread-local state specific to the cache from prior to releasing
	 * the critical section.
	 */
	critical_exit();
	ZONE_LOCK(zone);
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];
	/* Re-check: this (possibly different) CPU's cache may have items. */
	bucket = cache->uc_allocbucket;
	if (bucket != NULL) {
		if (bucket->ub_cnt > 0) {
			ZONE_UNLOCK(zone);
			goto zalloc_start;
		}
		bucket = cache->uc_freebucket;
		if (bucket != NULL && bucket->ub_cnt > 0) {
			ZONE_UNLOCK(zone);
			goto zalloc_start;
		}
	}

	/* Since we have locked the zone we may as well send back our stats */
	zone->uz_allocs += cache->uc_allocs;
	cache->uc_allocs = 0;
	zone->uz_frees += cache->uc_frees;
	cache->uc_frees = 0;

	/* Our old one is now a free bucket */
	if (cache->uc_allocbucket) {
		KASSERT(cache->uc_allocbucket->ub_cnt == 0,
		    ("uma_zalloc_arg: Freeing a non free bucket."));
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    cache->uc_allocbucket, ub_link);
		cache->uc_allocbucket = NULL;
	}

	/* Check the free list for a new alloc bucket */
	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
		KASSERT(bucket->ub_cnt != 0,
		    ("uma_zalloc_arg: Returning an empty bucket."));

		LIST_REMOVE(bucket, ub_link);
		cache->uc_allocbucket = bucket;
		ZONE_UNLOCK(zone);
		goto zalloc_start;
	}
	/* We are no longer associated with this CPU. */
	critical_exit();

	/* Bump up our uz_count so we get here less */
	if (zone->uz_count < BUCKET_MAX)
		zone->uz_count++;

	/*
	 * Now let's just fill a bucket and put it on the free list.  If that
	 * works we'll restart the allocation from the beginning.
	 */
	if (uma_zalloc_bucket(zone, flags)) {
		ZONE_UNLOCK(zone);
		goto zalloc_restart;
	}
	ZONE_UNLOCK(zone);
	/*
	 * We may not be able to get a bucket so return an actual item.
	 */
#ifdef UMA_DEBUG
	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

	return (uma_zalloc_internal(zone, udata, flags));
}

/*
 * Find a slab in the zone's keg with at least one free item, allocating a
 * new slab through slab_zalloc() when the keg has none.
 *
 * Returns the slab (linked on the keg's partial list) or NULL on failure.
 * NOTE(review): callers in view hold the zone lock; msleep() below is
 * passed &keg->uk_lock, which suggests the keg lock must be held on entry.
 */
static uma_slab_t
uma_zone_slab(uma_zone_t zone, int flags)
{
	uma_slab_t slab;
	uma_keg_t keg;

	keg = zone->uz_keg;

	/*
	 * This is to prevent us from recursively trying to allocate
	 * buckets.  The problem is that if an allocation forces us to
	 * grab a new bucket we will call page_alloc, which will go off
	 * and cause the vm to allocate vm_map_entries.  If we need new
	 * buckets there too we will recurse in kmem_alloc and bad
	 * things happen.  So instead we return a NULL bucket, and make
	 * the code that allocates buckets smart enough to deal with it
	 *
	 * XXX: While we want this protection for the bucket zones so that
	 * recursion from the VM is handled (and the calling code that
	 * allocates buckets knows how to deal with it), we do not want
	 * to prevent allocation from the slab header zones (slabzone
	 * and slabrefzone) if uk_recurse is not zero for them.  The
	 * reason is that it could lead to NULL being returned for
	 * slab header allocations even in the M_WAITOK case, and the
	 * caller can't handle that.
	 */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
		if ((zone != slabzone) && (zone != slabrefzone))
			return (NULL);

	slab = NULL;

	for (;;) {
		/*
		 * Find a slab with some space.  Prefer slabs that are partially
		 * used over those that are totally full.  This helps to reduce
		 * fragmentation.
		 */
		if (keg->uk_free != 0) {
			if (!LIST_EMPTY(&keg->uk_part_slab)) {
				slab = LIST_FIRST(&keg->uk_part_slab);
			} else {
				/* Promote a completely free slab to partial. */
				slab = LIST_FIRST(&keg->uk_free_slab);
				LIST_REMOVE(slab, us_link);
				LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
				    us_link);
			}
			return (slab);
		}

		/*
		 * M_NOVM means don't ask at all!
		 */
		if (flags & M_NOVM)
			break;

		/* At the page limit: fail for M_NOWAIT, otherwise sleep. */
		if (keg->uk_maxpages &&
		    keg->uk_pages >= keg->uk_maxpages) {
			keg->uk_flags |= UMA_ZFLAG_FULL;

			if (flags & M_NOWAIT)
				break;
			else
				msleep(keg, &keg->uk_lock, PVM,
				    "zonelimit", 0);
			continue;
		}
		/* uk_recurse is what the recursion guard above tests. */
		keg->uk_recurse++;
		slab = slab_zalloc(zone, flags);
		keg->uk_recurse--;

		/*
		 * If we got a slab here it's safe to mark it partially used
		 * and return.  We assume that the caller is going to remove
		 * at least one item.
		 */
		if (slab) {
			LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
			return (slab);
		}
		/*
		 * We might not have been able to get a slab but another cpu
		 * could have while we were unlocked.  Check again before we
		 * fail.
		 */
		if (flags & M_NOWAIT)
			flags |= M_NOVM;
	}
	return (slab);
}

/*
 * Take the first free item out of 'slab' and return a pointer to it.
 * Updates the slab and keg free counts and moves the slab to the keg's
 * full list once its last item has been handed out.
 */
static void *
uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
{
	uma_keg_t keg;
	uma_slabrefcnt_t slabref;
	void *item;
	u_int8_t freei;

	keg = zone->uz_keg;

	/* Pop the head index off the slab's embedded free list. */
	freei = slab->us_firstfree;
	if (keg->uk_flags & UMA_ZONE_REFCNT) {
		slabref = (uma_slabrefcnt_t)slab;
		slab->us_firstfree = slabref->us_freelist[freei].us_item;
	} else {
		slab->us_firstfree = slab->us_freelist[freei].us_item;
	}
	/* Items are laid out uk_rsize bytes apart from us_data. */
	item = slab->us_data + (keg->uk_rsize * freei);

	slab->us_freecount--;
	keg->uk_free--;
#ifdef INVARIANTS
	uma_dbg_alloc(zone, slab, item);
#endif
	/* Move this slab to the full list */
	if (slab->us_freecount == 0) {
		LIST_REMOVE(slab, us_link);
		LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
	}

	return (item);
}

/*
 * Fill one bucket with items taken from the keg and queue it on the zone's
 * full-bucket list.
 *
 * The zone lock is dropped and re-taken around bucket_alloc() and around
 * the zone's init calls, so it is expected to be held on entry (the caller
 * in view, uma_zalloc_arg(), holds it).
 *
 * Returns 1 if a non-empty bucket was queued, 0 otherwise.
 */
static int
uma_zalloc_bucket(uma_zone_t zone, int flags)
{
	uma_bucket_t bucket;
	uma_slab_t slab;
	int16_t saved;
	int max, origflags = flags;

	/*
	 * Try this zone's free list first so we don't allocate extra buckets.
	 */
	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		KASSERT(bucket->ub_cnt == 0,
		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
		LIST_REMOVE(bucket, ub_link);
	} else {
		int bflags;

		/* The bucket itself never needs zeroing or (cache-only) VM. */
		bflags = (flags & ~M_ZERO);
		if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
			bflags |= M_NOVM;

		ZONE_UNLOCK(zone);
		bucket = bucket_alloc(zone->uz_count, bflags);
		ZONE_LOCK(zone);
	}

	if (bucket == NULL)
		return (0);

#ifdef SMP
	/*
	 * This code is here to limit the number of simultaneous bucket fills
	 * for any given zone to the number of per cpu caches in this zone. This
	 * is done so that we don't allocate more memory than we really need.
	 */
	if (zone->uz_fills >= mp_ncpus)
		goto done;

#endif
	zone->uz_fills++;

	max = MIN(bucket->ub_entries, zone->uz_count);
	/* Try to keep the buckets totally full */
	saved = bucket->ub_cnt;
	while (bucket->ub_cnt < max &&
	    (slab = uma_zone_slab(zone, flags)) != NULL) {
		while (slab->us_freecount && bucket->ub_cnt < max) {
			bucket->ub_bucket[bucket->ub_cnt++] =
			    uma_slab_alloc(zone, slab);
		}

		/* Don't block on the next fill */
		flags |= M_NOWAIT;
	}

	/*
	 * We unlock here because we need to call the zone's init.
	 * It should be safe to unlock because the slab dealt with
	 * above is already on the appropriate list within the keg
	 * and the bucket we filled is not yet on any list, so we
	 * own it.
	 */
	if (zone->uz_init != NULL) {
		int i;

		ZONE_UNLOCK(zone);
		/* Init uses the caller's original flags, not the M_NOWAIT copy. */
		for (i = saved; i < bucket->ub_cnt; i++)
			if (zone->uz_init(bucket->ub_bucket[i],
			    zone->uz_keg->uk_size, origflags) != 0)
				break;
		/*
		 * If we couldn't initialize the whole bucket, put the
		 * rest back onto the freelist.
		 */
		if (i != bucket->ub_cnt) {
			int j;

			for (j = i; j < bucket->ub_cnt; j++) {
				uma_zfree_internal(zone, bucket->ub_bucket[j],
				    NULL, SKIP_FINI, 0);
#ifdef INVARIANTS
				bucket->ub_bucket[j] = NULL;
#endif
			}
			bucket->ub_cnt = i;
		}
		ZONE_LOCK(zone);
	}

	zone->uz_fills--;
	if (bucket->ub_cnt != 0) {
		LIST_INSERT_HEAD(&zone->uz_full_bucket,
		    bucket, ub_link);
		return (1);
	}
	/*
	 * Nothing was queued: release the bucket.  The SMP fill-limit check
	 * above jumps here without having touched uz_fills.
	 */
#ifdef SMP
done:
#endif
	bucket_free(bucket);

	return (0);
}
/*
 * Allocates an item for an internal zone
 *
 * Arguments
 *	zone   The zone to alloc for.
 *	udata  The data to be passed to the constructor.
 *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
2187 * 2188 * Returns 2189 * NULL if there is no memory and M_NOWAIT is set 2190 * An item if successful 2191 */ 2192 2193 static void * 2194 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags) 2195 { 2196 uma_keg_t keg; 2197 uma_slab_t slab; 2198 void *item; 2199 2200 item = NULL; 2201 keg = zone->uz_keg; 2202 2203 #ifdef UMA_DEBUG_ALLOC 2204 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); 2205 #endif 2206 ZONE_LOCK(zone); 2207 2208 slab = uma_zone_slab(zone, flags); 2209 if (slab == NULL) { 2210 zone->uz_fails++; 2211 ZONE_UNLOCK(zone); 2212 return (NULL); 2213 } 2214 2215 item = uma_slab_alloc(zone, slab); 2216 2217 zone->uz_allocs++; 2218 2219 ZONE_UNLOCK(zone); 2220 2221 /* 2222 * We have to call both the zone's init (not the keg's init) 2223 * and the zone's ctor. This is because the item is going from 2224 * a keg slab directly to the user, and the user is expecting it 2225 * to be both zone-init'd as well as zone-ctor'd. 2226 */ 2227 if (zone->uz_init != NULL) { 2228 if (zone->uz_init(item, keg->uk_size, flags) != 0) { 2229 uma_zfree_internal(zone, item, udata, SKIP_FINI, 2230 ZFREE_STATFAIL | ZFREE_STATFREE); 2231 return (NULL); 2232 } 2233 } 2234 if (zone->uz_ctor != NULL) { 2235 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) { 2236 uma_zfree_internal(zone, item, udata, SKIP_DTOR, 2237 ZFREE_STATFAIL | ZFREE_STATFREE); 2238 return (NULL); 2239 } 2240 } 2241 if (flags & M_ZERO) 2242 bzero(item, keg->uk_size); 2243 2244 return (item); 2245 } 2246 2247 /* See uma.h */ 2248 void 2249 uma_zfree_arg(uma_zone_t zone, void *item, void *udata) 2250 { 2251 uma_keg_t keg; 2252 uma_cache_t cache; 2253 uma_bucket_t bucket; 2254 int bflags; 2255 int cpu; 2256 2257 keg = zone->uz_keg; 2258 2259 #ifdef UMA_DEBUG_ALLOC_1 2260 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); 2261 #endif 2262 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread, 2263 zone->uz_name); 2264 2265 if (zone->uz_dtor) 2266 
zone->uz_dtor(item, keg->uk_size, udata);
#ifdef INVARIANTS
	/*
	 * Debug bookkeeping under the zone lock.  For UMA_ZONE_MALLOC kegs
	 * udata is passed as uma_dbg_free()'s second argument; otherwise
	 * NULL is passed.
	 */
	ZONE_LOCK(zone);
	if (keg->uk_flags & UMA_ZONE_MALLOC)
		uma_dbg_free(zone, udata, item);
	else
		uma_dbg_free(zone, NULL, item);
	ZONE_UNLOCK(zone);
#endif
	/*
	 * A keg flagged UMA_ZFLAG_FULL bypasses the per-CPU cache and frees
	 * directly through uma_zfree_internal().
	 *
	 * The race here is acceptable.  If we miss it we'll just have to wait
	 * a little longer for the limits to be reset.
	 */
	if (keg->uk_flags & UMA_ZFLAG_FULL)
		goto zfree_internal;

	/*
	 * If possible, free to the per-CPU cache.  There are two
	 * requirements for safe access to the per-CPU cache: (1) the thread
	 * accessing the cache must not be preempted or yield during access,
	 * and (2) the thread must not migrate CPUs without switching which
	 * cache it accesses.  We rely on a critical section to prevent
	 * preemption and migration.  We release the critical section in
	 * order to acquire the zone mutex if we are unable to free to the
	 * current cache; when we re-acquire the critical section, we must
	 * detect and handle migration if it has occurred.
	 */
zfree_restart:
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

	/* Entered with the critical section held and 'cache' set for curcpu. */
zfree_start:
	bucket = cache->uc_freebucket;

	if (bucket) {
		/*
		 * Do we have room in our bucket?  It is OK for this uz count
		 * check to be slightly out of sync.
		 */

		if (bucket->ub_cnt < bucket->ub_entries) {
			/* Fast path: store the item in the free bucket. */
			KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
			    ("uma_zfree: Freeing to non free bucket index."));
			bucket->ub_bucket[bucket->ub_cnt] = item;
			bucket->ub_cnt++;
			cache->uc_frees++;
			critical_exit();
			return;
		} else if (cache->uc_allocbucket) {
#ifdef UMA_DEBUG_ALLOC
			printf("uma_zfree: Swapping buckets.\n");
#endif
			/*
			 * We have run out of space in our freebucket.
			 * See if we can switch with our alloc bucket; this
			 * only happens when the alloc bucket holds fewer
			 * items than the free bucket.
			 */
			if (cache->uc_allocbucket->ub_cnt <
			    cache->uc_freebucket->ub_cnt) {
				bucket = cache->uc_freebucket;
				cache->uc_freebucket = cache->uc_allocbucket;
				cache->uc_allocbucket = bucket;
				goto zfree_start;
			}
		}
	}
	/*
	 * We can get here for two reasons:
	 *
	 * 1) The buckets are NULL
	 * 2) The alloc and free buckets are both somewhat full.
	 *
	 * We must go back to the zone, which requires acquiring the zone
	 * lock, which in turn means we must release and re-acquire the
	 * critical section.  Since the critical section is released, we may
	 * be preempted or migrate.  As such, make sure not to maintain any
	 * thread-local state specific to the cache from prior to releasing
	 * the critical section.
	 */
	critical_exit();
	ZONE_LOCK(zone);
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];
	/*
	 * Re-check the cache of the CPU we are on now: if its free bucket has
	 * room, or a swap with the alloc bucket is possible, drop the zone
	 * lock and retry the fast path (the critical section stays held).
	 */
	if (cache->uc_freebucket != NULL) {
		if (cache->uc_freebucket->ub_cnt <
		    cache->uc_freebucket->ub_entries) {
			ZONE_UNLOCK(zone);
			goto zfree_start;
		}
		if (cache->uc_allocbucket != NULL &&
		    (cache->uc_allocbucket->ub_cnt <
		    cache->uc_freebucket->ub_cnt)) {
			ZONE_UNLOCK(zone);
			goto zfree_start;
		}
	}

	/* Since we have locked the zone we may as well send back our stats */
	zone->uz_allocs += cache->uc_allocs;
	cache->uc_allocs = 0;
	zone->uz_frees += cache->uc_frees;
	cache->uc_frees = 0;

	/* Detach the free bucket from this CPU's cache. */
	bucket = cache->uc_freebucket;
	cache->uc_freebucket = NULL;

	/* Can we throw this on the zone full list? */
	if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
		printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
		/* ub_cnt is pointing to the last free item */
		KASSERT(bucket->ub_cnt != 0,
		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
		LIST_INSERT_HEAD(&zone->uz_full_bucket,
		    bucket, ub_link);
	}
	/* Take a bucket from the zone's uz_free_bucket list, if there is one. */
	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
		LIST_REMOVE(bucket, ub_link);
		ZONE_UNLOCK(zone);
		cache->uc_freebucket = bucket;
		goto zfree_start;
	}
	/* We are no longer associated with this CPU. */
	critical_exit();

	/* And the zone.. */
	ZONE_UNLOCK(zone);

#ifdef UMA_DEBUG_ALLOC
	printf("uma_zfree: Allocating new free bucket.\n");
#endif
	/*
	 * Allocate a new bucket with M_NOWAIT (plus M_NOVM for
	 * UMA_ZFLAG_CACHEONLY kegs).  On success it is put on the zone's
	 * uz_free_bucket list and the whole free is restarted.
	 */
	bflags = M_NOWAIT;

	if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
		bflags |= M_NOVM;
	bucket = bucket_alloc(zone->uz_count, bflags);
	if (bucket) {
		ZONE_LOCK(zone);
		LIST_INSERT_HEAD(&zone->uz_free_bucket,
		    bucket, ub_link);
		ZONE_UNLOCK(zone);
		goto zfree_restart;
	}

	/*
	 * If nothing else caught this, we'll just do an internal free.
	 * SKIP_DTOR is passed because the zone dtor already ran at the top
	 * of this function.
	 */
zfree_internal:
	uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFAIL |
	    ZFREE_STATFREE);

	return;
}

/*
 * Frees an item to an INTERNAL zone or allocates a free bucket
 *
 * Arguments:
 *	zone   The zone to free to
 *	item   The item we're freeing
 *	udata  User supplied data for the dtor
 *	skip   Skip dtors and finis: the dtor runs only when skip is below
 *	       SKIP_DTOR, the fini only when skip is below SKIP_FINI
 *	flags  ZFREE_STATFAIL bumps uz_fails, ZFREE_STATFREE bumps uz_frees
 */
static void
uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
    enum zfreeskip skip, int flags)
{
	uma_slab_t slab;
	uma_slabrefcnt_t slabref;
	uma_keg_t keg;
	u_int8_t *mem;
	u_int8_t freei;

	keg = zone->uz_keg;

	/* Both hooks run before the zone lock is taken. */
	if (skip < SKIP_DTOR && zone->uz_dtor)
		zone->uz_dtor(item, keg->uk_size, udata);
	if (skip < SKIP_FINI && zone->uz_fini)
		zone->uz_fini(item, keg->uk_size);

	ZONE_LOCK(zone);

	if (flags & ZFREE_STATFAIL)
		zone->uz_fails++;
	if (flags & ZFREE_STATFREE)
		zone->uz_frees++;

	/*
	 * Find the slab header.  For UMA_ZONE_MALLOC kegs the caller passes
	 * it in udata.  Otherwise mask the item address down to its slab
	 * base and either look it up in the keg hash (UMA_ZONE_HASH) or find
	 * the header at offset uk_pgoff from that base.
	 */
	if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
		if (keg->uk_flags & UMA_ZONE_HASH)
			slab = hash_sfind(&keg->uk_hash, mem);
		else {
			mem += keg->uk_pgoff;
			slab = (uma_slab_t)mem;
		}
	} else {
		slab = (uma_slab_t)udata;
	}

	/* Do we need to remove from any lists?
*/ 2468 if (slab->us_freecount+1 == keg->uk_ipers) { 2469 LIST_REMOVE(slab, us_link); 2470 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); 2471 } else if (slab->us_freecount == 0) { 2472 LIST_REMOVE(slab, us_link); 2473 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); 2474 } 2475 2476 /* Slab management stuff */ 2477 freei = ((unsigned long)item - (unsigned long)slab->us_data) 2478 / keg->uk_rsize; 2479 2480 #ifdef INVARIANTS 2481 if (!skip) 2482 uma_dbg_free(zone, slab, item); 2483 #endif 2484 2485 if (keg->uk_flags & UMA_ZONE_REFCNT) { 2486 slabref = (uma_slabrefcnt_t)slab; 2487 slabref->us_freelist[freei].us_item = slab->us_firstfree; 2488 } else { 2489 slab->us_freelist[freei].us_item = slab->us_firstfree; 2490 } 2491 slab->us_firstfree = freei; 2492 slab->us_freecount++; 2493 2494 /* Zone statistics */ 2495 keg->uk_free++; 2496 2497 if (keg->uk_flags & UMA_ZFLAG_FULL) { 2498 if (keg->uk_pages < keg->uk_maxpages) 2499 keg->uk_flags &= ~UMA_ZFLAG_FULL; 2500 2501 /* We can handle one more allocation */ 2502 wakeup_one(keg); 2503 } 2504 2505 ZONE_UNLOCK(zone); 2506 } 2507 2508 /* See uma.h */ 2509 void 2510 uma_zone_set_max(uma_zone_t zone, int nitems) 2511 { 2512 uma_keg_t keg; 2513 2514 keg = zone->uz_keg; 2515 ZONE_LOCK(zone); 2516 if (keg->uk_ppera > 1) 2517 keg->uk_maxpages = nitems * keg->uk_ppera; 2518 else 2519 keg->uk_maxpages = nitems / keg->uk_ipers; 2520 2521 if (keg->uk_maxpages * keg->uk_ipers < nitems) 2522 keg->uk_maxpages++; 2523 2524 ZONE_UNLOCK(zone); 2525 } 2526 2527 /* See uma.h */ 2528 void 2529 uma_zone_set_init(uma_zone_t zone, uma_init uminit) 2530 { 2531 ZONE_LOCK(zone); 2532 KASSERT(zone->uz_keg->uk_pages == 0, 2533 ("uma_zone_set_init on non-empty keg")); 2534 zone->uz_keg->uk_init = uminit; 2535 ZONE_UNLOCK(zone); 2536 } 2537 2538 /* See uma.h */ 2539 void 2540 uma_zone_set_fini(uma_zone_t zone, uma_fini fini) 2541 { 2542 ZONE_LOCK(zone); 2543 KASSERT(zone->uz_keg->uk_pages == 0, 2544 ("uma_zone_set_fini on non-empty keg")); 
2545 zone->uz_keg->uk_fini = fini; 2546 ZONE_UNLOCK(zone); 2547 } 2548 2549 /* See uma.h */ 2550 void 2551 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) 2552 { 2553 ZONE_LOCK(zone); 2554 KASSERT(zone->uz_keg->uk_pages == 0, 2555 ("uma_zone_set_zinit on non-empty keg")); 2556 zone->uz_init = zinit; 2557 ZONE_UNLOCK(zone); 2558 } 2559 2560 /* See uma.h */ 2561 void 2562 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini) 2563 { 2564 ZONE_LOCK(zone); 2565 KASSERT(zone->uz_keg->uk_pages == 0, 2566 ("uma_zone_set_zfini on non-empty keg")); 2567 zone->uz_fini = zfini; 2568 ZONE_UNLOCK(zone); 2569 } 2570 2571 /* See uma.h */ 2572 /* XXX uk_freef is not actually used with the zone locked */ 2573 void 2574 uma_zone_set_freef(uma_zone_t zone, uma_free freef) 2575 { 2576 ZONE_LOCK(zone); 2577 zone->uz_keg->uk_freef = freef; 2578 ZONE_UNLOCK(zone); 2579 } 2580 2581 /* See uma.h */ 2582 /* XXX uk_allocf is not actually used with the zone locked */ 2583 void 2584 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) 2585 { 2586 ZONE_LOCK(zone); 2587 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC; 2588 zone->uz_keg->uk_allocf = allocf; 2589 ZONE_UNLOCK(zone); 2590 } 2591 2592 /* See uma.h */ 2593 int 2594 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) 2595 { 2596 uma_keg_t keg; 2597 vm_offset_t kva; 2598 int pages; 2599 2600 keg = zone->uz_keg; 2601 pages = count / keg->uk_ipers; 2602 2603 if (pages * keg->uk_ipers < count) 2604 pages++; 2605 2606 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE); 2607 2608 if (kva == 0) 2609 return (0); 2610 if (obj == NULL) { 2611 obj = vm_object_allocate(OBJT_DEFAULT, 2612 pages); 2613 } else { 2614 VM_OBJECT_LOCK_INIT(obj, "uma object"); 2615 _vm_object_allocate(OBJT_DEFAULT, 2616 pages, obj); 2617 } 2618 ZONE_LOCK(zone); 2619 keg->uk_kva = kva; 2620 keg->uk_obj = obj; 2621 keg->uk_maxpages = pages; 2622 keg->uk_allocf = obj_alloc; 2623 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC; 2624 
ZONE_UNLOCK(zone); 2625 return (1); 2626 } 2627 2628 /* See uma.h */ 2629 void 2630 uma_prealloc(uma_zone_t zone, int items) 2631 { 2632 int slabs; 2633 uma_slab_t slab; 2634 uma_keg_t keg; 2635 2636 keg = zone->uz_keg; 2637 ZONE_LOCK(zone); 2638 slabs = items / keg->uk_ipers; 2639 if (slabs * keg->uk_ipers < items) 2640 slabs++; 2641 while (slabs > 0) { 2642 slab = slab_zalloc(zone, M_WAITOK); 2643 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); 2644 slabs--; 2645 } 2646 ZONE_UNLOCK(zone); 2647 } 2648 2649 /* See uma.h */ 2650 u_int32_t * 2651 uma_find_refcnt(uma_zone_t zone, void *item) 2652 { 2653 uma_slabrefcnt_t slabref; 2654 uma_keg_t keg; 2655 u_int32_t *refcnt; 2656 int idx; 2657 2658 keg = zone->uz_keg; 2659 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & 2660 (~UMA_SLAB_MASK)); 2661 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT, 2662 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT")); 2663 idx = ((unsigned long)item - (unsigned long)slabref->us_data) 2664 / keg->uk_rsize; 2665 refcnt = &slabref->us_freelist[idx].us_refcnt; 2666 return refcnt; 2667 } 2668 2669 /* See uma.h */ 2670 void 2671 uma_reclaim(void) 2672 { 2673 #ifdef UMA_DEBUG 2674 printf("UMA: vm asked us to release pages!\n"); 2675 #endif 2676 bucket_enable(); 2677 zone_foreach(zone_drain); 2678 /* 2679 * Some slabs may have been freed but this zone will be visited early 2680 * we visit again so that we can free pages that are empty once other 2681 * zones are drained. We have to do the same for buckets. 
2682 */ 2683 zone_drain(slabzone); 2684 zone_drain(slabrefzone); 2685 bucket_zone_drain(); 2686 } 2687 2688 void * 2689 uma_large_malloc(int size, int wait) 2690 { 2691 void *mem; 2692 uma_slab_t slab; 2693 u_int8_t flags; 2694 2695 slab = uma_zalloc_internal(slabzone, NULL, wait); 2696 if (slab == NULL) 2697 return (NULL); 2698 mem = page_alloc(NULL, size, &flags, wait); 2699 if (mem) { 2700 vsetslab((vm_offset_t)mem, slab); 2701 slab->us_data = mem; 2702 slab->us_flags = flags | UMA_SLAB_MALLOC; 2703 slab->us_size = size; 2704 } else { 2705 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, 2706 ZFREE_STATFAIL | ZFREE_STATFREE); 2707 } 2708 2709 return (mem); 2710 } 2711 2712 void 2713 uma_large_free(uma_slab_t slab) 2714 { 2715 vsetobj((vm_offset_t)slab->us_data, kmem_object); 2716 page_free(slab->us_data, slab->us_size, slab->us_flags); 2717 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE); 2718 } 2719 2720 void 2721 uma_print_stats(void) 2722 { 2723 zone_foreach(uma_print_zone); 2724 } 2725 2726 static void 2727 slab_print(uma_slab_t slab) 2728 { 2729 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n", 2730 slab->us_keg, slab->us_data, slab->us_freecount, 2731 slab->us_firstfree); 2732 } 2733 2734 static void 2735 cache_print(uma_cache_t cache) 2736 { 2737 printf("alloc: %p(%d), free: %p(%d)\n", 2738 cache->uc_allocbucket, 2739 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0, 2740 cache->uc_freebucket, 2741 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0); 2742 } 2743 2744 void 2745 uma_print_zone(uma_zone_t zone) 2746 { 2747 uma_cache_t cache; 2748 uma_keg_t keg; 2749 uma_slab_t slab; 2750 int i; 2751 2752 keg = zone->uz_keg; 2753 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", 2754 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags, 2755 keg->uk_ipers, keg->uk_ppera, 2756 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free); 2757 printf("Part slabs:\n"); 2758 
LIST_FOREACH(slab, &keg->uk_part_slab, us_link) 2759 slab_print(slab); 2760 printf("Free slabs:\n"); 2761 LIST_FOREACH(slab, &keg->uk_free_slab, us_link) 2762 slab_print(slab); 2763 printf("Full slabs:\n"); 2764 LIST_FOREACH(slab, &keg->uk_full_slab, us_link) 2765 slab_print(slab); 2766 for (i = 0; i <= mp_maxid; i++) { 2767 if (CPU_ABSENT(i)) 2768 continue; 2769 cache = &zone->uz_cpu[i]; 2770 printf("CPU %d Cache:\n", i); 2771 cache_print(cache); 2772 } 2773 } 2774 2775 /* 2776 * Generate statistics across both the zone and its per-cpu cache's. Return 2777 * desired statistics if the pointer is non-NULL for that statistic. 2778 * 2779 * Note: does not update the zone statistics, as it can't safely clear the 2780 * per-CPU cache statistic. 2781 * 2782 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't 2783 * safe from off-CPU; we should modify the caches to track this information 2784 * directly so that we don't have to. 2785 */ 2786 static void 2787 uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp, 2788 u_int64_t *freesp) 2789 { 2790 uma_cache_t cache; 2791 u_int64_t allocs, frees; 2792 int cachefree, cpu; 2793 2794 allocs = frees = 0; 2795 cachefree = 0; 2796 for (cpu = 0; cpu <= mp_maxid; cpu++) { 2797 if (CPU_ABSENT(cpu)) 2798 continue; 2799 cache = &z->uz_cpu[cpu]; 2800 if (cache->uc_allocbucket != NULL) 2801 cachefree += cache->uc_allocbucket->ub_cnt; 2802 if (cache->uc_freebucket != NULL) 2803 cachefree += cache->uc_freebucket->ub_cnt; 2804 allocs += cache->uc_allocs; 2805 frees += cache->uc_frees; 2806 } 2807 allocs += z->uz_allocs; 2808 frees += z->uz_frees; 2809 if (cachefreep != NULL) 2810 *cachefreep = cachefree; 2811 if (allocsp != NULL) 2812 *allocsp = allocs; 2813 if (freesp != NULL) 2814 *freesp = frees; 2815 } 2816 2817 /* 2818 * Sysctl handler for vm.zone 2819 * 2820 * stolen from vm_zone.c 2821 */ 2822 static int 2823 sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 2824 { 2825 int error, len, cnt; 2826 const int 
linesize = 128;	/* conservative */
	int totalfree;
	char *tmpbuf, *offset;
	uma_zone_t z;
	uma_keg_t zk;
	char *p;
	int cachefree;
	uma_bucket_t bucket;
	u_int64_t allocs, frees;

	/* First pass: count the zones so the text buffer can be sized. */
	cnt = 0;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(zk, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &zk->uk_zones, uz_link)
			cnt++;
	}
	mtx_unlock(&uma_mtx);
	/* One 'linesize' slot per zone, with at least one slot for the header. */
	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
	    M_TEMP, M_WAITOK);
	/*
	 * NOTE(review): the header's column spacing does not line up with the
	 * row format below; it may have been collapsed -- verify.
	 */
	len = snprintf(tmpbuf, linesize,
	    "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
	/* With no zones, the header's last byte is replaced by a NUL. */
	if (cnt == 0)
		tmpbuf[len - 1] = '\0';
	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
	if (error || cnt == 0)
		goto out;
	/* The header has been sent, so the buffer is reused from its start. */
	offset = tmpbuf;
	mtx_lock(&uma_mtx);
	LIST_FOREACH(zk, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &zk->uk_zones, uz_link) {
			if (cnt == 0)	/* list may have changed size */
				break;
			ZONE_LOCK(z);
			cachefree = 0;
			/*
			 * Per-CPU caches are summed only for non-internal
			 * kegs; internal kegs report the zone counters alone.
			 */
			if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
				uma_zone_sumstat(z, &cachefree, &allocs, &frees);
			} else {
				allocs = z->uz_allocs;
				frees = z->uz_frees;
			}

			/* Items in the zone's full buckets count as free too. */
			LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
				cachefree += bucket->ub_cnt;
			}
			totalfree = zk->uk_free + cachefree;
			len = snprintf(offset, linesize,
			    "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
			    z->uz_name, zk->uk_size,
			    zk->uk_maxpages * zk->uk_ipers,
			    (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
			    totalfree,
			    (unsigned long long)allocs);
			ZONE_UNLOCK(z);
			/*
			 * Walk back over the padding of the 12-column name
			 * field and put a ':' right after the name.
			 */
			for (p = offset + 12; p > offset && *p == ' '; --p)
				/* nothing */ ;
			p[1] = ':';
			cnt--;
			offset += len;
		}
	}
	mtx_unlock(&uma_mtx);
	*offset++ = '\0';
	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
out:
	FREE(tmpbuf, M_TEMP);
	return (error);
}

/* Sysctl handler that reports the number of zones across all kegs. */
static int
sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
{
	uma_keg_t kz;
	uma_zone_t
z; 2899 int count; 2900 2901 count = 0; 2902 mtx_lock(&uma_mtx); 2903 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2904 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2905 count++; 2906 } 2907 mtx_unlock(&uma_mtx); 2908 return (sysctl_handle_int(oidp, &count, 0, req)); 2909 } 2910 2911 static int 2912 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) 2913 { 2914 struct uma_stream_header ush; 2915 struct uma_type_header uth; 2916 struct uma_percpu_stat ups; 2917 uma_bucket_t bucket; 2918 struct sbuf sbuf; 2919 uma_cache_t cache; 2920 uma_keg_t kz; 2921 uma_zone_t z; 2922 char *buffer; 2923 int buflen, count, error, i; 2924 2925 mtx_lock(&uma_mtx); 2926 restart: 2927 mtx_assert(&uma_mtx, MA_OWNED); 2928 count = 0; 2929 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2930 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2931 count++; 2932 } 2933 mtx_unlock(&uma_mtx); 2934 2935 buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) * 2936 (mp_maxid + 1)) + 1; 2937 buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO); 2938 2939 mtx_lock(&uma_mtx); 2940 i = 0; 2941 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2942 LIST_FOREACH(z, &kz->uk_zones, uz_link) 2943 i++; 2944 } 2945 if (i > count) { 2946 free(buffer, M_TEMP); 2947 goto restart; 2948 } 2949 count = i; 2950 2951 sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN); 2952 2953 /* 2954 * Insert stream header. 
2955 */ 2956 bzero(&ush, sizeof(ush)); 2957 ush.ush_version = UMA_STREAM_VERSION; 2958 ush.ush_maxcpus = (mp_maxid + 1); 2959 ush.ush_count = count; 2960 if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) { 2961 mtx_unlock(&uma_mtx); 2962 error = ENOMEM; 2963 goto out; 2964 } 2965 2966 LIST_FOREACH(kz, &uma_kegs, uk_link) { 2967 LIST_FOREACH(z, &kz->uk_zones, uz_link) { 2968 bzero(&uth, sizeof(uth)); 2969 ZONE_LOCK(z); 2970 strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME); 2971 uth.uth_align = kz->uk_align; 2972 uth.uth_pages = kz->uk_pages; 2973 uth.uth_keg_free = kz->uk_free; 2974 uth.uth_size = kz->uk_size; 2975 uth.uth_rsize = kz->uk_rsize; 2976 uth.uth_maxpages = kz->uk_maxpages; 2977 if (kz->uk_ppera > 1) 2978 uth.uth_limit = kz->uk_maxpages / 2979 kz->uk_ppera; 2980 else 2981 uth.uth_limit = kz->uk_maxpages * 2982 kz->uk_ipers; 2983 2984 /* 2985 * A zone is secondary is it is not the first entry 2986 * on the keg's zone list. 2987 */ 2988 if ((kz->uk_flags & UMA_ZONE_SECONDARY) && 2989 (LIST_FIRST(&kz->uk_zones) != z)) 2990 uth.uth_zone_flags = UTH_ZONE_SECONDARY; 2991 2992 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) 2993 uth.uth_zone_free += bucket->ub_cnt; 2994 uth.uth_allocs = z->uz_allocs; 2995 uth.uth_frees = z->uz_frees; 2996 uth.uth_fails = z->uz_fails; 2997 if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) { 2998 ZONE_UNLOCK(z); 2999 mtx_unlock(&uma_mtx); 3000 error = ENOMEM; 3001 goto out; 3002 } 3003 /* 3004 * While it is not normally safe to access the cache 3005 * bucket pointers while not on the CPU that owns the 3006 * cache, we only allow the pointers to be exchanged 3007 * without the zone lock held, not invalidated, so 3008 * accept the possible race associated with bucket 3009 * exchange during monitoring. 
3010 */ 3011 for (i = 0; i < (mp_maxid + 1); i++) { 3012 bzero(&ups, sizeof(ups)); 3013 if (kz->uk_flags & UMA_ZFLAG_INTERNAL) 3014 goto skip; 3015 cache = &z->uz_cpu[i]; 3016 if (cache->uc_allocbucket != NULL) 3017 ups.ups_cache_free += 3018 cache->uc_allocbucket->ub_cnt; 3019 if (cache->uc_freebucket != NULL) 3020 ups.ups_cache_free += 3021 cache->uc_freebucket->ub_cnt; 3022 ups.ups_allocs = cache->uc_allocs; 3023 ups.ups_frees = cache->uc_frees; 3024 skip: 3025 if (sbuf_bcat(&sbuf, &ups, sizeof(ups)) < 0) { 3026 ZONE_UNLOCK(z); 3027 mtx_unlock(&uma_mtx); 3028 error = ENOMEM; 3029 goto out; 3030 } 3031 } 3032 ZONE_UNLOCK(z); 3033 } 3034 } 3035 mtx_unlock(&uma_mtx); 3036 sbuf_finish(&sbuf); 3037 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); 3038 out: 3039 free(buffer, M_TEMP); 3040 return (error); 3041 } 3042