1 /* 2 * Copyright (c) 1987, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94 34 * $FreeBSD$ 35 */ 36 37 #include "opt_vm.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/mutex.h> 46 #include <sys/vmmeter.h> 47 #include <sys/proc.h> 48 #include <sys/sysctl.h> 49 #include <sys/time.h> 50 51 #include <vm/vm.h> 52 #include <vm/pmap.h> 53 #include <vm/vm_param.h> 54 #include <vm/vm_kern.h> 55 #include <vm/vm_extern.h> 56 #include <vm/vm_map.h> 57 #include <vm/vm_page.h> 58 #include <vm/uma.h> 59 #include <vm/uma_int.h> 60 #include <vm/uma_dbg.h> 61 62 #if defined(INVARIANTS) && defined(__i386__) 63 #include <machine/cpu.h> 64 #endif 65 66 /* 67 * When realloc() is called, if the new size is sufficiently smaller than 68 * the old size, realloc() will allocate a new, smaller block to avoid 69 * wasting memory. 'Sufficiently smaller' is defined as: newsize <= 70 * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'. 
 */
#ifndef REALLOC_FRACTION
#define	REALLOC_FRACTION	1	/* new block if <= half the size */
#endif

MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");

MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");

/* Bootstrap the allocator early in boot (SI_SUB_KMEM). */
static void kmeminit(void *);
SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL)

static MALLOC_DEFINE(M_FREE, "free", "should be on free list");

/* Head of the linked list of registered malloc types (see malloc_init()). */
static struct malloc_type *kmemstatistics;
/* Bounds of the kmem submap carved out of kernel_map in kmeminit(). */
static char *kmembase;
static char *kmemlimit;

/*
 * Requested sizes are rounded up to a multiple of KMEM_ZBASE (16) and the
 * rounded size, shifted right by KMEM_ZSHIFT, indexes kmemsize[] to find
 * the power-of-two UMA zone that services the request.
 */
#define KMEM_ZSHIFT	4
#define KMEM_ZBASE	16
#define KMEM_ZMASK	(KMEM_ZBASE - 1)

#define KMEM_ZMAX	65536
#define KMEM_ZSIZE	(KMEM_ZMAX >> KMEM_ZSHIFT)
/* Maps (rounded size >> KMEM_ZSHIFT) to an index into kmemzones[]. */
static u_int8_t kmemsize[KMEM_ZSIZE + 1];

/* These won't be powers of two for long */
struct {
	int kz_size;		/* allocation size serviced by this zone */
	char *kz_name;		/* zone name, as shown in stats */
	uma_zone_t kz_zone;	/* backing UMA zone, created in kmeminit() */
} kmemzones[] = {
	{16, "16", NULL},
	{32, "32", NULL},
	{64, "64", NULL},
	{128, "128", NULL},
	{256, "256", NULL},
	{512, "512", NULL},
	{1024, "1024", NULL},
	{2048, "2048", NULL},
	{4096, "4096", NULL},
	{8192, "8192", NULL},
	{16384, "16384", NULL},
	{32768, "32768", NULL},
	{65536, "65536", NULL},
	{0, NULL},		/* sentinel: terminates kmeminit()'s loop */
};

/* Size of the kmem submap; tuned in kmeminit(), overridable via loader. */
u_int vm_kmem_size;

/*
 * The malloc_mtx protects the kmemstatistics linked list.
 */

struct mtx malloc_mtx;

#ifdef MALLOC_PROFILE
/* Per-size-class request counters, indexed like kmemsize[]. */
uint64_t krequests[KMEM_ZSIZE + 1];

static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS);
#endif

static int sysctl_kern_malloc(SYSCTL_HANDLER_ARGS);

/* time_uptime of last malloc(9) failure */
static time_t t_malloc_fail;

#ifdef MALLOC_MAKE_FAILURES
/*
 * Causes malloc failures every (n) mallocs with M_NOWAIT. If set to 0,
 * doesn't cause failures.
145 */ 146 SYSCTL_NODE(_debug, OID_AUTO, malloc, CTLFLAG_RD, 0, 147 "Kernel malloc debugging options"); 148 149 static int malloc_failure_rate; 150 static int malloc_nowait_count; 151 static int malloc_failure_count; 152 SYSCTL_INT(_debug_malloc, OID_AUTO, failure_rate, CTLFLAG_RW, 153 &malloc_failure_rate, 0, "Every (n) mallocs with M_NOWAIT will fail"); 154 TUNABLE_INT("debug.malloc.failure_rate", &malloc_failure_rate); 155 SYSCTL_INT(_debug_malloc, OID_AUTO, failure_count, CTLFLAG_RD, 156 &malloc_failure_count, 0, "Number of imposed M_NOWAIT malloc failures"); 157 #endif 158 159 int 160 malloc_last_fail(void) 161 { 162 163 return (time_uptime - t_malloc_fail); 164 } 165 166 /* 167 * malloc: 168 * 169 * Allocate a block of memory. 170 * 171 * If M_NOWAIT is set, this routine will not block and return NULL if 172 * the allocation fails. 173 */ 174 void * 175 malloc(size, type, flags) 176 unsigned long size; 177 struct malloc_type *type; 178 int flags; 179 { 180 int indx; 181 caddr_t va; 182 uma_zone_t zone; 183 #ifdef DIAGNOSTIC 184 unsigned long osize = size; 185 #endif 186 register struct malloc_type *ksp = type; 187 188 #ifdef INVARIANTS 189 /* 190 * To make sure that WAITOK or NOWAIT is set, but not more than 191 * one, and check against the API botches that are common. 
192 */ 193 indx = flags & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT); 194 if (indx != M_NOWAIT && indx != M_WAITOK) { 195 static struct timeval lasterr; 196 static int curerr, once; 197 if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) { 198 printf("Bad malloc flags: %x\n", indx); 199 backtrace(); 200 flags |= M_WAITOK; 201 once++; 202 } 203 } 204 #endif 205 #if 0 206 if (size == 0) 207 Debugger("zero size malloc"); 208 #endif 209 #ifdef MALLOC_MAKE_FAILURES 210 if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) { 211 atomic_add_int(&malloc_nowait_count, 1); 212 if ((malloc_nowait_count % malloc_failure_rate) == 0) { 213 atomic_add_int(&malloc_failure_count, 1); 214 t_malloc_fail = time_uptime; 215 return (NULL); 216 } 217 } 218 #endif 219 if (flags & M_WAITOK) 220 KASSERT(curthread->td_intr_nesting_level == 0, 221 ("malloc(M_WAITOK) in interrupt context")); 222 if (size <= KMEM_ZMAX) { 223 if (size & KMEM_ZMASK) 224 size = (size & ~KMEM_ZMASK) + KMEM_ZBASE; 225 indx = kmemsize[size >> KMEM_ZSHIFT]; 226 zone = kmemzones[indx].kz_zone; 227 #ifdef MALLOC_PROFILE 228 krequests[size >> KMEM_ZSHIFT]++; 229 #endif 230 va = uma_zalloc(zone, flags); 231 mtx_lock(&ksp->ks_mtx); 232 if (va == NULL) 233 goto out; 234 235 ksp->ks_size |= 1 << indx; 236 size = zone->uz_size; 237 } else { 238 size = roundup(size, PAGE_SIZE); 239 zone = NULL; 240 va = uma_large_malloc(size, flags); 241 mtx_lock(&ksp->ks_mtx); 242 if (va == NULL) 243 goto out; 244 } 245 ksp->ks_memuse += size; 246 ksp->ks_inuse++; 247 out: 248 ksp->ks_calls++; 249 if (ksp->ks_memuse > ksp->ks_maxused) 250 ksp->ks_maxused = ksp->ks_memuse; 251 252 mtx_unlock(&ksp->ks_mtx); 253 if (!(flags & M_NOWAIT)) 254 KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL")); 255 if (va == NULL) { 256 t_malloc_fail = time_uptime; 257 } 258 #ifdef DIAGNOSTIC 259 if (!(flags & M_ZERO)) { 260 memset(va, 0x70, osize); 261 } 262 #endif 263 return ((void *) va); 264 } 265 266 /* 267 * free: 268 * 269 * Free a block of memory 
 *	allocated by malloc.
 *
 *	This routine may not block.
 */
void
free(addr, type)
	void *addr;
	struct malloc_type *type;
{
	register struct malloc_type *ksp = type;
	uma_slab_t slab;
	u_long size;

	/* free(NULL, ...) does nothing */
	if (addr == NULL)
		return;

	KASSERT(ksp->ks_memuse > 0,
	    ("malloc(9)/free(9) confusion.\n%s",
	     "Probably freeing with wrong type, but maybe not here."));
	size = 0;

	/* Mask off the offset within the slab to find its descriptor. */
	slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK));

	if (slab == NULL)
		panic("free: address %p(%p) has not been allocated.\n",
		    addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));


	if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
		/* Zone-backed allocation: size comes from the zone. */
#ifdef INVARIANTS
		struct malloc_type **mtp = addr;
#endif
		size = slab->us_zone->uz_size;
#ifdef INVARIANTS
		/*
		 * Cache a pointer to the malloc_type that most recently freed
		 * this memory here. This way we know who is most likely to
		 * have stepped on it later.
		 *
		 * This code assumes that size is a multiple of 8 bytes for
		 * 64 bit machines
		 */
		mtp = (struct malloc_type **)
		    ((unsigned long)mtp & ~UMA_ALIGN_PTR);
		mtp += (size - sizeof(struct malloc_type *)) /
		    sizeof(struct malloc_type *);
		*mtp = type;
#endif
		uma_zfree_arg(slab->us_zone, addr, slab);
	} else {
		/* Oversized allocation made by uma_large_malloc(). */
		size = slab->us_size;
		uma_large_free(slab);
	}
	/* Per-type statistics are protected by the type's own mutex. */
	mtx_lock(&ksp->ks_mtx);
	KASSERT(size <= ksp->ks_memuse,
	    ("malloc(9)/free(9) confusion.\n%s",
	     "Probably freeing with wrong type, but maybe not here."));
	ksp->ks_memuse -= size;
	ksp->ks_inuse--;
	mtx_unlock(&ksp->ks_mtx);
}

/*
 * realloc: change the size of a memory block
 *
 *	Returns the (possibly moved) block, or NULL on allocation failure,
 *	in which case the original block is left untouched.
 */
void *
realloc(addr, size, type, flags)
	void *addr;
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	uma_slab_t slab;
	unsigned long alloc;
	void *newaddr;

	/* realloc(NULL, ...) is equivalent to malloc(...) */
	if (addr == NULL)
		return (malloc(size, type, flags));

	slab = vtoslab((vm_offset_t)addr & ~(UMA_SLAB_MASK));

	/* Sanity check */
	KASSERT(slab != NULL,
	    ("realloc: address %p out of range", (void *)addr));

	/* Get the size of the original block */
	if (slab->us_zone)
		alloc = slab->us_zone->uz_size;
	else
		alloc = slab->us_size;

	/*
	 * Reuse the original block if appropriate: the new size must fit,
	 * and must not be so much smaller (see REALLOC_FRACTION) that we
	 * would rather shrink into a new block.
	 */
	if (size <= alloc
	    && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE))
		return (addr);

	/* Allocate a new, bigger (or smaller) block */
	if ((newaddr = malloc(size, type, flags)) == NULL)
		return (NULL);

	/* Copy over original contents */
	bcopy(addr, newaddr, min(size, alloc));
	free(addr, type);
	return (newaddr);
}

/*
 * reallocf: same as realloc() but free memory on failure.
 */
void *
reallocf(addr, size, type, flags)
	void *addr;
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	void *mem;

	if ((mem = realloc(addr, size, type, flags)) == NULL)
		free(addr, type);
	return (mem);
}

/*
 * Initialize the kernel memory allocator: size and create the kmem
 * submap, start up UMA, and create the power-of-two malloc zones.
 */
/* ARGSUSED*/
static void
kmeminit(dummy)
	void *dummy;
{
	u_int8_t indx;
	u_long npg;
	u_long mem_size;
	int i;

	mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);

	/*
	 * Try to auto-tune the kernel memory size, so that it is
	 * more applicable for a wider range of machine sizes.
	 * On an X86, a VM_KMEM_SIZE_SCALE value of 4 is good, while
	 * a VM_KMEM_SIZE of 12MB is a fair compromise. The
	 * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
	 * available, and on an X86 with a total KVA space of 256MB,
	 * try to keep VM_KMEM_SIZE_MAX at 80MB or below.
	 *
	 * Note that the kmem_map is also used by the zone allocator,
	 * so make sure that there is enough space.
420 */ 421 vm_kmem_size = VM_KMEM_SIZE; 422 mem_size = cnt.v_page_count * PAGE_SIZE; 423 424 #if defined(VM_KMEM_SIZE_SCALE) 425 if ((mem_size / VM_KMEM_SIZE_SCALE) > vm_kmem_size) 426 vm_kmem_size = mem_size / VM_KMEM_SIZE_SCALE; 427 #endif 428 429 #if defined(VM_KMEM_SIZE_MAX) 430 if (vm_kmem_size >= VM_KMEM_SIZE_MAX) 431 vm_kmem_size = VM_KMEM_SIZE_MAX; 432 #endif 433 434 /* Allow final override from the kernel environment */ 435 TUNABLE_INT_FETCH("kern.vm.kmem.size", &vm_kmem_size); 436 437 /* 438 * Limit kmem virtual size to twice the physical memory. 439 * This allows for kmem map sparseness, but limits the size 440 * to something sane. Be careful to not overflow the 32bit 441 * ints while doing the check. 442 */ 443 if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE)) 444 vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE; 445 446 /* 447 * In mbuf_init(), we set up submaps for mbufs and clusters, in which 448 * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES), 449 * respectively. Mathematically, this means that what we do here may 450 * amount to slightly more address space than we need for the submaps, 451 * but it never hurts to have an extra page in kmem_map. 
452 */ 453 npg = (nmbufs*MSIZE + nmbclusters*MCLBYTES + vm_kmem_size) / PAGE_SIZE; 454 455 kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase, 456 (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE)); 457 kmem_map->system_map = 1; 458 459 uma_startup2(); 460 461 for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) { 462 int size = kmemzones[indx].kz_size; 463 char *name = kmemzones[indx].kz_name; 464 465 kmemzones[indx].kz_zone = uma_zcreate(name, size, 466 #ifdef INVARIANTS 467 mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini, 468 #else 469 NULL, NULL, NULL, NULL, 470 #endif 471 UMA_ALIGN_PTR, UMA_ZONE_MALLOC); 472 473 for (;i <= size; i+= KMEM_ZBASE) 474 kmemsize[i >> KMEM_ZSHIFT] = indx; 475 476 } 477 } 478 479 void 480 malloc_init(data) 481 void *data; 482 { 483 struct malloc_type *type = (struct malloc_type *)data; 484 485 mtx_lock(&malloc_mtx); 486 if (type->ks_magic != M_MAGIC) 487 panic("malloc type lacks magic"); 488 489 if (cnt.v_page_count == 0) 490 panic("malloc_init not allowed before vm init"); 491 492 if (type->ks_next != NULL) 493 return; 494 495 type->ks_next = kmemstatistics; 496 kmemstatistics = type; 497 mtx_init(&type->ks_mtx, type->ks_shortdesc, "Malloc Stats", MTX_DEF); 498 mtx_unlock(&malloc_mtx); 499 } 500 501 void 502 malloc_uninit(data) 503 void *data; 504 { 505 struct malloc_type *type = (struct malloc_type *)data; 506 struct malloc_type *t; 507 508 mtx_lock(&malloc_mtx); 509 mtx_lock(&type->ks_mtx); 510 if (type->ks_magic != M_MAGIC) 511 panic("malloc type lacks magic"); 512 513 if (cnt.v_page_count == 0) 514 panic("malloc_uninit not allowed before vm init"); 515 516 if (type == kmemstatistics) 517 kmemstatistics = type->ks_next; 518 else { 519 for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) { 520 if (t->ks_next == type) { 521 t->ks_next = type->ks_next; 522 break; 523 } 524 } 525 } 526 type->ks_next = NULL; 527 mtx_destroy(&type->ks_mtx); 528 mtx_unlock(&malloc_mtx); 529 } 530 531 static int 532 
sysctl_kern_malloc(SYSCTL_HANDLER_ARGS)
{
	/*
	 * Render per-type allocation statistics as a text table for the
	 * kern.malloc sysctl.  One line of up to 'linesize' bytes per
	 * registered malloc type, plus a banner line.
	 */
	struct malloc_type *type;
	int linesize = 128;
	int curline;
	int bufsize;
	int first;
	int error;
	char *buf;
	char *p;
	int cnt;
	int len;
	int i;

	cnt = 0;

	/* First pass: count the registered types to size the buffer. */
	mtx_lock(&malloc_mtx);
	for (type = kmemstatistics; type != NULL; type = type->ks_next)
		cnt++;

	/*
	 * NOTE(review): the mutex is dropped around the M_WAITOK
	 * allocation (required, since malloc may sleep), so the list can
	 * change before it is re-taken; the loop below is bounded by the
	 * old 'cnt' so it cannot overrun the buffer.
	 */
	mtx_unlock(&malloc_mtx);
	bufsize = linesize * (cnt + 1);
	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
	mtx_lock(&malloc_mtx);

	len = snprintf(p, linesize,
	    "\n        Type  InUse MemUse HighUse Requests  Size(s)\n");
	p += len;

	for (type = kmemstatistics; cnt != 0 && type != NULL;
	    type = type->ks_next, cnt--) {
		/* Skip types that have never been used. */
		if (type->ks_calls == 0)
			continue;

		curline = linesize - 2;	/* Leave room for the \n */
		len = snprintf(p, curline, "%13s%6lu%6luK%7luK%9llu",
			type->ks_shortdesc,
			type->ks_inuse,
			(type->ks_memuse + 1023) / 1024,
			(type->ks_maxused + 1023) / 1024,
			(long long unsigned)type->ks_calls);
		curline -= len;
		p += len;

		/*
		 * ks_size is a bitmask of the size classes this type has
		 * allocated from; print the matching zone names,
		 * comma-separated.
		 */
		first = 1;
		for (i = 0; i < sizeof(kmemzones) / sizeof(kmemzones[0]) - 1;
		    i++) {
			if (type->ks_size & (1 << i)) {
				if (first)
					len = snprintf(p, curline, "  ");
				else
					len = snprintf(p, curline, ",");
				curline -= len;
				p += len;

				len = snprintf(p, curline,
				    "%s", kmemzones[i].kz_name);
				curline -= len;
				p += len;

				first = 0;
			}
		}

		len = snprintf(p, 2, "\n");
		p += len;
	}

	mtx_unlock(&malloc_mtx);
	error = SYSCTL_OUT(req, buf, p - buf);

	free(buf, M_TEMP);
	return (error);
}

SYSCTL_OID(_kern, OID_AUTO, malloc, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_kern_malloc, "A", "Malloc Stats");

#ifdef MALLOC_PROFILE

static int
sysctl_kern_mprof(SYSCTL_HANDLER_ARGS)
{
	/*
	 * Render the malloc profiling table (requests and real sizes per
	 * size class) for the kern.mprof sysctl.
	 */
	int linesize = 64;
	uint64_t count;
	uint64_t waste;
	uint64_t mem;
	int bufsize;
	int error;
	char *buf;
	int rsize;
	int size;
	char *p;
	int len;
	int i;

	/* One line per size class, plus the banner and totals lines. */
	bufsize = linesize * (KMEM_ZSIZE + 1);
	bufsize += 128;		/* For the stats line */
	bufsize += 128;		/* For the banner line */
	waste = 0;
	mem = 0;

	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
	len = snprintf(p, bufsize,
	    "\n  Size                    Requests  Real Size\n");
	bufsize -= len;
	p += len;

	for (i = 0; i < KMEM_ZSIZE; i++) {
		/* 'size' is the requested class; 'rsize' is what the
		 * backing zone actually hands out. */
		size = i << KMEM_ZSHIFT;
		rsize = kmemzones[kmemsize[i]].kz_size;
		count = (long long unsigned)krequests[i];

		len = snprintf(p, bufsize, "%6d%28llu%11d\n",
		    size, (unsigned long long)count, rsize);
		bufsize -= len;
		p += len;

		/* Accumulate total memory served and rounding waste. */
		if ((rsize * count) > (size * count))
			waste += (rsize * count) - (size * count);
		mem += (rsize * count);
	}

	len = snprintf(p, bufsize,
	    "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n",
	    (unsigned long long)mem, (unsigned long long)waste);
	p += len;

	error = SYSCTL_OUT(req, buf, p - buf);

	free(buf, M_TEMP);
	return (error);
}

SYSCTL_OID(_kern, OID_AUTO, mprof, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_kern_mprof, "A", "Malloc Profiling");
#endif /* MALLOC_PROFILE */