1 /* 2 * Copyright (c) 1987, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94 34 * $FreeBSD$ 35 */ 36 37 #include "opt_vm.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/mutex.h> 46 #include <sys/vmmeter.h> 47 #include <sys/proc.h> 48 #include <sys/sysctl.h> 49 50 #include <vm/vm.h> 51 #include <vm/pmap.h> 52 #include <vm/vm_param.h> 53 #include <vm/vm_kern.h> 54 #include <vm/vm_extern.h> 55 #include <vm/vm_map.h> 56 #include <vm/vm_page.h> 57 #include <vm/uma.h> 58 #include <vm/uma_int.h> 59 #include <vm/uma_dbg.h> 60 61 #if defined(INVARIANTS) && defined(__i386__) 62 #include <machine/cpu.h> 63 #endif 64 65 /* 66 * When realloc() is called, if the new size is sufficiently smaller than 67 * the old size, realloc() will allocate a new, smaller block to avoid 68 * wasting memory. 'Sufficiently smaller' is defined as: newsize <= 69 * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'. 70 */ 71 #ifndef REALLOC_FRACTION 72 #define REALLOC_FRACTION 1 /* new block if <= half the size */ 73 #endif 74 75 MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches"); 76 MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory"); 77 MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers"); 78 79 MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); 80 MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery"); 81 82 static void kmeminit(void *); 83 SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL) 84 85 static MALLOC_DEFINE(M_FREE, "free", "should be on free list"); 86 87 static struct malloc_type *kmemstatistics; 88 static char *kmembase; 89 static char *kmemlimit; 90 91 #define KMEM_ZSHIFT 4 92 #define KMEM_ZBASE 16 93 #define KMEM_ZMASK (KMEM_ZBASE - 1) 94 95 #define KMEM_ZMAX 65536 96 #define KMEM_ZSIZE (KMEM_ZMAX >> KMEM_ZSHIFT) 97 static u_int8_t kmemsize[KMEM_ZSIZE + 1]; 98 99 /* These won't be powers of two for long */ 100 struct { 101 int kz_size; 102 char *kz_name; 103 uma_zone_t kz_zone; 104 } kmemzones[] = { 105 {16, "16", NULL}, 106 {32, "32", NULL}, 107 {64, "64", NULL}, 108 {128, "128", NULL}, 109 {256, "256", NULL}, 110 {512, "512", NULL}, 111 {1024, "1024", NULL}, 112 {2048, "2048", NULL}, 113 {4096, "4096", NULL}, 114 {8192, "8192", NULL}, 115 {16384, "16384", NULL}, 116 {32768, "32768", NULL}, 117 {65536, "65536", NULL}, 118 {0, NULL}, 119 }; 120 121 u_int vm_kmem_size; 122 123 /* 124 * The malloc_mtx protects the kmemstatistics linked list. 125 */ 126 127 struct mtx malloc_mtx; 128 129 #ifdef MALLOC_PROFILE 130 uint64_t krequests[KMEM_ZSIZE + 1]; 131 132 static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS); 133 #endif 134 135 static int sysctl_kern_malloc(SYSCTL_HANDLER_ARGS); 136 137 /* 138 * malloc: 139 * 140 * Allocate a block of memory. 141 * 142 * If M_NOWAIT is set, this routine will not block and return NULL if 143 * the allocation fails. 144 */ 145 void * 146 malloc(size, type, flags) 147 unsigned long size; 148 struct malloc_type *type; 149 int flags; 150 { 151 int indx; 152 caddr_t va; 153 uma_zone_t zone; 154 register struct malloc_type *ksp = type; 155 156 #if 0 157 if (size == 0) 158 Debugger("zero size malloc"); 159 #endif 160 if (!(flags & M_NOWAIT)) 161 KASSERT(curthread->td_intr_nesting_level == 0, 162 ("malloc(M_WAITOK) in interrupt context")); 163 if (size <= KMEM_ZMAX) { 164 if (size & KMEM_ZMASK) 165 size = (size & ~KMEM_ZMASK) + KMEM_ZBASE; 166 indx = kmemsize[size >> KMEM_ZSHIFT]; 167 zone = kmemzones[indx].kz_zone; 168 #ifdef MALLOC_PROFILE 169 krequests[size >> KMEM_ZSHIFT]++; 170 #endif 171 va = uma_zalloc(zone, flags); 172 mtx_lock(&ksp->ks_mtx); 173 if (va == NULL) 174 goto out; 175 176 ksp->ks_size |= 1 << indx; 177 size = zone->uz_size; 178 } else { 179 size = roundup(size, PAGE_SIZE); 180 zone = NULL; 181 va = uma_large_malloc(size, flags); 182 mtx_lock(&ksp->ks_mtx); 183 if (va == NULL) 184 goto out; 185 } 186 ksp->ks_memuse += size; 187 ksp->ks_inuse++; 188 out: 189 ksp->ks_calls++; 190 if (ksp->ks_memuse > ksp->ks_maxused) 191 ksp->ks_maxused = ksp->ks_memuse; 192 193 mtx_unlock(&ksp->ks_mtx); 194 return ((void *) va); 195 } 196 197 /* 198 * free: 199 * 200 * Free a block of memory allocated by malloc. 201 * 202 * This routine may not block. 203 */ 204 void 205 free(addr, type) 206 void *addr; 207 struct malloc_type *type; 208 { 209 register struct malloc_type *ksp = type; 210 uma_slab_t slab; 211 u_long size; 212 213 /* free(NULL, ...) does nothing */ 214 if (addr == NULL) 215 return; 216 217 size = 0; 218 219 slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK)); 220 221 if (slab == NULL) 222 panic("free: address %p(%p) has not been allocated.\n", 223 addr, (void *)((u_long)addr & (~UMA_SLAB_MASK))); 224 225 226 if (!(slab->us_flags & UMA_SLAB_MALLOC)) { 227 #ifdef INVARIANTS 228 struct malloc_type **mtp = addr; 229 #endif 230 size = slab->us_zone->uz_size; 231 #ifdef INVARIANTS 232 /* 233 * Cache a pointer to the malloc_type that most recently freed 234 * this memory here. This way we know who is most likely to 235 * have stepped on it later. 236 * 237 * This code assumes that size is a multiple of 8 bytes for 238 * 64 bit machines 239 */ 240 mtp = (struct malloc_type **) 241 ((unsigned long)mtp & ~UMA_ALIGN_PTR); 242 mtp += (size - sizeof(struct malloc_type *)) / 243 sizeof(struct malloc_type *); 244 *mtp = type; 245 #endif 246 uma_zfree_arg(slab->us_zone, addr, slab); 247 } else { 248 size = slab->us_size; 249 uma_large_free(slab); 250 } 251 mtx_lock(&ksp->ks_mtx); 252 ksp->ks_memuse -= size; 253 ksp->ks_inuse--; 254 mtx_unlock(&ksp->ks_mtx); 255 } 256 257 /* 258 * realloc: change the size of a memory block 259 */ 260 void * 261 realloc(addr, size, type, flags) 262 void *addr; 263 unsigned long size; 264 struct malloc_type *type; 265 int flags; 266 { 267 uma_slab_t slab; 268 unsigned long alloc; 269 void *newaddr; 270 271 /* realloc(NULL, ...) is equivalent to malloc(...) */ 272 if (addr == NULL) 273 return (malloc(size, type, flags)); 274 275 slab = vtoslab((vm_offset_t)addr & ~(UMA_SLAB_MASK)); 276 277 /* Sanity check */ 278 KASSERT(slab != NULL, 279 ("realloc: address %p out of range", (void *)addr)); 280 281 /* Get the size of the original block */ 282 if (slab->us_zone) 283 alloc = slab->us_zone->uz_size; 284 else 285 alloc = slab->us_size; 286 287 /* Reuse the original block if appropriate */ 288 if (size <= alloc 289 && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE)) 290 return (addr); 291 292 /* Allocate a new, bigger (or smaller) block */ 293 if ((newaddr = malloc(size, type, flags)) == NULL) 294 return (NULL); 295 296 /* Copy over original contents */ 297 bcopy(addr, newaddr, min(size, alloc)); 298 free(addr, type); 299 return (newaddr); 300 } 301 302 /* 303 * reallocf: same as realloc() but free memory on failure. 304 */ 305 void * 306 reallocf(addr, size, type, flags) 307 void *addr; 308 unsigned long size; 309 struct malloc_type *type; 310 int flags; 311 { 312 void *mem; 313 314 if ((mem = realloc(addr, size, type, flags)) == NULL) 315 free(addr, type); 316 return (mem); 317 } 318 319 /* 320 * Initialize the kernel memory allocator 321 */ 322 /* ARGSUSED*/ 323 static void 324 kmeminit(dummy) 325 void *dummy; 326 { 327 u_int8_t indx; 328 u_long npg; 329 u_long mem_size; 330 int i; 331 332 mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF); 333 334 /* 335 * Try to auto-tune the kernel memory size, so that it is 336 * more applicable for a wider range of machine sizes. 337 * On an X86, a VM_KMEM_SIZE_SCALE value of 4 is good, while 338 * a VM_KMEM_SIZE of 12MB is a fair compromise. The 339 * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space 340 * available, and on an X86 with a total KVA space of 256MB, 341 * try to keep VM_KMEM_SIZE_MAX at 80MB or below. 342 * 343 * Note that the kmem_map is also used by the zone allocator, 344 * so make sure that there is enough space. 345 */ 346 vm_kmem_size = VM_KMEM_SIZE; 347 mem_size = cnt.v_page_count * PAGE_SIZE; 348 349 #if defined(VM_KMEM_SIZE_SCALE) 350 if ((mem_size / VM_KMEM_SIZE_SCALE) > vm_kmem_size) 351 vm_kmem_size = mem_size / VM_KMEM_SIZE_SCALE; 352 #endif 353 354 #if defined(VM_KMEM_SIZE_MAX) 355 if (vm_kmem_size >= VM_KMEM_SIZE_MAX) 356 vm_kmem_size = VM_KMEM_SIZE_MAX; 357 #endif 358 359 /* Allow final override from the kernel environment */ 360 TUNABLE_INT_FETCH("kern.vm.kmem.size", &vm_kmem_size); 361 362 /* 363 * Limit kmem virtual size to twice the physical memory. 364 * This allows for kmem map sparseness, but limits the size 365 * to something sane. Be careful to not overflow the 32bit 366 * ints while doing the check. 367 */ 368 if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE)) 369 vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE; 370 371 /* 372 * In mbuf_init(), we set up submaps for mbufs and clusters, in which 373 * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES), 374 * respectively. Mathematically, this means that what we do here may 375 * amount to slightly more address space than we need for the submaps, 376 * but it never hurts to have an extra page in kmem_map. 377 */ 378 npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt * 379 sizeof(u_int) + vm_kmem_size) / PAGE_SIZE; 380 381 kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase, 382 (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE)); 383 kmem_map->system_map = 1; 384 385 uma_startup2(); 386 387 for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) { 388 int size = kmemzones[indx].kz_size; 389 char *name = kmemzones[indx].kz_name; 390 391 kmemzones[indx].kz_zone = uma_zcreate(name, size, 392 #ifdef INVARIANTS 393 mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini, 394 #else 395 NULL, NULL, NULL, NULL, 396 #endif 397 UMA_ALIGN_PTR, UMA_ZONE_MALLOC); 398 399 for (;i <= size; i+= KMEM_ZBASE) 400 kmemsize[i >> KMEM_ZSHIFT] = indx; 401 402 } 403 } 404 405 void 406 malloc_init(data) 407 void *data; 408 { 409 struct malloc_type *type = (struct malloc_type *)data; 410 411 mtx_lock(&malloc_mtx); 412 if (type->ks_magic != M_MAGIC) 413 panic("malloc type lacks magic"); 414 415 if (cnt.v_page_count == 0) 416 panic("malloc_init not allowed before vm init"); 417 418 if (type->ks_next != NULL) 419 return; 420 421 type->ks_next = kmemstatistics; 422 kmemstatistics = type; 423 mtx_init(&type->ks_mtx, type->ks_shortdesc, "Malloc Stats", MTX_DEF); 424 mtx_unlock(&malloc_mtx); 425 } 426 427 void 428 malloc_uninit(data) 429 void *data; 430 { 431 struct malloc_type *type = (struct malloc_type *)data; 432 struct malloc_type *t; 433 434 mtx_lock(&malloc_mtx); 435 mtx_lock(&type->ks_mtx); 436 if (type->ks_magic != M_MAGIC) 437 panic("malloc type lacks magic"); 438 439 if (cnt.v_page_count == 0) 440 panic("malloc_uninit not allowed before vm init"); 441 442 if (type == kmemstatistics) 443 kmemstatistics = type->ks_next; 444 else { 445 for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) { 446 if (t->ks_next == type) { 447 t->ks_next = type->ks_next; 448 break; 449 } 450 } 451 } 452 type->ks_next = NULL; 453 mtx_destroy(&type->ks_mtx); 454 mtx_unlock(&malloc_mtx); 455 } 456 457 static int 458 sysctl_kern_malloc(SYSCTL_HANDLER_ARGS) 459 { 460 struct malloc_type *type; 461 int linesize = 128; 462 int curline; 463 int bufsize; 464 int first; 465 int error; 466 char *buf; 467 char *p; 468 int cnt; 469 int len; 470 int i; 471 472 cnt = 0; 473 474 mtx_lock(&malloc_mtx); 475 for (type = kmemstatistics; type != NULL; type = type->ks_next) 476 cnt++; 477 478 mtx_unlock(&malloc_mtx); 479 bufsize = linesize * (cnt + 1); 480 p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO); 481 mtx_lock(&malloc_mtx); 482 483 len = snprintf(p, linesize, 484 "\n Type InUse MemUse HighUse Requests Size(s)\n"); 485 p += len; 486 487 for (type = kmemstatistics; cnt != 0 && type != NULL; 488 type = type->ks_next, cnt--) { 489 if (type->ks_calls == 0) 490 continue; 491 492 curline = linesize - 2; /* Leave room for the \n */ 493 len = snprintf(p, curline, "%13s%6lu%6luK%7luK%9llu", 494 type->ks_shortdesc, 495 type->ks_inuse, 496 (type->ks_memuse + 1023) / 1024, 497 (type->ks_maxused + 1023) / 1024, 498 (long long unsigned)type->ks_calls); 499 curline -= len; 500 p += len; 501 502 first = 1; 503 for (i = 0; i < sizeof(kmemzones) / sizeof(kmemzones[0]) - 1; 504 i++) { 505 if (type->ks_size & (1 << i)) { 506 if (first) 507 len = snprintf(p, curline, " "); 508 else 509 len = snprintf(p, curline, ","); 510 curline -= len; 511 p += len; 512 513 len = snprintf(p, curline, 514 "%s", kmemzones[i].kz_name); 515 curline -= len; 516 p += len; 517 518 first = 0; 519 } 520 } 521 522 len = snprintf(p, 2, "\n"); 523 p += len; 524 } 525 526 mtx_unlock(&malloc_mtx); 527 error = SYSCTL_OUT(req, buf, p - buf); 528 529 free(buf, M_TEMP); 530 return (error); 531 } 532 533 SYSCTL_OID(_kern, OID_AUTO, malloc, CTLTYPE_STRING|CTLFLAG_RD, 534 NULL, 0, sysctl_kern_malloc, "A", "Malloc Stats"); 535 536 #ifdef MALLOC_PROFILE 537 538 static int 539 sysctl_kern_mprof(SYSCTL_HANDLER_ARGS) 540 { 541 int linesize = 64; 542 uint64_t count; 543 uint64_t waste; 544 uint64_t mem; 545 int bufsize; 546 int error; 547 char *buf; 548 int rsize; 549 int size; 550 char *p; 551 int len; 552 int i; 553 554 bufsize = linesize * (KMEM_ZSIZE + 1); 555 bufsize += 128; /* For the stats line */ 556 bufsize += 128; /* For the banner line */ 557 waste = 0; 558 mem = 0; 559 560 p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO); 561 len = snprintf(p, bufsize, 562 "\n Size Requests Real Size\n"); 563 bufsize -= len; 564 p += len; 565 566 for (i = 0; i < KMEM_ZSIZE; i++) { 567 size = i << KMEM_ZSHIFT; 568 rsize = kmemzones[kmemsize[i]].kz_size; 569 count = (long long unsigned)krequests[i]; 570 571 len = snprintf(p, bufsize, "%6d%28llu%11d\n", 572 size, (unsigned long long)count, rsize); 573 bufsize -= len; 574 p += len; 575 576 if ((rsize * count) > (size * count)) 577 waste += (rsize * count) - (size * count); 578 mem += (rsize * count); 579 } 580 581 len = snprintf(p, bufsize, 582 "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n", 583 (unsigned long long)mem, (unsigned long long)waste); 584 p += len; 585 586 error = SYSCTL_OUT(req, buf, p - buf); 587 588 free(buf, M_TEMP); 589 return (error); 590 } 591 592 SYSCTL_OID(_kern, OID_AUTO, mprof, CTLTYPE_STRING|CTLFLAG_RD, 593 NULL, 0, sysctl_kern_mprof, "A", "Malloc Profiling"); 594 #endif /* MALLOC_PROFILE */ 595