/*
 * Copyright (c) 1987, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_malloc.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD$
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#if defined(INVARIANTS) && defined(__i386__)
#include <machine/cpu.h>
#endif

/*
 * When realloc() is called, if the new size is sufficiently smaller than
 * the old size, realloc() will allocate a new, smaller block to avoid
 * wasting memory.  'Sufficiently smaller' is defined as: newsize <=
 * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'.
 */
#ifndef REALLOC_FRACTION
#define	REALLOC_FRACTION	1	/* new block if <= half the size */
#endif

MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");

MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");

static void kmeminit __P((void *));
SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL)

static MALLOC_DEFINE(M_FREE, "free", "should be on free list");

static struct malloc_type *kmemstatistics;
static struct kmembuckets bucket[MINBUCKET + 16];
static struct kmemusage *kmemusage;
static char *kmembase;
static char *kmemlimit;

static struct mtx malloc_mtx;

u_int vm_kmem_size;

#ifdef INVARIANTS
/*
 * This structure provides a set of masks to catch unaligned frees.
 */
static long addrmask[] = { 0,
	0x00000001, 0x00000003, 0x00000007, 0x0000000f,
	0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
	0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
	0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
};

/*
 * The WEIRD_ADDR is used as known text to copy into free objects so
 * that modifications after frees can be detected.
 */
#define	WEIRD_ADDR	0xdeadc0de
#define	MAX_COPY	64

/*
 * Normally the first word of the structure is used to hold the list
 * pointer for free objects.  However, when running with diagnostics,
 * we use the third and fourth fields, so as to catch modifications
 * in the most commonly trashed first two words.
 */
struct freelist {
	long	spare0;
	struct	malloc_type *type;
	long	spare1;
	caddr_t	next;
};
#else /* !INVARIANTS */
struct freelist {
	caddr_t	next;
};
#endif /* INVARIANTS */

/*
 *	malloc:
 *
 *	Allocate a block of memory.
 *
 *	If M_NOWAIT is set, this routine will not block and return NULL if
 *	the allocation fails.
 */
void *
malloc(size, type, flags)
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	register struct kmembuckets *kbp;
	register struct kmemusage *kup;
	register struct freelist *freep;
	long indx, npg, allocsize;
	int s;
	caddr_t va, cp, savedlist;
#ifdef INVARIANTS
	long *end, *lp;
	int copysize;
	const char *savedtype;
#endif
	register struct malloc_type *ksp = type;

#if defined(INVARIANTS)
	if (flags == M_WAITOK)
		KASSERT(curthread->td_intr_nesting_level == 0,
		    ("malloc(M_WAITOK) in interrupt context"));
#endif
	indx = BUCKETINDX(size);
	kbp = &bucket[indx];
	s = splmem();
	mtx_lock(&malloc_mtx);
	while (ksp->ks_memuse >= ksp->ks_limit) {
		if (flags & M_NOWAIT) {
			splx(s);
			mtx_unlock(&malloc_mtx);
			return ((void *) NULL);
		}
		if (ksp->ks_limblocks < 65535)
			ksp->ks_limblocks++;
		msleep((caddr_t)ksp, &malloc_mtx, PSWP+2, type->ks_shortdesc,
		    0);
	}
	ksp->ks_size |= 1 << indx;
#ifdef INVARIANTS
	copysize = 1 << indx < MAX_COPY ? 1 << indx : MAX_COPY;
#endif
	if (kbp->kb_next == NULL) {
		kbp->kb_last = NULL;
		if (size > MAXALLOCSAVE)
			allocsize = roundup(size, PAGE_SIZE);
		else
			allocsize = 1 << indx;
		npg = btoc(allocsize);

		mtx_unlock(&malloc_mtx);
		va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t)ctob(npg), flags);

		if (va == NULL) {
			splx(s);
			return ((void *) NULL);
		}
		/*
		 * Enter malloc_mtx after the error check to avoid having to
		 * immediately exit it again if there is an error.
		 */
		mtx_lock(&malloc_mtx);

		kbp->kb_total += kbp->kb_elmpercl;
		kup = btokup(va);
		kup->ku_indx = indx;
		if (allocsize > MAXALLOCSAVE) {
			if (npg > 65535)
				panic("malloc: allocation too large");
			kup->ku_pagecnt = npg;
			ksp->ks_memuse += allocsize;
			goto out;
		}
		kup->ku_freecnt = kbp->kb_elmpercl;
		kbp->kb_totalfree += kbp->kb_elmpercl;
		/*
		 * Just in case we blocked while allocating memory,
		 * and someone else also allocated memory for this
		 * bucket, don't assume the list is still empty.
		 */
		savedlist = kbp->kb_next;
		kbp->kb_next = cp = va + (npg * PAGE_SIZE) - allocsize;
		for (;;) {
			freep = (struct freelist *)cp;
#ifdef INVARIANTS
			/*
			 * Copy in known text to detect modification
			 * after freeing.
			 */
			end = (long *)&cp[copysize];
			for (lp = (long *)cp; lp < end; lp++)
				*lp = WEIRD_ADDR;
			freep->type = M_FREE;
#endif /* INVARIANTS */
			if (cp <= va)
				break;
			cp -= allocsize;
			freep->next = cp;
		}
		freep->next = savedlist;
		if (kbp->kb_last == NULL)
			kbp->kb_last = (caddr_t)freep;
	}
	va = kbp->kb_next;
	kbp->kb_next = ((struct freelist *)va)->next;
#ifdef INVARIANTS
	freep = (struct freelist *)va;
	savedtype = (const char *) freep->type->ks_shortdesc;
	freep->type = (struct malloc_type *)WEIRD_ADDR;
	if ((intptr_t)(void *)&freep->next & 0x2)
		freep->next = (caddr_t)((WEIRD_ADDR >> 16)|(WEIRD_ADDR << 16));
	else
		freep->next = (caddr_t)WEIRD_ADDR;
	end = (long *)&va[copysize];
	for (lp = (long *)va; lp < end; lp++) {
		if (*lp == WEIRD_ADDR)
			continue;
		printf("%s %ld of object %p size %lu %s %s (0x%lx != 0x%lx)\n",
		    "Data modified on freelist: word",
		    (long)(lp - (long *)va), (void *)va, size,
		    "previous type", savedtype, *lp, (u_long)WEIRD_ADDR);
		break;
	}
	freep->spare0 = 0;
#endif /* INVARIANTS */
	kup = btokup(va);
	if (kup->ku_indx != indx)
		panic("malloc: wrong bucket");
	if (kup->ku_freecnt == 0)
		panic("malloc: lost data");
	kup->ku_freecnt--;
	kbp->kb_totalfree--;
	ksp->ks_memuse += 1 << indx;
out:
	kbp->kb_calls++;
	ksp->ks_inuse++;
	ksp->ks_calls++;
	if (ksp->ks_memuse > ksp->ks_maxused)
		ksp->ks_maxused = ksp->ks_memuse;
	splx(s);
	mtx_unlock(&malloc_mtx);
	/* XXX: Do idle pre-zeroing. */
	if (va != NULL && (flags & M_ZERO))
		bzero(va, size);
	return ((void *) va);
}

/*
 *	free:
 *
 *	Free a block of memory allocated by malloc.
 *
 *	This routine may not block.
 */
void
free(addr, type)
	void *addr;
	struct malloc_type *type;
{
	register struct kmembuckets *kbp;
	register struct kmemusage *kup;
	register struct freelist *freep;
	long size;
	int s;
#ifdef INVARIANTS
	struct freelist *fp;
	long *end, *lp, alloc, copysize;
#endif
	register struct malloc_type *ksp = type;

	/* free(NULL, ...) does nothing */
	if (addr == NULL)
		return;

	KASSERT(kmembase <= (char *)addr && (char *)addr < kmemlimit,
	    ("free: address %p out of range", (void *)addr));
	kup = btokup(addr);
	size = 1 << kup->ku_indx;
	kbp = &bucket[kup->ku_indx];
	s = splmem();
	mtx_lock(&malloc_mtx);
#ifdef INVARIANTS
	/*
	 * Check for returns of data that do not point to the
	 * beginning of the allocation.
	 */
	if (size > PAGE_SIZE)
		alloc = addrmask[BUCKETINDX(PAGE_SIZE)];
	else
		alloc = addrmask[kup->ku_indx];
	if (((uintptr_t)(void *)addr & alloc) != 0)
		panic("free: unaligned addr %p, size %ld, type %s, mask %ld",
		    (void *)addr, size, type->ks_shortdesc, alloc);
#endif /* INVARIANTS */
	if (size > MAXALLOCSAVE) {
		mtx_unlock(&malloc_mtx);
		kmem_free(kmem_map, (vm_offset_t)addr, ctob(kup->ku_pagecnt));
		mtx_lock(&malloc_mtx);

		size = kup->ku_pagecnt << PAGE_SHIFT;
		ksp->ks_memuse -= size;
		kup->ku_indx = 0;
		kup->ku_pagecnt = 0;
		if (ksp->ks_memuse + size >= ksp->ks_limit &&
		    ksp->ks_memuse < ksp->ks_limit)
			wakeup((caddr_t)ksp);
		ksp->ks_inuse--;
		kbp->kb_total -= 1;
		splx(s);
		mtx_unlock(&malloc_mtx);
		return;
	}
	freep = (struct freelist *)addr;
#ifdef INVARIANTS
	/*
	 * Check for multiple frees.  Use a quick check to see if
	 * it looks free before laboriously searching the freelist.
	 */
	if (freep->spare0 == WEIRD_ADDR) {
		fp = (struct freelist *)kbp->kb_next;
		while (fp) {
			if (fp->spare0 != WEIRD_ADDR)
				panic("free: free item %p modified", fp);
			else if (addr == (caddr_t)fp)
				panic("free: multiple freed item %p", addr);
			fp = (struct freelist *)fp->next;
		}
	}
	/*
	 * Copy in known text to detect modification after freeing
	 * and to make it look free.  Also, save the type being freed
	 * so we can list likely culprit if modification is detected
	 * when the object is reallocated.
	 */
	copysize = size < MAX_COPY ? size : MAX_COPY;
	end = (long *)&((caddr_t)addr)[copysize];
	for (lp = (long *)addr; lp < end; lp++)
		*lp = WEIRD_ADDR;
	freep->type = type;
#endif /* INVARIANTS */
	kup->ku_freecnt++;
	if (kup->ku_freecnt >= kbp->kb_elmpercl) {
		if (kup->ku_freecnt > kbp->kb_elmpercl)
			panic("free: multiple frees");
		else if (kbp->kb_totalfree > kbp->kb_highwat)
			kbp->kb_couldfree++;
	}
	kbp->kb_totalfree++;
	ksp->ks_memuse -= size;
	if (ksp->ks_memuse + size >= ksp->ks_limit &&
	    ksp->ks_memuse < ksp->ks_limit)
		wakeup((caddr_t)ksp);
	ksp->ks_inuse--;
#ifdef OLD_MALLOC_MEMORY_POLICY
	if (kbp->kb_next == NULL)
		kbp->kb_next = addr;
	else
		((struct freelist *)kbp->kb_last)->next = addr;
	freep->next = NULL;
	kbp->kb_last = addr;
#else
	/*
	 * Return memory to the head of the queue for quick reuse.  This
	 * can improve performance by improving the probability of the
	 * item being in the cache when it is reused.
	 */
	if (kbp->kb_next == NULL) {
		kbp->kb_next = addr;
		kbp->kb_last = addr;
		freep->next = NULL;
	} else {
		freep->next = kbp->kb_next;
		kbp->kb_next = addr;
	}
#endif
	splx(s);
	mtx_unlock(&malloc_mtx);
}

/*
 *	realloc: change the size of a memory block
 */
void *
realloc(addr, size, type, flags)
	void *addr;
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	struct kmemusage *kup;
	unsigned long alloc;
	void *newaddr;

	/* realloc(NULL, ...) is equivalent to malloc(...) */
	if (addr == NULL)
		return (malloc(size, type, flags));

	/* Sanity check */
	KASSERT(kmembase <= (char *)addr && (char *)addr < kmemlimit,
	    ("realloc: address %p out of range", (void *)addr));

	/* Get the size of the original block */
	kup = btokup(addr);
	alloc = 1 << kup->ku_indx;
	if (alloc > MAXALLOCSAVE)
		alloc = kup->ku_pagecnt << PAGE_SHIFT;

	/* Reuse the original block if appropriate */
	if (size <= alloc
	    && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE))
		return (addr);

	/* Allocate a new, bigger (or smaller) block */
	if ((newaddr = malloc(size, type, flags)) == NULL)
		return (NULL);

	/* Copy over original contents */
	bcopy(addr, newaddr, min(size, alloc));
	free(addr, type);
	return (newaddr);
}

/*
 *	reallocf: same as realloc() but free memory on failure.
 */
void *
reallocf(addr, size, type, flags)
	void *addr;
	unsigned long size;
	struct malloc_type *type;
	int flags;
{
	void *mem;

	if ((mem = realloc(addr, size, type, flags)) == NULL)
		free(addr, type);
	return (mem);
}

/*
 * Initialize the kernel memory allocator
 */
/* ARGSUSED*/
static void
kmeminit(dummy)
	void *dummy;
{
	register long indx;
	u_long npg;
	u_long mem_size;

#if ((MAXALLOCSAVE & (MAXALLOCSAVE - 1)) != 0)
#error "kmeminit: MAXALLOCSAVE not power of 2"
#endif
#if (MAXALLOCSAVE > MINALLOCSIZE * 32768)
#error "kmeminit: MAXALLOCSAVE too big"
#endif
#if (MAXALLOCSAVE < PAGE_SIZE)
#error "kmeminit: MAXALLOCSAVE too small"
#endif

	mtx_init(&malloc_mtx, "malloc", MTX_DEF);

	/*
	 * Try to auto-tune the kernel memory size, so that it is
	 * more applicable for a wider range of machine sizes.
	 * On an X86, a VM_KMEM_SIZE_SCALE value of 4 is good, while
	 * a VM_KMEM_SIZE of 12MB is a fair compromise.  The
	 * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
	 * available, and on an X86 with a total KVA space of 256MB,
	 * try to keep VM_KMEM_SIZE_MAX at 80MB or below.
	 *
	 * Note that the kmem_map is also used by the zone allocator,
	 * so make sure that there is enough space.
	 */
	vm_kmem_size = VM_KMEM_SIZE;
	mem_size = cnt.v_page_count * PAGE_SIZE;

#if defined(VM_KMEM_SIZE_SCALE)
	if ((mem_size / VM_KMEM_SIZE_SCALE) > vm_kmem_size)
		vm_kmem_size = mem_size / VM_KMEM_SIZE_SCALE;
#endif

#if defined(VM_KMEM_SIZE_MAX)
	if (vm_kmem_size >= VM_KMEM_SIZE_MAX)
		vm_kmem_size = VM_KMEM_SIZE_MAX;
#endif

	/* Allow final override from the kernel environment */
	TUNABLE_INT_FETCH("kern.vm.kmem.size", &vm_kmem_size);

	/*
	 * Limit kmem virtual size to twice the physical memory.
	 * This allows for kmem map sparseness, but limits the size
	 * to something sane.  Be careful to not overflow the 32bit
	 * ints while doing the check.
	 */
	if ((vm_kmem_size / 2) > (cnt.v_page_count * PAGE_SIZE))
		vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;

	/*
	 * In mbuf_init(), we set up submaps for mbufs and clusters, in which
	 * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES),
	 * respectively.  Mathematically, this means that what we do here may
	 * amount to slightly more address space than we need for the submaps,
	 * but it never hurts to have an extra page in kmem_map.
	 */
	npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt *
	    sizeof(u_int) + vm_kmem_size) / PAGE_SIZE;

	kmemusage = (struct kmemusage *) kmem_alloc(kernel_map,
	    (vm_size_t)(npg * sizeof(struct kmemusage)));
	kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
	    (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE));
	kmem_map->system_map = 1;
	for (indx = 0; indx < MINBUCKET + 16; indx++) {
		if (1 << indx >= PAGE_SIZE)
			bucket[indx].kb_elmpercl = 1;
		else
			bucket[indx].kb_elmpercl = PAGE_SIZE / (1 << indx);
		bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl;
	}
}

void
malloc_init(data)
	void *data;
{
	struct malloc_type *type = (struct malloc_type *)data;

	if (type->ks_magic != M_MAGIC)
		panic("malloc type lacks magic");

	if (type->ks_limit != 0)
		return;

	if (cnt.v_page_count == 0)
		panic("malloc_init not allowed before vm init");

	/*
	 * The default limits for each malloc region is 1/2 of the
	 * malloc portion of the kmem map size.
	 */
	type->ks_limit = vm_kmem_size / 2;
	type->ks_next = kmemstatistics;
	kmemstatistics = type;
}

void
malloc_uninit(data)
	void *data;
{
	struct malloc_type *type = (struct malloc_type *)data;
	struct malloc_type *t;
#ifdef INVARIANTS
	struct kmembuckets *kbp;
	struct freelist *freep;
	long indx;
	int s;
#endif

	if (type->ks_magic != M_MAGIC)
		panic("malloc type lacks magic");

	if (cnt.v_page_count == 0)
		panic("malloc_uninit not allowed before vm init");

	if (type->ks_limit == 0)
		panic("malloc_uninit on uninitialized type");

#ifdef INVARIANTS
	s = splmem();
	mtx_lock(&malloc_mtx);
	for (indx = 0; indx < MINBUCKET + 16; indx++) {
		kbp = bucket + indx;
		freep = (struct freelist*)kbp->kb_next;
		while (freep) {
			if (freep->type == type)
				freep->type = M_FREE;
			freep = (struct freelist*)freep->next;
		}
	}
	splx(s);
	mtx_unlock(&malloc_mtx);

	if (type->ks_memuse != 0)
		printf("malloc_uninit: %ld bytes of '%s' still allocated\n",
		    type->ks_memuse, type->ks_shortdesc);
#endif

	if (type == kmemstatistics)
		kmemstatistics = type->ks_next;
	else {
		for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) {
			if (t->ks_next == type) {
				t->ks_next = type->ks_next;
				break;
			}
		}
	}
	type->ks_next = NULL;
	type->ks_limit = 0;
}
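
/*
 * Illustrative sketch (not part of the original file): how a consumer is
 * expected to call into the allocator defined above.  The malloc type
 * M_TEMP and the flags M_NOWAIT and M_ZERO are taken from this file;
 * "foo_softc" and "foo_attach" are hypothetical names used only for
 * the example.
 *
 *	static int
 *	foo_attach(void)
 *	{
 *		struct foo_softc *sc;
 *
 *		sc = malloc(sizeof(*sc), M_TEMP, M_NOWAIT | M_ZERO);
 *		if (sc == NULL)
 *			return (ENOMEM);	-- M_NOWAIT may fail
 *		...
 *		free(sc, M_TEMP);		-- same type as the malloc
 *		return (0);
 *	}
 */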