/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <mtmalloc.h>
#include "mtmalloc_impl.h"
#include <unistd.h>
#include <synch.h>
#include <thread.h>
#include <stdio.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <sys/param.h>
#include <sys/sysmacros.h>

/*
 * To turn on the asserts just compile -DDEBUG
 */

#ifndef	DEBUG
#define	NDEBUG
#endif

#include <assert.h>

/*
 * The MT hot malloc implementation contained herein is designed to be
 * plug-compatible with the libc version of malloc. It is not intended
 * to replace that implementation until we decide that it is ok to break
 * customer apps (Solaris 3.0).
 *
 * For requests up to 2^^16, the allocator initializes itself into NCPUS
 * worth of chains of caches. When a memory request is made, the calling thread
 * is vectored into one of NCPUS worth of caches. The LWP id gives us a cheap,
 * contention-reducing index to use; eventually, this should be replaced with
 * the actual CPU sequence number, when an interface to get it is available.
 *
 * Once the thread is vectored into one of the lists of caches the real
 * allocation of the memory begins. The size is examined to figure out which
 * bucket the allocation should be satisfied from. The management of free
 * buckets is done via a bitmask. A free bucket is represented by a 1. The
 * first free bit represents the first free bucket. The position of the bit
 * represents the position of the bucket in the arena.
 *
 * When the memory from the arena is handed out, the address of the cache
 * control structure is written in the word preceding the returned memory.
 * This cache control address is used during free() to mark the buffer free
 * in the cache control structure.
 *
 * When all available memory in a cache has been depleted, a new chunk of memory
 * is allocated via sbrk(). The new cache is allocated from this chunk of memory
 * and initialized in the function create_cache(). New caches are installed at
 * the front of a singly linked list of the same size memory pools. This helps
 * to ensure that there will tend to be available memory in the beginning of the
 * list.
 *
 * Long linked lists hurt performance. To decrease this effect, there is a
 * tunable, requestsize, that bumps up the sbrk allocation size and thus
 * increases the number of available blocks within an arena.
 * We also keep a "hint" for each cache list, which is the last cache in the
 * list allocated from. This lowers the cost of searching if there are a lot
 * of fully allocated blocks at the front of the list.
 *
 * For requests greater than 2^^16 (oversize allocations), there are two pieces
 * of overhead. There is the OVERHEAD used to hold the cache addr
 * (&oversize_list), plus an oversize_t structure to further describe the block.
 *
 * The oversize list is kept as defragmented as possible by coalescing
 * freed oversized allocations with adjacent neighbors.
 *
 * Addresses handed out are stored in a hash table, and are aligned on
 * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded
 * up where necessary in order to achieve this. This eases the implementation
 * of MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
 *
 * A memalign allocation takes memalign header overhead. There are two
 * types of memalign headers, distinguished by MTMALLOC_MEMALIGN_MAGIC
 * and MTMALLOC_MEMALIGN_MIN_MAGIC. When the distance from the malloc'ed
 * address to the aligned address is exactly OVERHEAD, we create a header
 * taking only one OVERHEAD slot with the magic number
 * MTMALLOC_MEMALIGN_MIN_MAGIC, and we know that subtracting OVERHEAD from
 * the memaligned address gets us back to the malloc'ed address. Otherwise,
 * we create a memalign header taking two OVERHEAD slots: one stores the
 * MTMALLOC_MEMALIGN_MAGIC magic number, the other points back to the
 * malloc'ed address.
 */

#if defined(__i386) || defined(__amd64)
#include <arpa/inet.h>	/* for htonl() */
#endif

static void * morecore(size_t);
static int setup_caches(void);
static void create_cache(cache_t *, size_t bufsize, uint_t hunks);
static void * malloc_internal(size_t, percpu_t *);
static void * oversize(size_t);
static oversize_t *find_oversize(size_t);
static void add_oversize(oversize_t *);
static void copy_pattern(uint32_t, void *, size_t);
static void * verify_pattern(uint32_t, void *, size_t);
static void reinit_cpu_list(void);
static void reinit_cache(cache_t *);
static void free_oversize(oversize_t *);
static oversize_t *oversize_header_alloc(uintptr_t, size_t);

/*
 * oversize hash table stuff
 */
#define	NUM_BUCKETS	67	/* must be prime */
#define	HASH_OVERSIZE(caddr)	((uintptr_t)(caddr) % NUM_BUCKETS)
oversize_t *ovsz_hashtab[NUM_BUCKETS];

/*
 * Gets a decent "current cpu identifier", to be used to reduce contention.
 * Eventually, this should be replaced by an interface to get the actual
 * CPU sequence number in libthread/liblwp.
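 *
 * For illustration (hypothetical numbers): ncpus is twice the configured
 * processor count rounded up to a power of two, so a 2-CPU machine gets
 * ncpus == 4 and cpu_mask == 3; a thread whose _thr_self() returns 7 is
 * then vectored to cpu_list[7 & 3], i.e. cpu_list[3].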
 */
extern uint_t _thr_self();
#pragma weak _thr_self
#define	get_curcpu_func() (curcpu_func)_thr_self

#define	ALIGN(x, a)	((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
			& ~((uintptr_t)(a) - 1)))

/* need this to deal with little endianness of x86 */
#if defined(__i386) || defined(__amd64)
#define	FLIP_EM(x)	htonl((x))
#else
#define	FLIP_EM(x)	(x)
#endif

#define	INSERT_ONLY			0
#define	COALESCE_LEFT			0x00000001
#define	COALESCE_RIGHT			0x00000002
#define	COALESCE_WITH_BOTH_SIDES	(COALESCE_LEFT | COALESCE_RIGHT)

#define	OVERHEAD	8	/* size needed to write cache addr */
#define	HUNKSIZE	8192	/* just a multiplier */

#define	MAX_CACHED_SHIFT	16	/* 64K is the max cached size */
#define	MAX_CACHED		(1 << MAX_CACHED_SHIFT)
#define	MIN_CACHED_SHIFT	4	/* smaller requests rounded up */
#define	MTMALLOC_MIN_ALIGN	8	/* min guaranteed alignment */

#define	NUM_CACHES	(MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
#define	CACHELIST_SIZE	ALIGN(NUM_CACHES * sizeof (cache_head_t), \
    CACHE_COHERENCY_UNIT)

#define	MINSIZE		9	/* for requestsize, tunable */
#define	MAXSIZE		256	/* arbitrary, big enough, for requestsize */

#define	FREEPATTERN	0xdeadbeef	/* debug fill pattern for free buf */
#define	INITPATTERN	0xbaddcafe	/* debug fill pattern for new buf */

#define	misaligned(p)	((unsigned)(p) & (sizeof (int) - 1))
#define	IS_OVERSIZE(x, y)	(((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))

static long requestsize = MINSIZE; /* 9 pages per cache; tunable; 9 is min */

static uint_t cpu_mask;
static curcpu_func curcpu;

static int32_t debugopt;
static int32_t reinit;

static percpu_t *cpu_list;
static oversize_t oversize_list;
static mutex_t oversize_lock;

static int ncpus;

#define	MTMALLOC_OVERSIZE_MAGIC		((uintptr_t)&oversize_list)
#define	MTMALLOC_MEMALIGN_MAGIC		((uintptr_t)&oversize_list + 1)
#define	MTMALLOC_MEMALIGN_MIN_MAGIC	((uintptr_t)&oversize_list + 2)

/*
 * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
 * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
 * this is achieved.
 */
#define	OVSZ_SIZE	(ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
#define	OVSZ_HEADER_SIZE	(OVSZ_SIZE + OVERHEAD)

/*
 * memalign header takes 2 OVERHEAD space. One for memalign magic, and the
 * other one points back to the start address of originally allocated space.
 */
#define	MEMALIGN_HEADER_SIZE	2 * OVERHEAD
#define	MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
	if (shift == OVERHEAD)\
		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
			MTMALLOC_MEMALIGN_MIN_MAGIC; \
	else {\
		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
			MTMALLOC_MEMALIGN_MAGIC; \
		*((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \
			(uintptr_t)malloc_addr; \
	}

void *
malloc(size_t bytes)
{
	percpu_t *list_rotor;
	uint_t list_index;

	/*
	 * this test is due to linking with libthread.
	 * There are malloc calls prior to this library
	 * being initialized.
	 *
	 * If setup_caches fails, we set ENOMEM and return NULL
	 */
	if (cpu_list == (percpu_t *)NULL) {
		if (setup_caches() == 0) {
			errno = ENOMEM;
			return (NULL);
		}
	}

	if (bytes > MAX_CACHED)
		return (oversize(bytes));

	list_index = (curcpu() & cpu_mask);

	list_rotor = &cpu_list[list_index];

	return (malloc_internal(bytes, list_rotor));
}

void *
realloc(void * ptr, size_t bytes)
{
	void *new, *data_ptr;
	cache_t *cacheptr;
	caddr_t mem;
	size_t shift = 0;

	if (ptr == NULL)
		return (malloc(bytes));

	if (bytes == 0) {
		free(ptr);
		return (NULL);
	}

	data_ptr = ptr;
	mem = (caddr_t)ptr - OVERHEAD;

	new = malloc(bytes);

	if (new == NULL)
		return (NULL);

	/*
	 * If new == ptr, ptr has previously been freed. Passing a freed pointer
	 * to realloc() is not allowed - unless the caller specifically states
	 * otherwise, in which case we must avoid freeing ptr (ie new) before we
	 * return new. There is (obviously) no requirement to memcpy() ptr to
	 * new before we return.
	 */
	if (new == ptr) {
		if (!(debugopt & MTDOUBLEFREE))
			abort();
		return (new);
	}

	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
		mem -= OVERHEAD;
		ptr = (void *)*(uintptr_t *)mem;
		mem = (caddr_t)ptr - OVERHEAD;
		shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr);
	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
		ptr = (void *) mem;
		mem -= OVERHEAD;
		shift = OVERHEAD;
	}

	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
		oversize_t *old;

		old = (oversize_t *)(mem - OVSZ_SIZE);
		(void) memcpy(new, data_ptr, MIN(bytes, old->size - shift));
		free(ptr);
		return (new);
	}

	cacheptr = (cache_t *)*(uintptr_t *)mem;

	(void) memcpy(new, data_ptr,
	    MIN(cacheptr->mt_size - OVERHEAD - shift, bytes));
	free(ptr);

	return (new);
}

void *
calloc(size_t nelem, size_t bytes)
{
	void * ptr;
	size_t size = nelem * bytes;

	ptr = malloc(size);
	if (ptr == NULL)
		return (NULL);
	bzero(ptr, size);

	return (ptr);
}

void
free(void * ptr)
{
	cache_t *cacheptr;
	caddr_t mem;
	int32_t i;
	caddr_t freeblocks;
	uintptr_t offset;
	uchar_t mask;
	int32_t which_bit, num_bytes;

	if (ptr == NULL)
		return;

	mem = (caddr_t)ptr - OVERHEAD;

	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
		mem -= OVERHEAD;
		ptr = (void *)*(uintptr_t *)mem;
		mem = (caddr_t)ptr - OVERHEAD;
	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
		ptr = (void *) mem;
		mem -= OVERHEAD;
	}

	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
		oversize_t *big, **opp;
		int bucket;

		big = (oversize_t *)(mem - OVSZ_SIZE);
		(void) mutex_lock(&oversize_lock);

		bucket = HASH_OVERSIZE(big->addr);
		for (opp = &ovsz_hashtab[bucket]; *opp != NULL;
		    opp = &(*opp)->hash_next)
			if (*opp == big)
				break;

		if (*opp == NULL) {
			if (!(debugopt & MTDOUBLEFREE))
				abort();
			(void) mutex_unlock(&oversize_lock);
			return;
		}

		*opp = big->hash_next;	/* remove big from the hash table */
		big->hash_next = NULL;

		if (debugopt & MTDEBUGPATTERN)
			copy_pattern(FREEPATTERN, ptr, big->size);
		add_oversize(big);
		(void) mutex_unlock(&oversize_lock);
		return;
	}

	cacheptr = (cache_t *)*(uintptr_t *)mem;
	freeblocks = cacheptr->mt_freelist;

	/*
	 * This is the distance measured in bits into the arena.
	 * The value of offset is in bytes but there is a 1-1 correlation
	 * between distance into the arena and distance into the
	 * freelist bitmask.
	 */
	offset = mem - cacheptr->mt_arena;

	/*
	 * i is total number of bits to offset into freelist bitmask.
	 */

	i = offset / cacheptr->mt_size;

	num_bytes = i >> 3;

	/*
	 * which_bit is the bit offset into the byte in the freelist.
	 * If our freelist bitmask looks like 0xf3 and we are freeing
	 * block 5 (ie: the 6th block) our mask will be 0xf7 after
	 * the free. Things go left to right; that's why the mask is 0x80
	 * and not 0x01.
	 */
	which_bit = i - (num_bytes << 3);

	mask = 0x80 >> which_bit;

	freeblocks += num_bytes;

	if (debugopt & MTDEBUGPATTERN)
		copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD);

	(void) mutex_lock(&cacheptr->mt_cache_lock);

	if (*freeblocks & mask) {
		if (!(debugopt & MTDOUBLEFREE))
			abort();
	} else {
		*freeblocks |= mask;
		cacheptr->mt_nfree++;
	}

	(void) mutex_unlock(&cacheptr->mt_cache_lock);
}

void *
memalign(size_t alignment, size_t size)
{
	size_t alloc_size;
	uintptr_t offset;
	void *alloc_buf;
	void *ret_buf;

	if (size == 0 || alignment == 0 ||
	    misaligned(alignment) ||
	    (alignment & (alignment - 1)) != 0) {
		errno = EINVAL;
		return (NULL);
	}

	/* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
	if (alignment <= MTMALLOC_MIN_ALIGN)
		return (malloc(size));

	alloc_size = size + alignment - MTMALLOC_MIN_ALIGN;

	if (alloc_size < size) { /* overflow */
		errno = ENOMEM;
		return (NULL);
	}

	alloc_buf = malloc(alloc_size);

	if (alloc_buf == NULL)
		/* malloc sets errno */
		return (NULL);

	/*
	 * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
	 * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since
	 * we will use alloc_size to return the excess fragments to the free
	 * list, we also round-up alloc_size if necessary.
	 */
	if ((alloc_size > MAX_CACHED) &&
	    (alloc_size & (MTMALLOC_MIN_ALIGN - 1)))
		alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN);

	if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) {
		/* aligned correctly */

		size_t frag_size = alloc_size -
		    (size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

		/*
		 * If the leftover piece of the memory is > MAX_CACHED,
		 * split off the piece and return it back to the freelist.
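		 *
		 * For illustration (hypothetical request): memalign(131072,
		 * 200000) makes alloc_size 331064; if malloc() happens to hand
		 * back a buffer that is already 131072-byte aligned, frag_size
		 * is roughly 131000 bytes (less the oversize header), which is
		 * > MAX_CACHED, so the tail is split off and freed below.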
		 */
		if (IS_OVERSIZE(frag_size, alloc_size)) {
			oversize_t *orig, *tail;
			uintptr_t taddr;
			size_t data_size;
			taddr = ALIGN((uintptr_t)alloc_buf + size,
			    MTMALLOC_MIN_ALIGN);
			data_size = taddr - (uintptr_t)alloc_buf;
			orig = (oversize_t *)((uintptr_t)alloc_buf -
			    OVSZ_HEADER_SIZE);
			frag_size = orig->size - data_size -
			    OVSZ_HEADER_SIZE;
			orig->size = data_size;
			tail = oversize_header_alloc(taddr, frag_size);
			free_oversize(tail);
		}
		ret_buf = alloc_buf;
	} else {
		uchar_t	oversize_bits = 0;
		size_t	head_sz, data_sz, tail_sz;
		uintptr_t ret_addr, taddr, shift, tshift;
		oversize_t *orig, *tail;
		size_t tsize;

		/* needs to be aligned */
		shift = alignment - offset;

		assert(shift >= MTMALLOC_MIN_ALIGN);

		ret_addr = ((uintptr_t)alloc_buf + shift);
		ret_buf = (void *)ret_addr;

		if (alloc_size <= MAX_CACHED) {
			MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf);
			return (ret_buf);
		}

		/*
		 * Only check for the fragments when the memory is allocated
		 * from oversize_list. Split off a fragment and return it
		 * to the oversize freelist when it's > MAX_CACHED.
		 */

		head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE);

		tail_sz = alloc_size -
		    (shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

		oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) |
		    IS_OVERSIZE(size, alloc_size) << DATA_SHIFT |
		    IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT;

		switch (oversize_bits) {
			case NONE_OVERSIZE:
			case DATA_OVERSIZE:
				MEMALIGN_HEADER_ALLOC(ret_addr, shift,
				    alloc_buf);
				break;
			case HEAD_OVERSIZE:
				/*
				 * If we can extend data > MAX_CACHED and have
				 * head still > MAX_CACHED, we split head-end
				 * as the case of head-end and data oversized,
				 * otherwise just create memalign header.
				 */
				tsize = (shift + size) - (MAX_CACHED + 8 +
				    MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

				if (!IS_OVERSIZE(tsize, alloc_size)) {
					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
					    alloc_buf);
					break;
				} else {
					tsize += OVSZ_HEADER_SIZE;
					taddr = ALIGN((uintptr_t)alloc_buf +
					    tsize, MTMALLOC_MIN_ALIGN);
					tshift = ret_addr - taddr;
					MEMALIGN_HEADER_ALLOC(ret_addr, tshift,
					    taddr);
					ret_addr = taddr;
					shift = ret_addr - (uintptr_t)alloc_buf;
				}
				/* FALLTHROUGH */
			case HEAD_AND_DATA_OVERSIZE:
				/*
				 * Split off the head fragment and
				 * return it back to oversize freelist.
				 * Create oversize header for the piece
				 * of (data + tail fragment).
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				(void) oversize_header_alloc(ret_addr -
				    OVSZ_HEADER_SIZE,
				    (orig->size - shift));
				orig->size = shift - OVSZ_HEADER_SIZE;

				/* free up the head fragment */
				free_oversize(orig);
				break;
			case TAIL_OVERSIZE:
				/*
				 * If we can extend data > MAX_CACHED and have
				 * tail-end still > MAX_CACHED, we split tail
				 * end, otherwise just create memalign header.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size - (MAX_CACHED + 8 +
				    shift + OVSZ_HEADER_SIZE +
				    MTMALLOC_MIN_ALIGN);
				if (!IS_OVERSIZE(tsize, alloc_size)) {
					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
					    alloc_buf);
					break;
				} else {
					size = MAX_CACHED + 8;
				}
				/* FALLTHROUGH */
			case DATA_AND_TAIL_OVERSIZE:
				/*
				 * Split off the tail fragment and
				 * return it back to oversize freelist.
				 * Create memalign header and adjust
				 * the size for the piece of
				 * (head fragment + data).
				 */
				taddr = ALIGN(ret_addr + size,
				    MTMALLOC_MIN_ALIGN);
				data_sz = (size_t)(taddr -
				    (uintptr_t)alloc_buf);
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size - data_sz;
				orig->size = data_sz;
				MEMALIGN_HEADER_ALLOC(ret_buf, shift,
				    alloc_buf);
				tsize -= OVSZ_HEADER_SIZE;
				tail = oversize_header_alloc(taddr, tsize);
				free_oversize(tail);
				break;
			case HEAD_AND_TAIL_OVERSIZE:
				/*
				 * Split off the head fragment.
				 * We try to free up the tail-end when we can
				 * extend the data size to (MAX_CACHED + 8)
				 * and the tail-end remains oversized.
				 * The bottom line is that all split pieces
				 * should be oversized.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size - (MAX_CACHED + 8 +
				    OVSZ_HEADER_SIZE + shift +
				    MTMALLOC_MIN_ALIGN);

				if (!IS_OVERSIZE(tsize, alloc_size)) {
					/*
					 * If the chunk is not big enough
					 * to make both data and tail oversize
					 * we just keep them as one piece.
					 */
					(void) oversize_header_alloc(ret_addr -
					    OVSZ_HEADER_SIZE,
					    orig->size - shift);
					orig->size = shift -
					    OVSZ_HEADER_SIZE;
					free_oversize(orig);
					break;
				} else {
					/*
					 * Extend data size > MAX_CACHED
					 * and handle it as if head, data and
					 * tail are all oversized.
					 */
					size = MAX_CACHED + 8;
				}
				/* FALLTHROUGH */
			case ALL_OVERSIZE:
				/*
				 * Split off the head and tail fragments,
				 * return them back to the oversize freelist.
				 * Alloc oversize header for data seg.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size;
				orig->size = shift - OVSZ_HEADER_SIZE;
				free_oversize(orig);

				taddr = ALIGN(ret_addr + size,
				    MTMALLOC_MIN_ALIGN);
				data_sz = taddr - ret_addr;
				assert(tsize > (shift + data_sz +
				    OVSZ_HEADER_SIZE));
				tail_sz = tsize -
				    (shift + data_sz + OVSZ_HEADER_SIZE);

				/* create oversize header for data seg */
				(void) oversize_header_alloc(ret_addr -
				    OVSZ_HEADER_SIZE, data_sz);

				/* create oversize header for tail fragment */
				tail = oversize_header_alloc(taddr, tail_sz);
				free_oversize(tail);
				break;
			default:
				/* should not reach here */
				assert(0);
		}
	}
	return (ret_buf);
}


void *
valloc(size_t size)
{
	static unsigned pagesize;

	if (size == 0)
		return (NULL);

	if (!pagesize)
		pagesize = sysconf(_SC_PAGESIZE);

	return (memalign(pagesize, size));
}

void
mallocctl(int cmd, long value)
{
	switch (cmd) {

	case MTDEBUGPATTERN:
		/*
		 * Reinitialize free blocks in case malloc() is called prior
		 * to mallocctl().
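		 *
		 * Illustrative usage from application code (a sketch, not
		 * part of this library): calling
		 *	mallocctl(MTDEBUGPATTERN, 1);
		 *	mallocctl(MTINITBUFFER, 1);
		 * turns on the 0xdeadbeef free-fill and 0xbaddcafe init-fill
		 * checks, and
		 *	mallocctl(MTCHUNKSIZE, 32);
		 * grows each sbrk request to 32 * HUNKSIZE.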
		 */
		if (value && !(debugopt & cmd)) {
			reinit++;
			debugopt |= cmd;
			reinit_cpu_list();
		}
		/*FALLTHRU*/
	case MTDOUBLEFREE:
	case MTINITBUFFER:
		if (value)
			debugopt |= cmd;
		else
			debugopt &= ~cmd;
		break;
	case MTCHUNKSIZE:
		if (value >= MINSIZE && value <= MAXSIZE)
			requestsize = value;
		break;
	default:
		break;
	}
}

/*
 * if this function is changed, update the fallback code in setup_caches to
 * set ncpus to the number of possible return values. (currently 1)
 */
static uint_t
fallback_curcpu(void)
{
	return (0);
}

/*
 * Returns non-zero on success, zero on failure.
 *
 * This carefully doesn't set cpu_list until initialization is finished.
 */
static int
setup_caches(void)
{
	static mutex_t init_lock = DEFAULTMUTEX;

	uintptr_t oldbrk;
	uintptr_t newbrk;

	size_t cache_space_needed;
	size_t padding;

	curcpu_func new_curcpu;
	uint_t new_cpu_mask;
	percpu_t *new_cpu_list;

	uint_t i, j;
	uintptr_t list_addr;

	(void) mutex_lock(&init_lock);
	if (cpu_list != NULL) {
		(void) mutex_unlock(&init_lock);
		return (1);	/* success -- already initialized */
	}

	new_curcpu = get_curcpu_func();
	if (new_curcpu == NULL) {
		new_curcpu = fallback_curcpu;
		ncpus = 1;
	} else {
		if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0)
			ncpus = 4;	/* decent default value */
	}
	assert(ncpus > 0);

	/* round ncpus up to a power of 2 */
	while (ncpus & (ncpus - 1))
		ncpus++;

	new_cpu_mask = ncpus - 1;	/* create the cpu mask */

	/*
	 * We now do some magic with the brk. What we want to get in the
	 * end is a bunch of well-aligned stuff in a big initial allocation.
	 * Along the way, we do sanity checks to make sure no one else has
	 * touched the brk (which shouldn't happen, but it's always good to
	 * check).
	 *
	 * First, make sure sbrk is sane, and store the current brk in oldbrk.
	 */
	oldbrk = (uintptr_t)sbrk(0);
	if ((void *)oldbrk == (void *)-1) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* sbrk is broken -- we're doomed. */
	}

	/*
	 * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
	 * the percpu structures and cache lists will be properly aligned.
	 * (A later alignment to HUNKSIZE additionally ensures that all hunks
	 * are page-aligned, assuming HUNKSIZE >= PAGESIZE, so they can be
	 * paged out individually.)
	 */
	newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT);
	if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* someone else sbrked */
	}

	/*
	 * For each cpu, there is one percpu_t and a list of caches
	 */
	cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE);

	new_cpu_list = (percpu_t *)sbrk(cache_space_needed);

	if (new_cpu_list == (percpu_t *)-1 ||
	    (uintptr_t)new_cpu_list != newbrk) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* someone else sbrked */
	}

	/*
	 * Finally, align the brk to HUNKSIZE so that all hunks are
	 * page-aligned, to avoid edge-effects.
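	 *
	 * For illustration (hypothetical addresses): if the brk is at 0x23450
	 * after the percpu allocation, ALIGN(0x23450, 8192) == 0x24000, so
	 * padding == 0xbb0 bytes are sbrk'd and simply left unused.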
	 */

	newbrk = (uintptr_t)new_cpu_list + cache_space_needed;

	padding = ALIGN(newbrk, HUNKSIZE) - newbrk;

	if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* someone else sbrked */
	}

	list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus));

	/* initialize the percpu list */
	for (i = 0; i < ncpus; i++) {
		new_cpu_list[i].mt_caches = (cache_head_t *)list_addr;
		for (j = 0; j < NUM_CACHES; j++) {
			new_cpu_list[i].mt_caches[j].mt_cache = NULL;
			new_cpu_list[i].mt_caches[j].mt_hint = NULL;
		}

		bzero(&new_cpu_list[i].mt_parent_lock, sizeof (mutex_t));

		/* get the correct cache list alignment */
		list_addr += CACHELIST_SIZE;
	}

	/*
	 * Initialize oversize listhead
	 */
	oversize_list.next_bysize = &oversize_list;
	oversize_list.prev_bysize = &oversize_list;
	oversize_list.next_byaddr = &oversize_list;
	oversize_list.prev_byaddr = &oversize_list;
	oversize_list.addr = NULL;
	oversize_list.size = 0;	/* sentinel */

	/*
	 * now install the global variables, leaving cpu_list for last, so that
	 * there aren't any race conditions.
	 */
	curcpu = new_curcpu;
	cpu_mask = new_cpu_mask;
	cpu_list = new_cpu_list;

	(void) mutex_unlock(&init_lock);

	return (1);
}

static void
create_cache(cache_t *cp, size_t size, uint_t chunksize)
{
	long nblocks;

	bzero(&cp->mt_cache_lock, sizeof (mutex_t));
	cp->mt_size = size;
	cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t));
	cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t);
	cp->mt_hunks = chunksize;
	/*
	 * rough calculation. We will need to adjust later.
	 */
	nblocks = cp->mt_span / cp->mt_size;
	nblocks >>= 3;
	if (nblocks == 0) { /* less than 8 free blocks in this pool */
		int32_t numblocks = 0;
		long i = cp->mt_span;
		size_t sub = cp->mt_size;
		uchar_t mask = 0;

		while (i > sub) {
			numblocks++;
			i -= sub;
		}
		nblocks = numblocks;
		cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8);
		cp->mt_nfree = numblocks;
		while (numblocks--) {
			mask |= 0x80 >> numblocks;
		}
		*(cp->mt_freelist) = mask;
	} else {
		cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist +
		    nblocks, 32);
		/* recompute nblocks */
		nblocks = (uintptr_t)((caddr_t)cp->mt_freelist +
		    cp->mt_span - cp->mt_arena) / cp->mt_size;
		cp->mt_nfree = ((nblocks >> 3) << 3);
		/* Set everything to free */
		(void) memset(cp->mt_freelist, 0xff, nblocks >> 3);
	}

	if (debugopt & MTDEBUGPATTERN)
		copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks);

	cp->mt_next = NULL;
}

static void
reinit_cpu_list(void)
{
	oversize_t *wp = oversize_list.next_bysize;
	percpu_t *cpuptr;
	cache_t *thiscache;
	cache_head_t *cachehead;

	if (wp == NULL || cpu_list == NULL) {
		reinit = 0;
		return;
	}

	/* Reinitialize free oversize blocks. */
	(void) mutex_lock(&oversize_lock);
	if (debugopt & MTDEBUGPATTERN)
		for (; wp != &oversize_list; wp = wp->next_bysize)
			copy_pattern(FREEPATTERN, wp->addr, wp->size);
	(void) mutex_unlock(&oversize_lock);

	/* Reinitialize free blocks. */
	for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
		(void) mutex_lock(&cpuptr->mt_parent_lock);
		for (cachehead = &cpuptr->mt_caches[0]; cachehead <
		    &cpuptr->mt_caches[NUM_CACHES]; cachehead++) {
			for (thiscache = cachehead->mt_cache; thiscache != NULL;
			    thiscache = thiscache->mt_next) {
				(void) mutex_lock(&thiscache->mt_cache_lock);
				if (thiscache->mt_nfree == 0) {
					(void) mutex_unlock(
					    &thiscache->mt_cache_lock);
					continue;
				}
				if (thiscache != NULL)
					reinit_cache(thiscache);
				(void) mutex_unlock(&thiscache->mt_cache_lock);
			}
		}
		(void) mutex_unlock(&cpuptr->mt_parent_lock);
	}
	reinit = 0;
}

static void
reinit_cache(cache_t *thiscache)
{
	uint32_t *freeblocks; /* not a uintptr_t on purpose */
	int32_t i, n;
	caddr_t ret;

	freeblocks = (uint32_t *)thiscache->mt_freelist;
	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
		if (*freeblocks & 0xffffffff) {
			for (i = 0; i < 32; i++) {
				if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) {
					n = (uintptr_t)(((freeblocks -
					    (uint32_t *)thiscache->mt_freelist)
					    << 5) + i) * thiscache->mt_size;
					ret = thiscache->mt_arena + n;
					ret += OVERHEAD;
					copy_pattern(FREEPATTERN, ret,
					    thiscache->mt_size);
				}
			}
		}
		freeblocks++;
	}
}

static void *
malloc_internal(size_t size, percpu_t *cpuptr)
{
	cache_head_t *cachehead;
	cache_t *thiscache, *hintcache;
	int32_t i, n, logsz, bucket;
	uint32_t index;
	uint32_t *freeblocks; /* not a uintptr_t on purpose */
	caddr_t ret;

	logsz = MIN_CACHED_SHIFT;

	while (size > (1 << logsz))
		logsz++;

	bucket = logsz - MIN_CACHED_SHIFT;

	(void) mutex_lock(&cpuptr->mt_parent_lock);

	/*
	 * Find a cache of the appropriate size with free buffers.
	 *
	 * We don't need to lock each cache as we check their mt_nfree count,
	 * since:
	 *	1.  We are only looking for caches with mt_nfree > 0.  If a
	 *	    free happens during our search, it will increment mt_nfree,
	 *	    which will not affect the test.
	 *	2.  Allocations can decrement mt_nfree, but they can't happen
	 *	    as long as we hold mt_parent_lock.
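	 *
	 * For illustration: a 100-byte request rounds up to logsz == 7, so it
	 * is served from bucket logsz - MIN_CACHED_SHIFT == 3, whose caches
	 * carve out 128-byte buffers (plus the OVERHEAD word).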
	 */

	cachehead = &cpuptr->mt_caches[bucket];

	/* Search through the list, starting at the mt_hint */
	thiscache = cachehead->mt_hint;

	while (thiscache != NULL && thiscache->mt_nfree == 0)
		thiscache = thiscache->mt_next;

	if (thiscache == NULL) {
		/* wrap around -- search up to the hint */
		thiscache = cachehead->mt_cache;
		hintcache = cachehead->mt_hint;

		while (thiscache != NULL && thiscache != hintcache &&
		    thiscache->mt_nfree == 0)
			thiscache = thiscache->mt_next;

		if (thiscache == hintcache)
			thiscache = NULL;
	}


	if (thiscache == NULL) { /* there are no free caches */
		int32_t thisrequest = requestsize;
		int32_t buffer_size = (1 << logsz) + OVERHEAD;

		thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE);

		if (thiscache == (cache_t *)-1) {
			(void) mutex_unlock(&cpuptr->mt_parent_lock);
			errno = EAGAIN;
			return (NULL);
		}
		create_cache(thiscache, buffer_size, thisrequest);

		/* link in the new block at the beginning of the list */
		thiscache->mt_next = cachehead->mt_cache;
		cachehead->mt_cache = thiscache;
	}

	/* update the hint to the cache we found or created */
	cachehead->mt_hint = thiscache;

	/* thiscache now points to a cache with available space */
	(void) mutex_lock(&thiscache->mt_cache_lock);

	freeblocks = (uint32_t *)thiscache->mt_freelist;
	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
		if (*freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
	}

	/*
	 * The offset from mt_freelist to freeblocks is the offset into
	 * the arena. Be sure to include the offset into freeblocks
	 * of the bitmask. n is the offset.
	 */
	for (i = 0; i < 32; ) {
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
	}
	index = 0x80000000 >> --i;


	*freeblocks &= FLIP_EM(~index);

	thiscache->mt_nfree--;

	(void) mutex_unlock(&thiscache->mt_cache_lock);
	(void) mutex_unlock(&cpuptr->mt_parent_lock);

	n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5)
	    + i) * thiscache->mt_size;
	/*
	 * Now you have the offset in n and you've changed the free mask
	 * in the freelist. Nothing left to do but find the block
	 * in the arena, put the value of thiscache in the word
	 * ahead of the handed out address and return the memory
	 * back to the user.
	 */
	ret = thiscache->mt_arena + n;

	/* Store the cache addr for this buf. Makes free go fast. */
	*(uintptr_t *)ret = (uintptr_t)thiscache;

	/*
	 * This assert makes sure we don't hand out memory that is not
	 * owned by this cache.
	 */
	assert(ret + thiscache->mt_size <= thiscache->mt_freelist +
	    thiscache->mt_span);

	ret += OVERHEAD;

	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */

	if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
		if (verify_pattern(FREEPATTERN, ret, size))
			abort();	/* reference after free */

	if (debugopt & MTINITBUFFER)
		copy_pattern(INITPATTERN, ret, size);
	return ((void *)ret);
}

static void *
morecore(size_t bytes)
{
	void * ret;

	if (bytes > LONG_MAX) {
		intptr_t wad;
		/*
		 * The request size is too big. We need to do this in
		 * chunks. sbrk only takes a signed argument.
		 */
		if (bytes == ULONG_MAX)
			return ((void *)-1);

		ret = sbrk(0);
		wad = LONG_MAX;
		while (wad > 0) {
			if (sbrk(wad) == (void *)-1) {
				if (ret != sbrk(0))
					(void) sbrk(-LONG_MAX);
				return ((void *)-1);
			}
			bytes -= LONG_MAX;
			wad = bytes;
		}
	} else
		ret = sbrk(bytes);

	return (ret);
}


static void *
oversize(size_t size)
{
	caddr_t ret;
	oversize_t *big;
	int bucket;

	/*
	 * The idea with the global lock is that we are sure to
	 * block in the kernel anyway since given an oversize alloc
	 * we are sure to have to call morecore().
	 */
	(void) mutex_lock(&oversize_lock);

	/*
	 * Since we ensure every address we hand back is
	 * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
	 * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
	 * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN,
	 * particularly where coalescing occurs.
	 */
	size = ALIGN(size, MTMALLOC_MIN_ALIGN);

	if ((big = find_oversize(size)) != NULL) {
		if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
			if (verify_pattern(FREEPATTERN, big->addr, size))
				abort();	/* reference after free */
	} else {
		/* Get more 8-byte aligned memory from heap */
		ret = morecore(size + OVSZ_HEADER_SIZE);
		if (ret == (caddr_t)-1) {
			(void) mutex_unlock(&oversize_lock);
			errno = ENOMEM;
			return (NULL);
		}
		big = oversize_header_alloc((uintptr_t)ret, size);
	}
	ret = big->addr;

	/*
	 * Add big to the hash table at the head of the relevant bucket.
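	 * For illustration (hypothetical address): a block whose user address
	 * is 0x100000 lands in bucket 1048576 % 67 == 26.  free() later walks
	 * that bucket's hash_next chain to validate a pointer before accepting
	 * the free.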
	 */
	bucket = HASH_OVERSIZE(ret);
	big->hash_next = ovsz_hashtab[bucket];
	ovsz_hashtab[bucket] = big;

	if (debugopt & MTINITBUFFER)
		copy_pattern(INITPATTERN, ret, size);

	(void) mutex_unlock(&oversize_lock);
	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
	return ((void *)ret);
}

static void
insert_oversize(oversize_t *op, oversize_t *nx)
{
	oversize_t *sp;

	/* locate correct insertion point in size-ordered list */
	for (sp = oversize_list.next_bysize;
	    sp != &oversize_list && (op->size > sp->size);
	    sp = sp->next_bysize)
		;

	/* link into size-ordered list */
	op->next_bysize = sp;
	op->prev_bysize = sp->prev_bysize;
	op->prev_bysize->next_bysize = op;
	op->next_bysize->prev_bysize = op;

	/*
	 * link item into address-ordered list
	 * (caller provides insertion point as an optimization)
	 */
	op->next_byaddr = nx;
	op->prev_byaddr = nx->prev_byaddr;
	op->prev_byaddr->next_byaddr = op;
	op->next_byaddr->prev_byaddr = op;

}

static void
unlink_oversize(oversize_t *lp)
{
	/* unlink from address list */
	lp->prev_byaddr->next_byaddr = lp->next_byaddr;
	lp->next_byaddr->prev_byaddr = lp->prev_byaddr;

	/* unlink from size list */
	lp->prev_bysize->next_bysize = lp->next_bysize;
	lp->next_bysize->prev_bysize = lp->prev_bysize;
}

static void
position_oversize_by_size(oversize_t *op)
{
	oversize_t *sp;

	if (op->size > op->next_bysize->size ||
	    op->size < op->prev_bysize->size) {

		/* unlink from size list */
		op->prev_bysize->next_bysize = op->next_bysize;
		op->next_bysize->prev_bysize = op->prev_bysize;

		/* locate correct insertion point in size-ordered list */
		for (sp = oversize_list.next_bysize;
		    sp != &oversize_list && (op->size > sp->size);
		    sp = sp->next_bysize)
			;

		/* link into size-ordered list */
		op->next_bysize = sp;
		op->prev_bysize = sp->prev_bysize;
		op->prev_bysize->next_bysize = op;
		op->next_bysize->prev_bysize = op;
	}
}

static void
add_oversize(oversize_t *lp)
{
	int merge_flags = INSERT_ONLY;
	oversize_t *nx;		/* ptr to item right of insertion point */
	oversize_t *pv;		/* ptr to item left of insertion point */
	uint_t size_lp, size_pv, size_nx;
	uintptr_t endp_lp, endp_pv, endp_nx;

	/*
	 * Locate insertion point in address-ordered list
	 */

	for (nx = oversize_list.next_byaddr;
	    nx != &oversize_list && (lp->addr > nx->addr);
	    nx = nx->next_byaddr)
		;

	/*
	 * Determine how to add chunk to oversize freelist
	 */

	size_lp = OVSZ_HEADER_SIZE + lp->size;
	endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN);
	size_lp = endp_lp - (uintptr_t)lp;

	pv = nx->prev_byaddr;

	if (pv->size) {

		size_pv = OVSZ_HEADER_SIZE + pv->size;
		endp_pv = ALIGN((uintptr_t)pv + size_pv,
		    MTMALLOC_MIN_ALIGN);
		size_pv = endp_pv - (uintptr_t)pv;

		/* Check for adjacency with left chunk */
		if ((uintptr_t)lp == endp_pv)
			merge_flags |= COALESCE_LEFT;
	}

	if (nx->size) {

		/* Check for adjacency with right chunk */
		if ((uintptr_t)nx == endp_lp) {
			size_nx = OVSZ_HEADER_SIZE + nx->size;
			endp_nx = ALIGN((uintptr_t)nx + size_nx,
			    MTMALLOC_MIN_ALIGN);
			size_nx = endp_nx - (uintptr_t)nx;
			merge_flags |= COALESCE_RIGHT;
		}
	}

	/*
	 * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with
	 * FREEPATTERN for lp->size bytes. If we can merge, the oversize
	 * header(s) that will also become part of the memory available for
	 * reallocation (ie lp and/or nx) must also be overwritten with
	 * FREEPATTERN or we will SIGABRT when this memory is next reallocated.
	 */
	switch (merge_flags) {

	case INSERT_ONLY:		/* Coalescing not possible */
		insert_oversize(lp, nx);
		break;
	case COALESCE_LEFT:
		pv->size += size_lp;
		position_oversize_by_size(pv);
		if (debugopt & MTDEBUGPATTERN)
			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
		break;
	case COALESCE_RIGHT:
		unlink_oversize(nx);
		lp->size += size_nx;
		insert_oversize(lp, pv->next_byaddr);
		if (debugopt & MTDEBUGPATTERN)
			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
		break;
	case COALESCE_WITH_BOTH_SIDES:	/* Merge (with right) to the left */
		pv->size += size_lp + size_nx;
		unlink_oversize(nx);
		position_oversize_by_size(pv);
		if (debugopt & MTDEBUGPATTERN) {
			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
		}
		break;
	}
}

/*
 * Find memory on our list that is at least size big. If we find a block that
 * is big enough, we break it up and return the associated oversize_t struct
 * back to the calling client. Any leftover piece of that block is returned
 * to the freelist.
 */
static oversize_t *
find_oversize(size_t size)
{
	oversize_t *wp = oversize_list.next_bysize;
	while (wp != &oversize_list && size > wp->size)
		wp = wp->next_bysize;

	if (wp == &oversize_list) /* empty list or nothing big enough */
		return (NULL);
	/* breaking up a chunk of memory */
	if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN)))
	    > MAX_CACHED) {
		caddr_t off;
		oversize_t *np;
		size_t osize;
		off = (caddr_t)ALIGN(wp->addr + size,
		    MTMALLOC_MIN_ALIGN);
		osize = wp->size;
		wp->size = (size_t)(off - wp->addr);
		np = oversize_header_alloc((uintptr_t)off,
		    osize - (wp->size + OVSZ_HEADER_SIZE));
		if ((long)np->size < 0)
			abort();
		unlink_oversize(wp);
		add_oversize(np);
	} else {
		unlink_oversize(wp);
	}
	return (wp);
}

static void
copy_pattern(uint32_t pattern, void *buf_arg, size_t size)
{
	uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
	uint32_t *buf = buf_arg;

	while (buf < bufend - 3) {
		buf[3] = buf[2] = buf[1] = buf[0] = pattern;
		buf += 4;
	}
	while (buf < bufend)
		*buf++ = pattern;
}

static void *
verify_pattern(uint32_t pattern, void *buf_arg, size_t size)
{
	uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
	uint32_t *buf;

	for (buf = buf_arg; buf < bufend; buf++)
		if (*buf != pattern)
			return (buf);
	return (NULL);
}

static void
free_oversize(oversize_t *ovp)
{
	assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */
	assert(ovp->size > MAX_CACHED);

	ovp->next_bysize = ovp->prev_bysize = NULL;
	ovp->next_byaddr = ovp->prev_byaddr = NULL;
	(void) mutex_lock(&oversize_lock);
	add_oversize(ovp);
	(void) mutex_unlock(&oversize_lock);
}

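/*
 * Layout of an oversize block, as built by oversize_header_alloc() below
 * (a sketch; OVSZ_SIZE is sizeof (oversize_t) rounded up to
 * MTMALLOC_MIN_ALIGN and OVERHEAD is the 8-byte magic word):
 *
 *	+--------------------+--------------------------+------------------
 *	| oversize_t header  | MTMALLOC_OVERSIZE_MAGIC  | user data ...
 *	| (OVSZ_SIZE bytes)  | (OVERHEAD bytes)         | (ovsz_hdr->size)
 *	+--------------------+--------------------------+------------------
 *	^ mem as passed in                              ^ ovsz_hdr->addr,
 *	                                                  handed to the caller
 */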
static oversize_t *
oversize_header_alloc(uintptr_t mem, size_t size)
{
	oversize_t *ovsz_hdr;

	assert(size > MAX_CACHED);

	ovsz_hdr = (oversize_t *)mem;
	ovsz_hdr->prev_bysize = NULL;
	ovsz_hdr->next_bysize = NULL;
	ovsz_hdr->prev_byaddr = NULL;
	ovsz_hdr->next_byaddr = NULL;
	ovsz_hdr->hash_next = NULL;
	ovsz_hdr->size = size;
	mem += OVSZ_SIZE;
	*(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC;
	mem += OVERHEAD;
	assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */
	ovsz_hdr->addr = (caddr_t)mem;
	return (ovsz_hdr);
}
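/*
 * Usage sketch (application side, not part of this library): linking with
 * -lmtmalloc ahead of libc causes these definitions of malloc(), free(),
 * realloc(), calloc(), memalign() and valloc() to interpose on the libc
 * versions, e.g.
 *
 *	cc -mt app.c -lmtmalloc
 *
 * No application source changes are needed; mallocctl() is the only
 * interface here that is specific to this allocator.
 */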