1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <mtmalloc.h> 28 #include "mtmalloc_impl.h" 29 #include <unistd.h> 30 #include <synch.h> 31 #include <thread.h> 32 #include <pthread.h> 33 #include <stdio.h> 34 #include <limits.h> 35 #include <errno.h> 36 #include <string.h> 37 #include <strings.h> 38 #include <sys/param.h> 39 #include <sys/sysmacros.h> 40 41 /* 42 * To turn on the asserts just compile -DDEBUG 43 */ 44 45 #ifndef DEBUG 46 #define NDEBUG 47 #endif 48 49 #include <assert.h> 50 51 /* 52 * The MT hot malloc implementation contained herein is designed to be 53 * plug-compatible with the libc version of malloc. It is not intended 54 * to replace that implementation until we decide that it is ok to break 55 * customer apps (Solaris 3.0). 56 * 57 * For requests up to 2^^16, the allocator initializes itself into NCPUS 58 * worth of chains of caches. When a memory request is made, the calling thread 59 * is vectored into one of NCPUS worth of caches. The LWP id gives us a cheap, 60 * contention-reducing index to use, eventually, this should be replaced with 61 * the actual CPU sequence number, when an interface to get it is available. 62 * 63 * Once the thread is vectored into one of the list of caches the real 64 * allocation of the memory begins. The size is determined to figure out which 65 * bucket the allocation should be satisfied from. The management of free 66 * buckets is done via a bitmask. A free bucket is represented by a 1. The 67 * first free bit represents the first free bucket. The position of the bit, 68 * represents the position of the bucket in the arena. 69 * 70 * When the memory from the arena is handed out, the address of the cache 71 * control structure is written in the word preceeding the returned memory. 72 * This cache control address is used during free() to mark the buffer free 73 * in the cache control structure. 74 * 75 * When all available memory in a cache has been depleted, a new chunk of memory 76 * is allocated via sbrk(). The new cache is allocated from this chunk of memory 77 * and initialized in the function create_cache(). New caches are installed at 78 * the front of a singly linked list of the same size memory pools. This helps 79 * to ensure that there will tend to be available memory in the beginning of the 80 * list. 81 * 82 * Long linked lists hurt performance. To decrease this effect, there is a 83 * tunable, requestsize, that bumps up the sbrk allocation size and thus 84 * increases the number of available blocks within an arena. We also keep 85 * a "hint" for each cache list, which is the last cache in the list allocated 86 * from. This lowers the cost of searching if there are a lot of fully 87 * allocated blocks at the front of the list. 88 * 89 * For requests greater than 2^^16 (oversize allocations), there are two pieces 90 * of overhead. There is the OVERHEAD used to hold the cache addr 91 * (&oversize_list), plus an oversize_t structure to further describe the block. 92 * 93 * The oversize list is kept as defragmented as possible by coalescing 94 * freed oversized allocations with adjacent neighbors. 95 * 96 * Addresses handed out are stored in a hash table, and are aligned on 97 * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded-up 98 * where necessary in order to achieve this. This eases the implementation of 99 * MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs. 100 * 101 * A memalign allocation takes memalign header overhead. There's two 102 * types of memalign headers distinguished by MTMALLOC_MEMALIGN_MAGIC 103 * and MTMALLOC_MEMALIGN_MIN_MAGIC. When the size of memory taken to 104 * get to the aligned address from malloc'ed address is the minimum size 105 * OVERHEAD, we create a header taking only one OVERHEAD space with magic 106 * number MTMALLOC_MEMALIGN_MIN_MAGIC, and we know by subtracting OVERHEAD 107 * from memaligned address, we can get to the malloc'ed address. Otherwise, 108 * we create a memalign header taking two OVERHEAD space, one stores 109 * MTMALLOC_MEMALIGN_MAGIC magic number, the other one points back to the 110 * malloc'ed address. 111 */ 112 113 #if defined(__i386) || defined(__amd64) 114 #include <arpa/inet.h> /* for htonl() */ 115 #endif 116 117 static void * morecore(size_t); 118 static void create_cache(cache_t *, size_t bufsize, uint_t hunks); 119 static void * malloc_internal(size_t, percpu_t *); 120 static void * oversize(size_t); 121 static oversize_t *find_oversize(size_t); 122 static void add_oversize(oversize_t *); 123 static void copy_pattern(uint32_t, void *, size_t); 124 static void * verify_pattern(uint32_t, void *, size_t); 125 static void reinit_cpu_list(void); 126 static void reinit_cache(cache_t *); 127 static void free_oversize(oversize_t *); 128 static oversize_t *oversize_header_alloc(uintptr_t, size_t); 129 130 /* 131 * oversize hash table stuff 132 */ 133 #define NUM_BUCKETS 67 /* must be prime */ 134 #define HASH_OVERSIZE(caddr) ((uintptr_t)(caddr) % NUM_BUCKETS) 135 oversize_t *ovsz_hashtab[NUM_BUCKETS]; 136 137 #define ALIGN(x, a) ((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \ 138 & ~((uintptr_t)(a) - 1))) 139 140 /* need this to deal with little endianess of x86 */ 141 #if defined(__i386) || defined(__amd64) 142 #define FLIP_EM(x) htonl((x)) 143 #else 144 #define FLIP_EM(x) (x) 145 #endif 146 147 #define INSERT_ONLY 0 148 #define COALESCE_LEFT 0x00000001 149 #define COALESCE_RIGHT 0x00000002 150 #define COALESCE_WITH_BOTH_SIDES (COALESCE_LEFT | COALESCE_RIGHT) 151 152 #define OVERHEAD 8 /* size needed to write cache addr */ 153 #define HUNKSIZE 8192 /* just a multiplier */ 154 155 #define MAX_CACHED_SHIFT 16 /* 64K is the max cached size */ 156 #define MAX_CACHED (1 << MAX_CACHED_SHIFT) 157 #define MIN_CACHED_SHIFT 4 /* smaller requests rounded up */ 158 #define MTMALLOC_MIN_ALIGN 8 /* min guaranteed alignment */ 159 160 /* maximum size before overflow */ 161 #define MAX_MTMALLOC (SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \ 162 - OVSZ_HEADER_SIZE) 163 164 #define NUM_CACHES (MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1) 165 #define CACHELIST_SIZE ALIGN(NUM_CACHES * sizeof (cache_head_t), \ 166 CACHE_COHERENCY_UNIT) 167 168 #define MINSIZE 9 /* for requestsize, tunable */ 169 #define MAXSIZE 256 /* arbitrary, big enough, for requestsize */ 170 171 #define FREEPATTERN 0xdeadbeef /* debug fill pattern for free buf */ 172 #define INITPATTERN 0xbaddcafe /* debug fill pattern for new buf */ 173 174 #define misaligned(p) ((unsigned)(p) & (sizeof (int) - 1)) 175 #define IS_OVERSIZE(x, y) (((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0)) 176 177 static long requestsize = MINSIZE; /* 9 pages per cache; tunable; 9 is min */ 178 179 static uint_t cpu_mask; 180 static curcpu_func curcpu; 181 182 static int32_t debugopt; 183 static int32_t reinit; 184 185 static percpu_t *cpu_list; 186 static oversize_t oversize_list; 187 static mutex_t oversize_lock = DEFAULTMUTEX; 188 189 static int ncpus = 0; 190 191 #define MTMALLOC_OVERSIZE_MAGIC ((uintptr_t)&oversize_list) 192 #define MTMALLOC_MEMALIGN_MAGIC ((uintptr_t)&oversize_list + 1) 193 #define MTMALLOC_MEMALIGN_MIN_MAGIC ((uintptr_t)&oversize_list + 2) 194 195 /* 196 * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte 197 * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that 198 * this is achieved. 199 */ 200 #define OVSZ_SIZE (ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN)) 201 #define OVSZ_HEADER_SIZE (OVSZ_SIZE + OVERHEAD) 202 203 /* 204 * memalign header takes 2 OVERHEAD space. One for memalign magic, and the 205 * other one points back to the start address of originally allocated space. 206 */ 207 #define MEMALIGN_HEADER_SIZE 2 * OVERHEAD 208 #define MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\ 209 if (shift == OVERHEAD)\ 210 *((uintptr_t *)((caddr_t)x - OVERHEAD)) = \ 211 MTMALLOC_MEMALIGN_MIN_MAGIC; \ 212 else {\ 213 *((uintptr_t *)((caddr_t)x - OVERHEAD)) = \ 214 MTMALLOC_MEMALIGN_MAGIC; \ 215 *((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \ 216 (uintptr_t)malloc_addr; \ 217 } 218 219 /* 220 * Add big to the oversize hash table at the head of the relevant bucket. 221 */ 222 static void 223 insert_hash(oversize_t *big) 224 { 225 caddr_t ret = big->addr; 226 int bucket = HASH_OVERSIZE(ret); 227 228 assert(MUTEX_HELD(&oversize_lock)); 229 big->hash_next = ovsz_hashtab[bucket]; 230 ovsz_hashtab[bucket] = big; 231 } 232 233 void * 234 malloc(size_t bytes) 235 { 236 percpu_t *list_rotor; 237 uint_t list_index; 238 239 if (bytes > MAX_CACHED) 240 return (oversize(bytes)); 241 242 list_index = (curcpu() & cpu_mask); 243 244 list_rotor = &cpu_list[list_index]; 245 246 return (malloc_internal(bytes, list_rotor)); 247 } 248 249 void * 250 realloc(void * ptr, size_t bytes) 251 { 252 void *new, *data_ptr; 253 cache_t *cacheptr; 254 caddr_t mem; 255 size_t shift = 0; 256 257 if (ptr == NULL) 258 return (malloc(bytes)); 259 260 if (bytes == 0) { 261 free(ptr); 262 return (NULL); 263 } 264 265 data_ptr = ptr; 266 mem = (caddr_t)ptr - OVERHEAD; 267 268 /* 269 * Optimization possibility : 270 * p = malloc(64); 271 * q = realloc(p, 64); 272 * q can be same as p. 273 * Apply this optimization for the normal 274 * sized caches for now. 275 */ 276 if (*(uintptr_t *)mem < MTMALLOC_OVERSIZE_MAGIC || 277 *(uintptr_t *)mem > MTMALLOC_MEMALIGN_MIN_MAGIC) { 278 cacheptr = (cache_t *)*(uintptr_t *)mem; 279 if (bytes <= (cacheptr->mt_size - OVERHEAD)) 280 return (ptr); 281 } 282 283 new = malloc(bytes); 284 285 if (new == NULL) 286 return (NULL); 287 288 /* 289 * If new == ptr, ptr has previously been freed. Passing a freed pointer 290 * to realloc() is not allowed - unless the caller specifically states 291 * otherwise, in which case we must avoid freeing ptr (ie new) before we 292 * return new. There is (obviously) no requirement to memcpy() ptr to 293 * new before we return. 294 */ 295 if (new == ptr) { 296 if (!(debugopt & MTDOUBLEFREE)) 297 abort(); 298 return (new); 299 } 300 301 if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) { 302 mem -= OVERHEAD; 303 ptr = (void *)*(uintptr_t *)mem; 304 mem = (caddr_t)ptr - OVERHEAD; 305 shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr); 306 } else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) { 307 ptr = (void *) mem; 308 mem -= OVERHEAD; 309 shift = OVERHEAD; 310 } 311 312 if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) { 313 oversize_t *old; 314 315 old = (oversize_t *)(mem - OVSZ_SIZE); 316 (void) memcpy(new, data_ptr, MIN(bytes, old->size - shift)); 317 free(ptr); 318 return (new); 319 } 320 321 cacheptr = (cache_t *)*(uintptr_t *)mem; 322 323 (void) memcpy(new, data_ptr, 324 MIN(cacheptr->mt_size - OVERHEAD - shift, bytes)); 325 free(ptr); 326 327 return (new); 328 } 329 330 void * 331 calloc(size_t nelem, size_t bytes) 332 { 333 void * ptr; 334 size_t size; 335 336 if (nelem == 0 || bytes == 0) { 337 size = 0; 338 } else { 339 size = nelem * bytes; 340 341 /* check for overflow */ 342 if ((size / nelem) != bytes) { 343 errno = ENOMEM; 344 return (NULL); 345 } 346 } 347 348 ptr = malloc(size); 349 if (ptr == NULL) 350 return (NULL); 351 (void) memset(ptr, 0, size); 352 353 return (ptr); 354 } 355 356 void 357 free(void * ptr) 358 { 359 cache_t *cacheptr; 360 caddr_t mem; 361 int32_t i; 362 caddr_t freeblocks; 363 uintptr_t offset; 364 uchar_t mask; 365 int32_t which_bit, num_bytes; 366 367 if (ptr == NULL) 368 return; 369 370 mem = (caddr_t)ptr - OVERHEAD; 371 372 if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) { 373 mem -= OVERHEAD; 374 ptr = (void *)*(uintptr_t *)mem; 375 mem = (caddr_t)ptr - OVERHEAD; 376 } else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) { 377 ptr = (void *) mem; 378 mem -= OVERHEAD; 379 } 380 381 if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) { 382 oversize_t *big, **opp; 383 int bucket; 384 385 big = (oversize_t *)(mem - OVSZ_SIZE); 386 (void) mutex_lock(&oversize_lock); 387 388 bucket = HASH_OVERSIZE(big->addr); 389 for (opp = &ovsz_hashtab[bucket]; *opp != NULL; 390 opp = &(*opp)->hash_next) 391 if (*opp == big) 392 break; 393 394 if (*opp == NULL) { 395 if (!(debugopt & MTDOUBLEFREE)) 396 abort(); 397 (void) mutex_unlock(&oversize_lock); 398 return; 399 } 400 401 *opp = big->hash_next; /* remove big from the hash table */ 402 big->hash_next = NULL; 403 404 if (debugopt & MTDEBUGPATTERN) 405 copy_pattern(FREEPATTERN, ptr, big->size); 406 add_oversize(big); 407 (void) mutex_unlock(&oversize_lock); 408 return; 409 } 410 411 cacheptr = (cache_t *)*(uintptr_t *)mem; 412 freeblocks = cacheptr->mt_freelist; 413 414 /* 415 * This is the distance measured in bits into the arena. 416 * The value of offset is in bytes but there is a 1-1 correlation 417 * between distance into the arena and distance into the 418 * freelist bitmask. 419 */ 420 offset = mem - cacheptr->mt_arena; 421 422 /* 423 * i is total number of bits to offset into freelist bitmask. 424 */ 425 426 i = offset / cacheptr->mt_size; 427 428 num_bytes = i >> 3; 429 430 /* 431 * which_bit is the bit offset into the byte in the freelist. 432 * if our freelist bitmask looks like 0xf3 and we are freeing 433 * block 5 (ie: the 6th block) our mask will be 0xf7 after 434 * the free. Things go left to right that's why the mask is 0x80 435 * and not 0x01. 436 */ 437 which_bit = i - (num_bytes << 3); 438 439 mask = 0x80 >> which_bit; 440 441 freeblocks += num_bytes; 442 443 if (debugopt & MTDEBUGPATTERN) 444 copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD); 445 446 (void) mutex_lock(&cacheptr->mt_cache_lock); 447 448 if (*freeblocks & mask) { 449 if (!(debugopt & MTDOUBLEFREE)) 450 abort(); 451 } else { 452 *freeblocks |= mask; 453 cacheptr->mt_nfree++; 454 } 455 456 (void) mutex_unlock(&cacheptr->mt_cache_lock); 457 } 458 459 void * 460 memalign(size_t alignment, size_t size) 461 { 462 size_t alloc_size; 463 uintptr_t offset; 464 void *alloc_buf; 465 void *ret_buf; 466 467 if (size == 0 || alignment == 0 || misaligned(alignment) || 468 (alignment & (alignment - 1)) != 0) { 469 errno = EINVAL; 470 return (NULL); 471 } 472 473 /* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */ 474 if (alignment <= MTMALLOC_MIN_ALIGN) 475 return (malloc(size)); 476 477 alloc_size = size + alignment - MTMALLOC_MIN_ALIGN; 478 479 if (alloc_size < size) { /* overflow */ 480 errno = ENOMEM; 481 return (NULL); 482 } 483 484 alloc_buf = malloc(alloc_size); 485 486 if (alloc_buf == NULL) 487 /* malloc sets errno */ 488 return (NULL); 489 490 /* 491 * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of 492 * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since 493 * we will use alloc_size to return the excess fragments to the free 494 * list, we also round-up alloc_size if necessary. 495 */ 496 if ((alloc_size > MAX_CACHED) && 497 (alloc_size & (MTMALLOC_MIN_ALIGN - 1))) 498 alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN); 499 500 if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) { 501 /* aligned correctly */ 502 503 size_t frag_size = alloc_size - 504 (size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE); 505 506 /* 507 * If the leftover piece of the memory > MAX_CACHED, 508 * split off the piece and return it back to the freelist. 509 */ 510 if (IS_OVERSIZE(frag_size, alloc_size)) { 511 oversize_t *orig, *tail; 512 uintptr_t taddr; 513 size_t data_size; 514 taddr = ALIGN((uintptr_t)alloc_buf + size, 515 MTMALLOC_MIN_ALIGN); 516 data_size = taddr - (uintptr_t)alloc_buf; 517 orig = (oversize_t *)((uintptr_t)alloc_buf - 518 OVSZ_HEADER_SIZE); 519 frag_size = orig->size - data_size - 520 OVSZ_HEADER_SIZE; 521 orig->size = data_size; 522 tail = oversize_header_alloc(taddr, frag_size); 523 free_oversize(tail); 524 } 525 ret_buf = alloc_buf; 526 } else { 527 uchar_t oversize_bits = 0; 528 size_t head_sz, data_sz, tail_sz; 529 uintptr_t ret_addr, taddr, shift, tshift; 530 oversize_t *orig, *tail, *big; 531 size_t tsize; 532 533 /* needs to be aligned */ 534 shift = alignment - offset; 535 536 assert(shift >= MTMALLOC_MIN_ALIGN); 537 538 ret_addr = ((uintptr_t)alloc_buf + shift); 539 ret_buf = (void *)ret_addr; 540 541 if (alloc_size <= MAX_CACHED) { 542 MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf); 543 return (ret_buf); 544 } 545 546 /* 547 * Only check for the fragments when the memory is allocted 548 * from oversize_list. Split off a fragment and return it 549 * to the oversize freelist when it's > MAX_CACHED. 550 */ 551 552 head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE); 553 554 tail_sz = alloc_size - 555 (shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE); 556 557 oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) | 558 IS_OVERSIZE(size, alloc_size) << DATA_SHIFT | 559 IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT; 560 561 switch (oversize_bits) { 562 case NONE_OVERSIZE: 563 case DATA_OVERSIZE: 564 MEMALIGN_HEADER_ALLOC(ret_addr, shift, 565 alloc_buf); 566 break; 567 case HEAD_OVERSIZE: 568 /* 569 * If we can extend data > MAX_CACHED and have 570 * head still > MAX_CACHED, we split head-end 571 * as the case of head-end and data oversized, 572 * otherwise just create memalign header. 573 */ 574 tsize = (shift + size) - (MAX_CACHED + 8 + 575 MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE); 576 577 if (!IS_OVERSIZE(tsize, alloc_size)) { 578 MEMALIGN_HEADER_ALLOC(ret_addr, shift, 579 alloc_buf); 580 break; 581 } else { 582 tsize += OVSZ_HEADER_SIZE; 583 taddr = ALIGN((uintptr_t)alloc_buf + 584 tsize, MTMALLOC_MIN_ALIGN); 585 tshift = ret_addr - taddr; 586 MEMALIGN_HEADER_ALLOC(ret_addr, tshift, 587 taddr); 588 ret_addr = taddr; 589 shift = ret_addr - (uintptr_t)alloc_buf; 590 } 591 /* FALLTHROUGH */ 592 case HEAD_AND_DATA_OVERSIZE: 593 /* 594 * Split off the head fragment and 595 * return it back to oversize freelist. 596 * Create oversize header for the piece 597 * of (data + tail fragment). 598 */ 599 orig = (oversize_t *)((uintptr_t)alloc_buf - 600 OVSZ_HEADER_SIZE); 601 big = oversize_header_alloc(ret_addr - 602 OVSZ_HEADER_SIZE, (orig->size - shift)); 603 (void) mutex_lock(&oversize_lock); 604 insert_hash(big); 605 (void) mutex_unlock(&oversize_lock); 606 orig->size = shift - OVSZ_HEADER_SIZE; 607 608 /* free up the head fragment */ 609 free_oversize(orig); 610 break; 611 case TAIL_OVERSIZE: 612 /* 613 * If we can extend data > MAX_CACHED and have 614 * tail-end still > MAX_CACHED, we split tail 615 * end, otherwise just create memalign header. 616 */ 617 orig = (oversize_t *)((uintptr_t)alloc_buf - 618 OVSZ_HEADER_SIZE); 619 tsize = orig->size - (MAX_CACHED + 8 + 620 shift + OVSZ_HEADER_SIZE + 621 MTMALLOC_MIN_ALIGN); 622 if (!IS_OVERSIZE(tsize, alloc_size)) { 623 MEMALIGN_HEADER_ALLOC(ret_addr, shift, 624 alloc_buf); 625 break; 626 } else { 627 size = MAX_CACHED + 8; 628 } 629 /* FALLTHROUGH */ 630 case DATA_AND_TAIL_OVERSIZE: 631 /* 632 * Split off the tail fragment and 633 * return it back to oversize freelist. 634 * Create memalign header and adjust 635 * the size for the piece of 636 * (head fragment + data). 637 */ 638 taddr = ALIGN(ret_addr + size, 639 MTMALLOC_MIN_ALIGN); 640 data_sz = (size_t)(taddr - 641 (uintptr_t)alloc_buf); 642 orig = (oversize_t *)((uintptr_t)alloc_buf - 643 OVSZ_HEADER_SIZE); 644 tsize = orig->size - data_sz; 645 orig->size = data_sz; 646 MEMALIGN_HEADER_ALLOC(ret_buf, shift, 647 alloc_buf); 648 tsize -= OVSZ_HEADER_SIZE; 649 tail = oversize_header_alloc(taddr, tsize); 650 free_oversize(tail); 651 break; 652 case HEAD_AND_TAIL_OVERSIZE: 653 /* 654 * Split off the head fragment. 655 * We try to free up tail-end when we can 656 * extend data size to (MAX_CACHED + 8) 657 * and remain tail-end oversized. 658 * The bottom line is all split pieces 659 * should be oversize in size. 660 */ 661 orig = (oversize_t *)((uintptr_t)alloc_buf - 662 OVSZ_HEADER_SIZE); 663 tsize = orig->size - (MAX_CACHED + 8 + 664 OVSZ_HEADER_SIZE + shift + 665 MTMALLOC_MIN_ALIGN); 666 667 if (!IS_OVERSIZE(tsize, alloc_size)) { 668 /* 669 * If the chunk is not big enough 670 * to make both data and tail oversize 671 * we just keep them as one piece. 672 */ 673 big = oversize_header_alloc(ret_addr - 674 OVSZ_HEADER_SIZE, 675 orig->size - shift); 676 (void) mutex_lock(&oversize_lock); 677 insert_hash(big); 678 (void) mutex_unlock(&oversize_lock); 679 orig->size = shift - OVSZ_HEADER_SIZE; 680 free_oversize(orig); 681 break; 682 } else { 683 /* 684 * extend data size > MAX_CACHED 685 * and handle it as head, data, tail 686 * are all oversized. 687 */ 688 size = MAX_CACHED + 8; 689 } 690 /* FALLTHROUGH */ 691 case ALL_OVERSIZE: 692 /* 693 * split off the head and tail fragments, 694 * return them back to the oversize freelist. 695 * Alloc oversize header for data seg. 696 */ 697 orig = (oversize_t *)((uintptr_t)alloc_buf - 698 OVSZ_HEADER_SIZE); 699 tsize = orig->size; 700 orig->size = shift - OVSZ_HEADER_SIZE; 701 free_oversize(orig); 702 703 taddr = ALIGN(ret_addr + size, 704 MTMALLOC_MIN_ALIGN); 705 data_sz = taddr - ret_addr; 706 assert(tsize > (shift + data_sz + 707 OVSZ_HEADER_SIZE)); 708 tail_sz = tsize - 709 (shift + data_sz + OVSZ_HEADER_SIZE); 710 711 /* create oversize header for data seg */ 712 big = oversize_header_alloc(ret_addr - 713 OVSZ_HEADER_SIZE, data_sz); 714 (void) mutex_lock(&oversize_lock); 715 insert_hash(big); 716 (void) mutex_unlock(&oversize_lock); 717 718 /* create oversize header for tail fragment */ 719 tail = oversize_header_alloc(taddr, tail_sz); 720 free_oversize(tail); 721 break; 722 default: 723 /* should not reach here */ 724 assert(0); 725 } 726 } 727 return (ret_buf); 728 } 729 730 731 void * 732 valloc(size_t size) 733 { 734 static unsigned pagesize; 735 736 if (size == 0) 737 return (NULL); 738 739 if (!pagesize) 740 pagesize = sysconf(_SC_PAGESIZE); 741 742 return (memalign(pagesize, size)); 743 } 744 745 void 746 mallocctl(int cmd, long value) 747 { 748 switch (cmd) { 749 750 case MTDEBUGPATTERN: 751 /* 752 * Reinitialize free blocks in case malloc() is called prior 753 * to mallocctl(). 754 */ 755 if (value && !(debugopt & cmd)) { 756 reinit++; 757 debugopt |= cmd; 758 reinit_cpu_list(); 759 } 760 /*FALLTHRU*/ 761 case MTDOUBLEFREE: 762 case MTINITBUFFER: 763 if (value) 764 debugopt |= cmd; 765 else 766 debugopt &= ~cmd; 767 break; 768 case MTCHUNKSIZE: 769 if (value >= MINSIZE && value <= MAXSIZE) 770 requestsize = value; 771 break; 772 default: 773 break; 774 } 775 } 776 777 /* 778 * Initialization function, called from the init section of the library. 779 * No locking is required here because we are single-threaded during 780 * library initialization. 781 */ 782 static void 783 setup_caches(void) 784 { 785 uintptr_t oldbrk; 786 uintptr_t newbrk; 787 788 size_t cache_space_needed; 789 size_t padding; 790 791 curcpu_func new_curcpu; 792 uint_t new_cpu_mask; 793 percpu_t *new_cpu_list; 794 795 uint_t i, j; 796 uintptr_t list_addr; 797 798 /* 799 * Get a decent "current cpu identifier", to be used to reduce 800 * contention. Eventually, this should be replaced by an interface 801 * to get the actual CPU sequence number in libthread/liblwp. 802 */ 803 new_curcpu = (curcpu_func)thr_self; 804 if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0) 805 ncpus = 4; /* decent default value */ 806 807 /* round ncpus up to a power of 2 */ 808 while (ncpus & (ncpus - 1)) 809 ncpus++; 810 811 new_cpu_mask = ncpus - 1; /* create the cpu mask */ 812 813 /* 814 * We now do some magic with the brk. What we want to get in the 815 * end is a bunch of well-aligned stuff in a big initial allocation. 816 * Along the way, we do sanity checks to make sure no one else has 817 * touched the brk (which shouldn't happen, but it's always good to 818 * check) 819 * 820 * First, make sure sbrk is sane, and store the current brk in oldbrk. 821 */ 822 oldbrk = (uintptr_t)sbrk(0); 823 if ((void *)oldbrk == (void *)-1) 824 abort(); /* sbrk is broken -- we're doomed. */ 825 826 /* 827 * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that 828 * the percpu structures and cache lists will be properly aligned. 829 * 830 * 2. All hunks will be page-aligned, assuming HUNKSIZE >= PAGESIZE, 831 * so they can be paged out individually. 832 */ 833 newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT); 834 if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk) 835 abort(); /* sbrk is broken -- we're doomed. */ 836 837 /* 838 * For each cpu, there is one percpu_t and a list of caches 839 */ 840 cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE); 841 842 new_cpu_list = (percpu_t *)sbrk(cache_space_needed); 843 844 if (new_cpu_list == (percpu_t *)-1 || 845 (uintptr_t)new_cpu_list != newbrk) 846 abort(); /* sbrk is broken -- we're doomed. */ 847 848 /* 849 * Finally, align the brk to HUNKSIZE so that all hunks are 850 * page-aligned, to avoid edge-effects. 851 */ 852 853 newbrk = (uintptr_t)new_cpu_list + cache_space_needed; 854 855 padding = ALIGN(newbrk, HUNKSIZE) - newbrk; 856 857 if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk) 858 abort(); /* sbrk is broken -- we're doomed. */ 859 860 list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus)); 861 862 /* initialize the percpu list */ 863 for (i = 0; i < ncpus; i++) { 864 new_cpu_list[i].mt_caches = (cache_head_t *)list_addr; 865 for (j = 0; j < NUM_CACHES; j++) { 866 new_cpu_list[i].mt_caches[j].mt_cache = NULL; 867 new_cpu_list[i].mt_caches[j].mt_hint = NULL; 868 } 869 870 (void) mutex_init(&new_cpu_list[i].mt_parent_lock, 871 USYNC_THREAD, NULL); 872 873 /* get the correct cache list alignment */ 874 list_addr += CACHELIST_SIZE; 875 } 876 877 /* 878 * Initialize oversize listhead 879 */ 880 oversize_list.next_bysize = &oversize_list; 881 oversize_list.prev_bysize = &oversize_list; 882 oversize_list.next_byaddr = &oversize_list; 883 oversize_list.prev_byaddr = &oversize_list; 884 oversize_list.addr = NULL; 885 oversize_list.size = 0; /* sentinal */ 886 887 /* 888 * Now install the global variables. 889 */ 890 curcpu = new_curcpu; 891 cpu_mask = new_cpu_mask; 892 cpu_list = new_cpu_list; 893 } 894 895 static void 896 create_cache(cache_t *cp, size_t size, uint_t chunksize) 897 { 898 long nblocks; 899 900 (void) mutex_init(&cp->mt_cache_lock, USYNC_THREAD, NULL); 901 cp->mt_size = size; 902 cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t)); 903 cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t); 904 cp->mt_hunks = chunksize; 905 /* 906 * rough calculation. We will need to adjust later. 907 */ 908 nblocks = cp->mt_span / cp->mt_size; 909 nblocks >>= 3; 910 if (nblocks == 0) { /* less than 8 free blocks in this pool */ 911 int32_t numblocks = 0; 912 long i = cp->mt_span; 913 size_t sub = cp->mt_size; 914 uchar_t mask = 0; 915 916 while (i > sub) { 917 numblocks++; 918 i -= sub; 919 } 920 nblocks = numblocks; 921 cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8); 922 cp->mt_nfree = numblocks; 923 while (numblocks--) { 924 mask |= 0x80 >> numblocks; 925 } 926 *(cp->mt_freelist) = mask; 927 } else { 928 cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist + 929 nblocks, 32); 930 /* recompute nblocks */ 931 nblocks = (uintptr_t)((caddr_t)cp->mt_freelist + 932 cp->mt_span - cp->mt_arena) / cp->mt_size; 933 cp->mt_nfree = ((nblocks >> 3) << 3); 934 /* Set everything to free */ 935 (void) memset(cp->mt_freelist, 0xff, nblocks >> 3); 936 } 937 938 if (debugopt & MTDEBUGPATTERN) 939 copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks); 940 941 cp->mt_next = NULL; 942 } 943 944 static void 945 reinit_cpu_list(void) 946 { 947 oversize_t *wp = oversize_list.next_bysize; 948 percpu_t *cpuptr; 949 cache_t *thiscache; 950 cache_head_t *cachehead; 951 952 /* Reinitialize free oversize blocks. */ 953 (void) mutex_lock(&oversize_lock); 954 if (debugopt & MTDEBUGPATTERN) 955 for (; wp != &oversize_list; wp = wp->next_bysize) 956 copy_pattern(FREEPATTERN, wp->addr, wp->size); 957 (void) mutex_unlock(&oversize_lock); 958 959 /* Reinitialize free blocks. */ 960 for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) { 961 (void) mutex_lock(&cpuptr->mt_parent_lock); 962 for (cachehead = &cpuptr->mt_caches[0]; cachehead < 963 &cpuptr->mt_caches[NUM_CACHES]; cachehead++) { 964 for (thiscache = cachehead->mt_cache; thiscache != NULL; 965 thiscache = thiscache->mt_next) { 966 (void) mutex_lock(&thiscache->mt_cache_lock); 967 if (thiscache->mt_nfree == 0) { 968 (void) mutex_unlock( 969 &thiscache->mt_cache_lock); 970 continue; 971 } 972 if (thiscache != NULL) 973 reinit_cache(thiscache); 974 (void) mutex_unlock(&thiscache->mt_cache_lock); 975 } 976 } 977 (void) mutex_unlock(&cpuptr->mt_parent_lock); 978 } 979 reinit = 0; 980 } 981 982 static void 983 reinit_cache(cache_t *thiscache) 984 { 985 uint32_t *freeblocks; /* not a uintptr_t on purpose */ 986 int32_t i, n; 987 caddr_t ret; 988 989 freeblocks = (uint32_t *)thiscache->mt_freelist; 990 while (freeblocks < (uint32_t *)thiscache->mt_arena) { 991 if (*freeblocks & 0xffffffff) { 992 for (i = 0; i < 32; i++) { 993 if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) { 994 n = (uintptr_t)(((freeblocks - 995 (uint32_t *)thiscache->mt_freelist) 996 << 5) + i) * thiscache->mt_size; 997 ret = thiscache->mt_arena + n; 998 ret += OVERHEAD; 999 copy_pattern(FREEPATTERN, ret, 1000 thiscache->mt_size); 1001 } 1002 } 1003 } 1004 freeblocks++; 1005 } 1006 } 1007 1008 static void * 1009 malloc_internal(size_t size, percpu_t *cpuptr) 1010 { 1011 cache_head_t *cachehead; 1012 cache_t *thiscache, *hintcache; 1013 int32_t i, n, logsz, bucket; 1014 uint32_t index; 1015 uint32_t *freeblocks; /* not a uintptr_t on purpose */ 1016 caddr_t ret; 1017 1018 logsz = MIN_CACHED_SHIFT; 1019 1020 while (size > (1 << logsz)) 1021 logsz++; 1022 1023 bucket = logsz - MIN_CACHED_SHIFT; 1024 1025 (void) mutex_lock(&cpuptr->mt_parent_lock); 1026 1027 /* 1028 * Find a cache of the appropriate size with free buffers. 1029 * 1030 * We don't need to lock each cache as we check their mt_nfree count, 1031 * since: 1032 * 1. We are only looking for caches with mt_nfree > 0. If a 1033 * free happens during our search, it will increment mt_nfree, 1034 * which will not effect the test. 1035 * 2. Allocations can decrement mt_nfree, but they can't happen 1036 * as long as we hold mt_parent_lock. 1037 */ 1038 1039 cachehead = &cpuptr->mt_caches[bucket]; 1040 1041 /* Search through the list, starting at the mt_hint */ 1042 thiscache = cachehead->mt_hint; 1043 1044 while (thiscache != NULL && thiscache->mt_nfree == 0) 1045 thiscache = thiscache->mt_next; 1046 1047 if (thiscache == NULL) { 1048 /* wrap around -- search up to the hint */ 1049 thiscache = cachehead->mt_cache; 1050 hintcache = cachehead->mt_hint; 1051 1052 while (thiscache != NULL && thiscache != hintcache && 1053 thiscache->mt_nfree == 0) 1054 thiscache = thiscache->mt_next; 1055 1056 if (thiscache == hintcache) 1057 thiscache = NULL; 1058 } 1059 1060 1061 if (thiscache == NULL) { /* there are no free caches */ 1062 int32_t thisrequest = requestsize; 1063 int32_t buffer_size = (1 << logsz) + OVERHEAD; 1064 1065 thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE); 1066 1067 if (thiscache == (cache_t *)-1) { 1068 (void) mutex_unlock(&cpuptr->mt_parent_lock); 1069 errno = EAGAIN; 1070 return (NULL); 1071 } 1072 create_cache(thiscache, buffer_size, thisrequest); 1073 1074 /* link in the new block at the beginning of the list */ 1075 thiscache->mt_next = cachehead->mt_cache; 1076 cachehead->mt_cache = thiscache; 1077 } 1078 1079 /* update the hint to the cache we found or created */ 1080 cachehead->mt_hint = thiscache; 1081 1082 /* thiscache now points to a cache with available space */ 1083 (void) mutex_lock(&thiscache->mt_cache_lock); 1084 1085 freeblocks = (uint32_t *)thiscache->mt_freelist; 1086 while (freeblocks < (uint32_t *)thiscache->mt_arena) { 1087 if (*freeblocks & 0xffffffff) 1088 break; 1089 freeblocks++; 1090 if (freeblocks < (uint32_t *)thiscache->mt_arena && 1091 *freeblocks & 0xffffffff) 1092 break; 1093 freeblocks++; 1094 if (freeblocks < (uint32_t *)thiscache->mt_arena && 1095 *freeblocks & 0xffffffff) 1096 break; 1097 freeblocks++; 1098 if (freeblocks < (uint32_t *)thiscache->mt_arena && 1099 *freeblocks & 0xffffffff) 1100 break; 1101 freeblocks++; 1102 } 1103 1104 /* 1105 * the offset from mt_freelist to freeblocks is the offset into 1106 * the arena. Be sure to include the offset into freeblocks 1107 * of the bitmask. n is the offset. 1108 */ 1109 for (i = 0; i < 32; ) { 1110 if (FLIP_EM(*freeblocks) & (0x80000000 >> i++)) 1111 break; 1112 if (FLIP_EM(*freeblocks) & (0x80000000 >> i++)) 1113 break; 1114 if (FLIP_EM(*freeblocks) & (0x80000000 >> i++)) 1115 break; 1116 if (FLIP_EM(*freeblocks) & (0x80000000 >> i++)) 1117 break; 1118 } 1119 index = 0x80000000 >> --i; 1120 1121 1122 *freeblocks &= FLIP_EM(~index); 1123 1124 thiscache->mt_nfree--; 1125 1126 (void) mutex_unlock(&thiscache->mt_cache_lock); 1127 (void) mutex_unlock(&cpuptr->mt_parent_lock); 1128 1129 n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5) 1130 + i) * thiscache->mt_size; 1131 /* 1132 * Now you have the offset in n, you've changed the free mask 1133 * in the freelist. Nothing left to do but find the block 1134 * in the arena and put the value of thiscache in the word 1135 * ahead of the handed out address and return the memory 1136 * back to the user. 1137 */ 1138 ret = thiscache->mt_arena + n; 1139 1140 /* Store the cache addr for this buf. Makes free go fast. */ 1141 *(uintptr_t *)ret = (uintptr_t)thiscache; 1142 1143 /* 1144 * This assert makes sure we don't hand out memory that is not 1145 * owned by this cache. 1146 */ 1147 assert(ret + thiscache->mt_size <= thiscache->mt_freelist + 1148 thiscache->mt_span); 1149 1150 ret += OVERHEAD; 1151 1152 assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */ 1153 1154 if (reinit == 0 && (debugopt & MTDEBUGPATTERN)) 1155 if (verify_pattern(FREEPATTERN, ret, size)) 1156 abort(); /* reference after free */ 1157 1158 if (debugopt & MTINITBUFFER) 1159 copy_pattern(INITPATTERN, ret, size); 1160 return ((void *)ret); 1161 } 1162 1163 static void * 1164 morecore(size_t bytes) 1165 { 1166 void * ret; 1167 1168 if (bytes > LONG_MAX) { 1169 intptr_t wad; 1170 /* 1171 * The request size is too big. We need to do this in 1172 * chunks. Sbrk only takes an int for an arg. 1173 */ 1174 if (bytes == ULONG_MAX) 1175 return ((void *)-1); 1176 1177 ret = sbrk(0); 1178 wad = LONG_MAX; 1179 while (wad > 0) { 1180 if (sbrk(wad) == (void *)-1) { 1181 if (ret != sbrk(0)) 1182 (void) sbrk(-LONG_MAX); 1183 return ((void *)-1); 1184 } 1185 bytes -= LONG_MAX; 1186 wad = bytes; 1187 } 1188 } else 1189 ret = sbrk(bytes); 1190 1191 return (ret); 1192 } 1193 1194 1195 static void * 1196 oversize(size_t size) 1197 { 1198 caddr_t ret; 1199 oversize_t *big; 1200 1201 /* make sure we will not overflow */ 1202 if (size > MAX_MTMALLOC) { 1203 errno = ENOMEM; 1204 return (NULL); 1205 } 1206 1207 /* 1208 * Since we ensure every address we hand back is 1209 * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the 1210 * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends. 1211 * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN, 1212 * particularly where coalescing occurs. 1213 */ 1214 size = ALIGN(size, MTMALLOC_MIN_ALIGN); 1215 1216 /* 1217 * The idea with the global lock is that we are sure to 1218 * block in the kernel anyway since given an oversize alloc 1219 * we are sure to have to call morecore(); 1220 */ 1221 (void) mutex_lock(&oversize_lock); 1222 1223 if ((big = find_oversize(size)) != NULL) { 1224 if (reinit == 0 && (debugopt & MTDEBUGPATTERN)) 1225 if (verify_pattern(FREEPATTERN, big->addr, size)) 1226 abort(); /* reference after free */ 1227 } else { 1228 /* Get more 8-byte aligned memory from heap */ 1229 ret = morecore(size + OVSZ_HEADER_SIZE); 1230 if (ret == (caddr_t)-1) { 1231 (void) mutex_unlock(&oversize_lock); 1232 errno = ENOMEM; 1233 return (NULL); 1234 } 1235 big = oversize_header_alloc((uintptr_t)ret, size); 1236 } 1237 ret = big->addr; 1238 1239 insert_hash(big); 1240 1241 if (debugopt & MTINITBUFFER) 1242 copy_pattern(INITPATTERN, ret, size); 1243 1244 (void) mutex_unlock(&oversize_lock); 1245 assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */ 1246 return ((void *)ret); 1247 } 1248 1249 static void 1250 insert_oversize(oversize_t *op, oversize_t *nx) 1251 { 1252 oversize_t *sp; 1253 1254 /* locate correct insertion point in size-ordered list */ 1255 for (sp = oversize_list.next_bysize; 1256 sp != &oversize_list && (op->size > sp->size); 1257 sp = sp->next_bysize) 1258 ; 1259 1260 /* link into size-ordered list */ 1261 op->next_bysize = sp; 1262 op->prev_bysize = sp->prev_bysize; 1263 op->prev_bysize->next_bysize = op; 1264 op->next_bysize->prev_bysize = op; 1265 1266 /* 1267 * link item into address-ordered list 1268 * (caller provides insertion point as an optimization) 1269 */ 1270 op->next_byaddr = nx; 1271 op->prev_byaddr = nx->prev_byaddr; 1272 op->prev_byaddr->next_byaddr = op; 1273 op->next_byaddr->prev_byaddr = op; 1274 1275 } 1276 1277 static void 1278 unlink_oversize(oversize_t *lp) 1279 { 1280 /* unlink from address list */ 1281 lp->prev_byaddr->next_byaddr = lp->next_byaddr; 1282 lp->next_byaddr->prev_byaddr = lp->prev_byaddr; 1283 1284 /* unlink from size list */ 1285 lp->prev_bysize->next_bysize = lp->next_bysize; 1286 lp->next_bysize->prev_bysize = lp->prev_bysize; 1287 } 1288 1289 static void 1290 position_oversize_by_size(oversize_t *op) 1291 { 1292 oversize_t *sp; 1293 1294 if (op->size > op->next_bysize->size || 1295 op->size < op->prev_bysize->size) { 1296 1297 /* unlink from size list */ 1298 op->prev_bysize->next_bysize = op->next_bysize; 1299 op->next_bysize->prev_bysize = op->prev_bysize; 1300 1301 /* locate correct insertion point in size-ordered list */ 1302 for (sp = oversize_list.next_bysize; 1303 sp != &oversize_list && (op->size > sp->size); 1304 sp = sp->next_bysize) 1305 ; 1306 1307 /* link into size-ordered list */ 1308 op->next_bysize = sp; 1309 op->prev_bysize = sp->prev_bysize; 1310 op->prev_bysize->next_bysize = op; 1311 op->next_bysize->prev_bysize = op; 1312 } 1313 } 1314 1315 static void 1316 add_oversize(oversize_t *lp) 1317 { 1318 int merge_flags = INSERT_ONLY; 1319 oversize_t *nx; /* ptr to item right of insertion point */ 1320 oversize_t *pv; /* ptr to item left of insertion point */ 1321 uint_t size_lp, size_pv, size_nx; 1322 uintptr_t endp_lp, endp_pv, endp_nx; 1323 1324 /* 1325 * Locate insertion point in address-ordered list 1326 */ 1327 1328 for (nx = oversize_list.next_byaddr; 1329 nx != &oversize_list && (lp->addr > nx->addr); 1330 nx = nx->next_byaddr) 1331 ; 1332 1333 /* 1334 * Determine how to add chunk to oversize freelist 1335 */ 1336 1337 size_lp = OVSZ_HEADER_SIZE + lp->size; 1338 endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN); 1339 size_lp = endp_lp - (uintptr_t)lp; 1340 1341 pv = nx->prev_byaddr; 1342 1343 if (pv->size) { 1344 1345 size_pv = OVSZ_HEADER_SIZE + pv->size; 1346 endp_pv = ALIGN((uintptr_t)pv + size_pv, 1347 MTMALLOC_MIN_ALIGN); 1348 size_pv = endp_pv - (uintptr_t)pv; 1349 1350 /* Check for adjacency with left chunk */ 1351 if ((uintptr_t)lp == endp_pv) 1352 merge_flags |= COALESCE_LEFT; 1353 } 1354 1355 if (nx->size) { 1356 1357 /* Check for adjacency with right chunk */ 1358 if ((uintptr_t)nx == endp_lp) { 1359 size_nx = OVSZ_HEADER_SIZE + nx->size; 1360 endp_nx = ALIGN((uintptr_t)nx + size_nx, 1361 MTMALLOC_MIN_ALIGN); 1362 size_nx = endp_nx - (uintptr_t)nx; 1363 merge_flags |= COALESCE_RIGHT; 1364 } 1365 } 1366 1367 /* 1368 * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with 1369 * FREEPATTERN for lp->size bytes. If we can merge, the oversize 1370 * header(s) that will also become part of the memory available for 1371 * reallocation (ie lp and/or nx) must also be overwritten with 1372 * FREEPATTERN or we will SIGABRT when this memory is next reallocated. 1373 */ 1374 switch (merge_flags) { 1375 1376 case INSERT_ONLY: /* Coalescing not possible */ 1377 insert_oversize(lp, nx); 1378 break; 1379 case COALESCE_LEFT: 1380 pv->size += size_lp; 1381 position_oversize_by_size(pv); 1382 if (debugopt & MTDEBUGPATTERN) 1383 copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE); 1384 break; 1385 case COALESCE_RIGHT: 1386 unlink_oversize(nx); 1387 lp->size += size_nx; 1388 insert_oversize(lp, pv->next_byaddr); 1389 if (debugopt & MTDEBUGPATTERN) 1390 copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE); 1391 break; 1392 case COALESCE_WITH_BOTH_SIDES: /* Merge (with right) to the left */ 1393 pv->size += size_lp + size_nx; 1394 unlink_oversize(nx); 1395 position_oversize_by_size(pv); 1396 if (debugopt & MTDEBUGPATTERN) { 1397 copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE); 1398 copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE); 1399 } 1400 break; 1401 } 1402 } 1403 1404 /* 1405 * Find memory on our list that is at least size big. If we find a block that is 1406 * big enough, we break it up and return the associated oversize_t struct back 1407 * to the calling client. Any leftover piece of that block is returned to the 1408 * freelist. 1409 */ 1410 static oversize_t * 1411 find_oversize(size_t size) 1412 { 1413 oversize_t *wp = oversize_list.next_bysize; 1414 while (wp != &oversize_list && size > wp->size) 1415 wp = wp->next_bysize; 1416 1417 if (wp == &oversize_list) /* empty list or nothing big enough */ 1418 return (NULL); 1419 /* breaking up a chunk of memory */ 1420 if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN))) 1421 > MAX_CACHED) { 1422 caddr_t off; 1423 oversize_t *np; 1424 size_t osize; 1425 off = (caddr_t)ALIGN(wp->addr + size, 1426 MTMALLOC_MIN_ALIGN); 1427 osize = wp->size; 1428 wp->size = (size_t)(off - wp->addr); 1429 np = oversize_header_alloc((uintptr_t)off, 1430 osize - (wp->size + OVSZ_HEADER_SIZE)); 1431 if ((long)np->size < 0) 1432 abort(); 1433 unlink_oversize(wp); 1434 add_oversize(np); 1435 } else { 1436 unlink_oversize(wp); 1437 } 1438 return (wp); 1439 } 1440 1441 static void 1442 copy_pattern(uint32_t pattern, void *buf_arg, size_t size) 1443 { 1444 uint32_t *bufend = (uint32_t *)((char *)buf_arg + size); 1445 uint32_t *buf = buf_arg; 1446 1447 while (buf < bufend - 3) { 1448 buf[3] = buf[2] = buf[1] = buf[0] = pattern; 1449 buf += 4; 1450 } 1451 while (buf < bufend) 1452 *buf++ = pattern; 1453 } 1454 1455 static void * 1456 verify_pattern(uint32_t pattern, void *buf_arg, size_t size) 1457 { 1458 uint32_t *bufend = (uint32_t *)((char *)buf_arg + size); 1459 uint32_t *buf; 1460 1461 for (buf = buf_arg; buf < bufend; buf++) 1462 if (*buf != pattern) 1463 return (buf); 1464 return (NULL); 1465 } 1466 1467 static void 1468 free_oversize(oversize_t *ovp) 1469 { 1470 assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */ 1471 assert(ovp->size > MAX_CACHED); 1472 1473 ovp->next_bysize = ovp->prev_bysize = NULL; 1474 ovp->next_byaddr = ovp->prev_byaddr = NULL; 1475 (void) mutex_lock(&oversize_lock); 1476 add_oversize(ovp); 1477 (void) mutex_unlock(&oversize_lock); 1478 } 1479 1480 static oversize_t * 1481 oversize_header_alloc(uintptr_t mem, size_t size) 1482 { 1483 oversize_t *ovsz_hdr; 1484 1485 assert(size > MAX_CACHED); 1486 1487 ovsz_hdr = (oversize_t *)mem; 1488 ovsz_hdr->prev_bysize = NULL; 1489 ovsz_hdr->next_bysize = NULL; 1490 ovsz_hdr->prev_byaddr = NULL; 1491 ovsz_hdr->next_byaddr = NULL; 1492 ovsz_hdr->hash_next = NULL; 1493 ovsz_hdr->size = size; 1494 mem += OVSZ_SIZE; 1495 *(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC; 1496 mem += OVERHEAD; 1497 assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */ 1498 ovsz_hdr->addr = (caddr_t)mem; 1499 return (ovsz_hdr); 1500 } 1501 1502 static void 1503 malloc_prepare() 1504 { 1505 percpu_t *cpuptr; 1506 cache_head_t *cachehead; 1507 cache_t *thiscache; 1508 1509 (void) mutex_lock(&oversize_lock); 1510 for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) { 1511 (void) mutex_lock(&cpuptr->mt_parent_lock); 1512 for (cachehead = &cpuptr->mt_caches[0]; 1513 cachehead < &cpuptr->mt_caches[NUM_CACHES]; 1514 cachehead++) { 1515 for (thiscache = cachehead->mt_cache; 1516 thiscache != NULL; 1517 thiscache = thiscache->mt_next) { 1518 (void) mutex_lock( 1519 &thiscache->mt_cache_lock); 1520 } 1521 } 1522 } 1523 } 1524 1525 static void 1526 malloc_release() 1527 { 1528 percpu_t *cpuptr; 1529 cache_head_t *cachehead; 1530 cache_t *thiscache; 1531 1532 for (cpuptr = &cpu_list[ncpus - 1]; cpuptr >= &cpu_list[0]; cpuptr--) { 1533 for (cachehead = &cpuptr->mt_caches[NUM_CACHES - 1]; 1534 cachehead >= &cpuptr->mt_caches[0]; 1535 cachehead--) { 1536 for (thiscache = cachehead->mt_cache; 1537 thiscache != NULL; 1538 thiscache = thiscache->mt_next) { 1539 (void) mutex_unlock( 1540 &thiscache->mt_cache_lock); 1541 } 1542 } 1543 (void) mutex_unlock(&cpuptr->mt_parent_lock); 1544 } 1545 (void) mutex_unlock(&oversize_lock); 1546 } 1547 1548 #pragma init(malloc_init) 1549 static void 1550 malloc_init(void) 1551 { 1552 /* 1553 * This works in the init section for this library 1554 * because setup_caches() doesn't call anything in libc 1555 * that calls malloc(). If it did, disaster would ensue. 1556 * 1557 * For this to work properly, this library must be the first 1558 * one to have its init section called (after libc) by the 1559 * dynamic linker. If some other library's init section 1560 * ran first and called malloc(), disaster would ensue. 1561 * Because this is an interposer library for malloc(), the 1562 * dynamic linker arranges for its init section to run first. 1563 */ 1564 (void) setup_caches(); 1565 1566 (void) pthread_atfork(malloc_prepare, malloc_release, malloc_release); 1567 } 1568