/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <mtmalloc.h>
#include "mtmalloc_impl.h"
#include <unistd.h>
#include <synch.h>
#include <thread.h>
#include <stdio.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <sys/param.h>
#include <sys/sysmacros.h>

/*
 * To turn on the asserts just compile -DDEBUG
 */

#ifndef	DEBUG
#define	NDEBUG
#endif

#include <assert.h>

/*
 * The MT hot malloc implementation contained herein is designed to be
 * plug-compatible with the libc version of malloc. It is not intended
 * to replace that implementation until we decide that it is ok to break
 * customer apps (Solaris 3.0).
 *
 * For requests up to 2^^16, the allocator initializes itself into NCPUS
 * worth of chains of caches. When a memory request is made, the calling thread
 * is vectored into one of NCPUS worth of caches. The LWP id gives us a cheap,
 * contention-reducing index to use; eventually, this should be replaced with
 * the actual CPU sequence number, when an interface to get it is available.
 *
 * Once the thread is vectored into one of the list of caches the real
 * allocation of the memory begins. The size is determined to figure out which
 * bucket the allocation should be satisfied from. The management of free
 * buckets is done via a bitmask. A free bucket is represented by a 1. The
 * first free bit represents the first free bucket. The position of the bit
 * represents the position of the bucket in the arena.
 *
 * When the memory from the arena is handed out, the address of the cache
 * control structure is written in the word preceding the returned memory.
 * This cache control address is used during free() to mark the buffer free
 * in the cache control structure.
 *
 * When all available memory in a cache has been depleted, a new chunk of memory
 * is allocated via sbrk(). The new cache is allocated from this chunk of memory
 * and initialized in the function create_cache(). New caches are installed at
 * the front of a singly linked list of the same size memory pools. This helps
 * to ensure that there will tend to be available memory in the beginning of the
 * list.
 *
 * Long linked lists hurt performance. To decrease this effect, there is a
 * tunable, requestsize, that bumps up the sbrk allocation size and thus
 * increases the number of available blocks within an arena.
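 * (For example, with the default requestsize of MINSIZE (9) and HUNKSIZE of
 * 8192, each morecore() request for a depleted cache list is 9 * 8192 = 72K;
 * mallocctl(MTCHUNKSIZE, n) raises requestsize and hence the size of every
 * subsequently created arena.)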
 * We also keep a "hint" for each cache list, which is the last cache in the
 * list allocated from. This lowers the cost of searching if there are a lot
 * of fully allocated blocks at the front of the list.
 *
 * For requests greater than 2^^16 (oversize allocations), there are two pieces
 * of overhead. There is the OVERHEAD used to hold the cache addr
 * (&oversize_list), plus an oversize_t structure to further describe the block.
 *
 * The oversize list is kept as defragmented as possible by coalescing
 * freed oversized allocations with adjacent neighbors.
 *
 * Addresses handed out are stored in a hash table, and are aligned on
 * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded-up
 * where necessary in order to achieve this. This eases the implementation of
 * MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
 *
 * A memalign allocation takes memalign header overhead. There are two
 * types of memalign headers, distinguished by MTMALLOC_MEMALIGN_MAGIC
 * and MTMALLOC_MEMALIGN_MIN_MAGIC. When the size of memory taken to
 * get to the aligned address from the malloc'ed address is the minimum size
 * OVERHEAD, we create a header taking only one OVERHEAD space with magic
 * number MTMALLOC_MEMALIGN_MIN_MAGIC, and we know that by subtracting OVERHEAD
 * from the memaligned address we can get to the malloc'ed address. Otherwise,
 * we create a memalign header taking two OVERHEAD spaces: one stores the
 * MTMALLOC_MEMALIGN_MAGIC magic number, the other one points back to the
 * malloc'ed address.
 */

#if defined(__i386) || defined(__amd64)
#include <arpa/inet.h>	/* for htonl() */
#endif

static void * morecore(size_t);
static int setup_caches(void);
static void create_cache(cache_t *, size_t bufsize, uint_t hunks);
static void * malloc_internal(size_t, percpu_t *);
static void * oversize(size_t);
static oversize_t *find_oversize(size_t);
static void add_oversize(oversize_t *);
static void copy_pattern(uint32_t, void *, size_t);
static void * verify_pattern(uint32_t, void *, size_t);
static void reinit_cpu_list(void);
static void reinit_cache(cache_t *);
static void free_oversize(oversize_t *);
static oversize_t *oversize_header_alloc(uintptr_t, size_t);

/*
 * oversize hash table stuff
 */
#define	NUM_BUCKETS	67	/* must be prime */
#define	HASH_OVERSIZE(caddr)	((uintptr_t)(caddr) % NUM_BUCKETS)
oversize_t *ovsz_hashtab[NUM_BUCKETS];

/*
 * Gets a decent "current cpu identifier", to be used to reduce contention.
 * Eventually, this should be replaced by an interface to get the actual
 * CPU sequence number in libthread/liblwp.
 */
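/*
 * _thr_self() is referenced as a weak symbol:  if it is not resolved at run
 * time, get_curcpu_func() yields a NULL function pointer and setup_caches()
 * falls back to fallback_curcpu() with a single cache list.
 */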
extern uint_t _thr_self();
#pragma weak _thr_self
#define	get_curcpu_func()	(curcpu_func)_thr_self

#define	ALIGN(x, a)	((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
			& ~((uintptr_t)(a) - 1)))

/* need this to deal with little endianess of x86 */
#if defined(__i386) || defined(__amd64)
#define	FLIP_EM(x)	htonl((x))
#else
#define	FLIP_EM(x)	(x)
#endif

#define	INSERT_ONLY			0
#define	COALESCE_LEFT			0x00000001
#define	COALESCE_RIGHT			0x00000002
#define	COALESCE_WITH_BOTH_SIDES	(COALESCE_LEFT | COALESCE_RIGHT)

#define	OVERHEAD	8	/* size needed to write cache addr */
#define	HUNKSIZE	8192	/* just a multiplier */

#define	MAX_CACHED_SHIFT	16	/* 64K is the max cached size */
#define	MAX_CACHED		(1 << MAX_CACHED_SHIFT)
#define	MIN_CACHED_SHIFT	4	/* smaller requests rounded up */
#define	MTMALLOC_MIN_ALIGN	8	/* min guaranteed alignment */

/* maximum size before overflow */
#define	MAX_MTMALLOC	(SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \
			- OVSZ_HEADER_SIZE)

#define	NUM_CACHES	(MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
#define	CACHELIST_SIZE	ALIGN(NUM_CACHES * sizeof (cache_head_t), \
    CACHE_COHERENCY_UNIT)

#define	MINSIZE		9	/* for requestsize, tunable */
#define	MAXSIZE		256	/* arbitrary, big enough, for requestsize */

#define	FREEPATTERN	0xdeadbeef /* debug fill pattern for free buf */
#define	INITPATTERN	0xbaddcafe /* debug fill pattern for new buf */

#define	misaligned(p)	((unsigned)(p) & (sizeof (int) - 1))
#define	IS_OVERSIZE(x, y)	(((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))

static long requestsize = MINSIZE; /* 9 pages per cache; tunable; 9 is min */

static uint_t cpu_mask;
static curcpu_func curcpu;

static int32_t debugopt;
static int32_t reinit;

static percpu_t *cpu_list;
static oversize_t oversize_list;
static mutex_t oversize_lock;

static int ncpus;

#define	MTMALLOC_OVERSIZE_MAGIC		((uintptr_t)&oversize_list)
#define	MTMALLOC_MEMALIGN_MAGIC		((uintptr_t)&oversize_list + 1)
#define	MTMALLOC_MEMALIGN_MIN_MAGIC	((uintptr_t)&oversize_list + 2)

/*
 * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
 * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
 * this is achieved.
 */
#define	OVSZ_SIZE	(ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
#define	OVSZ_HEADER_SIZE	(OVSZ_SIZE + OVERHEAD)

/*
 * memalign header takes 2 OVERHEAD space. One for memalign magic, and the
 * other one points back to the start address of originally allocated space.
 */
#define	MEMALIGN_HEADER_SIZE	2 * OVERHEAD
#define	MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
	if (shift == OVERHEAD)\
		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
			MTMALLOC_MEMALIGN_MIN_MAGIC; \
	else {\
		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
			MTMALLOC_MEMALIGN_MAGIC; \
		*((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \
			(uintptr_t)malloc_addr; \
	}

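/*
 * Resulting layouts (illustrative), where "x" is the aligned address handed
 * back to the caller and "malloc_addr" the start of the underlying buffer:
 *
 *	shift == OVERHEAD:	... | MTMALLOC_MEMALIGN_MIN_MAGIC | x ...
 *
 *	shift > OVERHEAD:	... | malloc_addr | MTMALLOC_MEMALIGN_MAGIC | x ...
 */
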
void *
malloc(size_t bytes)
{
	percpu_t *list_rotor;
	uint_t list_index;

	/*
	 * this test is due to linking with libthread.
	 * There are malloc calls prior to this library
	 * being initialized.
	 *
	 * If setup_caches fails, we set ENOMEM and return NULL
	 */
	if (cpu_list == (percpu_t *)NULL) {
		if (setup_caches() == 0) {
			errno = ENOMEM;
			return (NULL);
		}
	}

	if (bytes > MAX_CACHED)
		return (oversize(bytes));

	list_index = (curcpu() & cpu_mask);

	list_rotor = &cpu_list[list_index];

	return (malloc_internal(bytes, list_rotor));
}

void *
realloc(void * ptr, size_t bytes)
{
	void *new, *data_ptr;
	cache_t *cacheptr;
	caddr_t mem;
	size_t shift = 0;

	if (ptr == NULL)
		return (malloc(bytes));

	if (bytes == 0) {
		free(ptr);
		return (NULL);
	}

	data_ptr = ptr;
	mem = (caddr_t)ptr - OVERHEAD;

	new = malloc(bytes);

	if (new == NULL)
		return (NULL);

	/*
	 * If new == ptr, ptr has previously been freed. Passing a freed pointer
	 * to realloc() is not allowed - unless the caller specifically states
	 * otherwise, in which case we must avoid freeing ptr (ie new) before we
	 * return new. There is (obviously) no requirement to memcpy() ptr to
	 * new before we return.
	 */
	if (new == ptr) {
		if (!(debugopt & MTDOUBLEFREE))
			abort();
		return (new);
	}

	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
		mem -= OVERHEAD;
		ptr = (void *)*(uintptr_t *)mem;
		mem = (caddr_t)ptr - OVERHEAD;
		shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr);
	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
		ptr = (void *) mem;
		mem -= OVERHEAD;
		shift = OVERHEAD;
	}

	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
		oversize_t *old;

		old = (oversize_t *)(mem - OVSZ_SIZE);
		(void) memcpy(new, data_ptr, MIN(bytes, old->size - shift));
		free(ptr);
		return (new);
	}

	cacheptr = (cache_t *)*(uintptr_t *)mem;

	(void) memcpy(new, data_ptr,
	    MIN(cacheptr->mt_size - OVERHEAD - shift, bytes));
	free(ptr);

	return (new);
}

void *
calloc(size_t nelem, size_t bytes)
{
	void * ptr;
	size_t size = nelem * bytes;

	ptr = malloc(size);
	if (ptr == NULL)
		return (NULL);
	bzero(ptr, size);

	return (ptr);
}

void
free(void * ptr)
{
	cache_t *cacheptr;
	caddr_t mem;
	int32_t i;
	caddr_t freeblocks;
	uintptr_t offset;
	uchar_t mask;
	int32_t which_bit, num_bytes;

	if (ptr == NULL)
		return;

	mem = (caddr_t)ptr - OVERHEAD;

	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
		mem -= OVERHEAD;
		ptr = (void *)*(uintptr_t *)mem;
		mem = (caddr_t)ptr - OVERHEAD;
	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
		ptr = (void *) mem;
		mem -= OVERHEAD;
	}

	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
		oversize_t *big, **opp;
		int bucket;

		big = (oversize_t *)(mem - OVSZ_SIZE);
		(void) mutex_lock(&oversize_lock);

		bucket = HASH_OVERSIZE(big->addr);
		for (opp = &ovsz_hashtab[bucket]; *opp != NULL;
		    opp = &(*opp)->hash_next)
			if (*opp == big)
				break;

		if (*opp == NULL) {
			if (!(debugopt & MTDOUBLEFREE))
				abort();
			(void) mutex_unlock(&oversize_lock);
			return;
		}

		*opp = big->hash_next;	/* remove big from the hash table */
		big->hash_next = NULL;

		if (debugopt & MTDEBUGPATTERN)
			copy_pattern(FREEPATTERN, ptr, big->size);
		add_oversize(big);
		(void) mutex_unlock(&oversize_lock);
		return;
	}

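	/*
	 * Not an oversize buffer:  the word in front of the buffer is the
	 * address of the cache_t it was carved from (stored there by
	 * malloc_internal()), which tells us which freelist bitmask to mark.
	 */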
	cacheptr = (cache_t *)*(uintptr_t *)mem;
	freeblocks = cacheptr->mt_freelist;

	/*
	 * This is the distance measured in bits into the arena.
	 * The value of offset is in bytes but there is a 1-1 correlation
	 * between distance into the arena and distance into the
	 * freelist bitmask.
	 */
	offset = mem - cacheptr->mt_arena;

	/*
	 * i is total number of bits to offset into freelist bitmask.
	 */

	i = offset / cacheptr->mt_size;

	num_bytes = i >> 3;

	/*
	 * which_bit is the bit offset into the byte in the freelist.
	 * if our freelist bitmask looks like 0xf3 and we are freeing
	 * block 5 (ie: the 6th block) our mask will be 0xf7 after
	 * the free. Things go left to right that's why the mask is 0x80
	 * and not 0x01.
	 */
	which_bit = i - (num_bytes << 3);

	mask = 0x80 >> which_bit;

	freeblocks += num_bytes;

	if (debugopt & MTDEBUGPATTERN)
		copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD);

	(void) mutex_lock(&cacheptr->mt_cache_lock);

	if (*freeblocks & mask) {
		if (!(debugopt & MTDOUBLEFREE))
			abort();
	} else {
		*freeblocks |= mask;
		cacheptr->mt_nfree++;
	}

	(void) mutex_unlock(&cacheptr->mt_cache_lock);
}

void *
memalign(size_t alignment, size_t size)
{
	size_t alloc_size;
	uintptr_t offset;
	void *alloc_buf;
	void *ret_buf;

	if (size == 0 || alignment == 0 ||
	    misaligned(alignment) ||
	    (alignment & (alignment - 1)) != 0) {
		errno = EINVAL;
		return (NULL);
	}

	/* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
	if (alignment <= MTMALLOC_MIN_ALIGN)
		return (malloc(size));

	alloc_size = size + alignment - MTMALLOC_MIN_ALIGN;

	if (alloc_size < size) { /* overflow */
		errno = ENOMEM;
		return (NULL);
	}

	alloc_buf = malloc(alloc_size);

	if (alloc_buf == NULL)
		/* malloc sets errno */
		return (NULL);

	/*
	 * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
	 * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since
	 * we will use alloc_size to return the excess fragments to the free
	 * list, we also round-up alloc_size if necessary.
	 */
	if ((alloc_size > MAX_CACHED) &&
	    (alloc_size & (MTMALLOC_MIN_ALIGN - 1)))
		alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN);

	if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) {
		/* aligned correctly */

		size_t frag_size = alloc_size -
		    (size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

		/*
		 * If the leftover piece of the memory > MAX_CACHED,
		 * split off the piece and return it back to the freelist.
		 */
		if (IS_OVERSIZE(frag_size, alloc_size)) {
			oversize_t *orig, *tail;
			uintptr_t taddr;
			size_t data_size;
			taddr = ALIGN((uintptr_t)alloc_buf + size,
			    MTMALLOC_MIN_ALIGN);
			data_size = taddr - (uintptr_t)alloc_buf;
			orig = (oversize_t *)((uintptr_t)alloc_buf -
			    OVSZ_HEADER_SIZE);
			frag_size = orig->size - data_size -
			    OVSZ_HEADER_SIZE;
			orig->size = data_size;
			tail = oversize_header_alloc(taddr, frag_size);
			free_oversize(tail);
		}
		ret_buf = alloc_buf;
	} else {
		uchar_t	oversize_bits = 0;
		size_t	head_sz, data_sz, tail_sz;
		uintptr_t ret_addr, taddr, shift, tshift;
		oversize_t *orig, *tail;
		size_t tsize;

		/* needs to be aligned */
		shift = alignment - offset;

		assert(shift >= MTMALLOC_MIN_ALIGN);

		ret_addr = ((uintptr_t)alloc_buf + shift);
		ret_buf = (void *)ret_addr;

		if (alloc_size <= MAX_CACHED) {
			MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf);
			return (ret_buf);
		}

		/*
		 * Only check for the fragments when the memory is allocated
		 * from oversize_list. Split off a fragment and return it
		 * to the oversize freelist when it's > MAX_CACHED.
		 */

		head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE);

		tail_sz = alloc_size -
		    (shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

		oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) |
		    IS_OVERSIZE(size, alloc_size) << DATA_SHIFT |
		    IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT;

		switch (oversize_bits) {
		case NONE_OVERSIZE:
		case DATA_OVERSIZE:
			MEMALIGN_HEADER_ALLOC(ret_addr, shift,
			    alloc_buf);
			break;
		case HEAD_OVERSIZE:
			/*
			 * If we can extend data > MAX_CACHED and have
			 * head still > MAX_CACHED, we split head-end
			 * as the case of head-end and data oversized,
			 * otherwise just create memalign header.
			 */
			tsize = (shift + size) - (MAX_CACHED + 8 +
			    MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

			if (!IS_OVERSIZE(tsize, alloc_size)) {
				MEMALIGN_HEADER_ALLOC(ret_addr, shift,
				    alloc_buf);
				break;
			} else {
				tsize += OVSZ_HEADER_SIZE;
				taddr = ALIGN((uintptr_t)alloc_buf +
				    tsize, MTMALLOC_MIN_ALIGN);
				tshift = ret_addr - taddr;
				MEMALIGN_HEADER_ALLOC(ret_addr, tshift,
				    taddr);
				ret_addr = taddr;
				shift = ret_addr - (uintptr_t)alloc_buf;
			}
			/* FALLTHROUGH */
		case HEAD_AND_DATA_OVERSIZE:
			/*
			 * Split off the head fragment and
			 * return it back to oversize freelist.
			 * Create oversize header for the piece
			 * of (data + tail fragment).
			 */
			orig = (oversize_t *)((uintptr_t)alloc_buf -
			    OVSZ_HEADER_SIZE);
			(void) oversize_header_alloc(ret_addr -
			    OVSZ_HEADER_SIZE,
			    (orig->size - shift));
			orig->size = shift - OVSZ_HEADER_SIZE;

			/* free up the head fragment */
			free_oversize(orig);
			break;
		case TAIL_OVERSIZE:
			/*
			 * If we can extend data > MAX_CACHED and have
			 * tail-end still > MAX_CACHED, we split tail
			 * end, otherwise just create memalign header.
			 */
			orig = (oversize_t *)((uintptr_t)alloc_buf -
			    OVSZ_HEADER_SIZE);
			tsize = orig->size - (MAX_CACHED + 8 +
			    shift + OVSZ_HEADER_SIZE +
			    MTMALLOC_MIN_ALIGN);
			if (!IS_OVERSIZE(tsize, alloc_size)) {
				MEMALIGN_HEADER_ALLOC(ret_addr, shift,
				    alloc_buf);
				break;
			} else {
				size = MAX_CACHED + 8;
			}
			/* FALLTHROUGH */
		case DATA_AND_TAIL_OVERSIZE:
			/*
			 * Split off the tail fragment and
			 * return it back to oversize freelist.
			 * Create memalign header and adjust
			 * the size for the piece of
			 * (head fragment + data).
			 */
			taddr = ALIGN(ret_addr + size,
			    MTMALLOC_MIN_ALIGN);
			data_sz = (size_t)(taddr -
			    (uintptr_t)alloc_buf);
			orig = (oversize_t *)((uintptr_t)alloc_buf -
			    OVSZ_HEADER_SIZE);
			tsize = orig->size - data_sz;
			orig->size = data_sz;
			MEMALIGN_HEADER_ALLOC(ret_buf, shift,
			    alloc_buf);
			tsize -= OVSZ_HEADER_SIZE;
			tail = oversize_header_alloc(taddr, tsize);
			free_oversize(tail);
			break;
		case HEAD_AND_TAIL_OVERSIZE:
			/*
			 * Split off the head fragment.
			 * We try to free up tail-end when we can
			 * extend data size to (MAX_CACHED + 8)
			 * and remain tail-end oversized.
			 * The bottom line is all split pieces
			 * should be oversize in size.
			 */
			orig = (oversize_t *)((uintptr_t)alloc_buf -
			    OVSZ_HEADER_SIZE);
			tsize = orig->size - (MAX_CACHED + 8 +
			    OVSZ_HEADER_SIZE + shift +
			    MTMALLOC_MIN_ALIGN);

			if (!IS_OVERSIZE(tsize, alloc_size)) {
				/*
				 * If the chunk is not big enough
				 * to make both data and tail oversize
				 * we just keep them as one piece.
				 */
				(void) oversize_header_alloc(ret_addr -
				    OVSZ_HEADER_SIZE,
				    orig->size - shift);
				orig->size = shift -
				    OVSZ_HEADER_SIZE;
				free_oversize(orig);
				break;
			} else {
				/*
				 * extend data size > MAX_CACHED
				 * and handle it as head, data, tail
				 * are all oversized.
				 */
				size = MAX_CACHED + 8;
			}
			/* FALLTHROUGH */
		case ALL_OVERSIZE:
			/*
			 * split off the head and tail fragments,
			 * return them back to the oversize freelist.
			 * Alloc oversize header for data seg.
			 */
			orig = (oversize_t *)((uintptr_t)alloc_buf -
			    OVSZ_HEADER_SIZE);
			tsize = orig->size;
			orig->size = shift - OVSZ_HEADER_SIZE;
			free_oversize(orig);

			taddr = ALIGN(ret_addr + size,
			    MTMALLOC_MIN_ALIGN);
			data_sz = taddr - ret_addr;
			assert(tsize > (shift + data_sz +
			    OVSZ_HEADER_SIZE));
			tail_sz = tsize -
			    (shift + data_sz + OVSZ_HEADER_SIZE);

			/* create oversize header for data seg */
			(void) oversize_header_alloc(ret_addr -
			    OVSZ_HEADER_SIZE, data_sz);

			/* create oversize header for tail fragment */
			tail = oversize_header_alloc(taddr, tail_sz);
			free_oversize(tail);
			break;
		default:
			/* should not reach here */
			assert(0);
		}
	}
	return (ret_buf);
}


void *
valloc(size_t size)
{
	static unsigned pagesize;

	if (size == 0)
		return (NULL);

	if (!pagesize)
		pagesize = sysconf(_SC_PAGESIZE);

	return (memalign(pagesize, size));
}

void
mallocctl(int cmd, long value)
{
	switch (cmd) {

	case MTDEBUGPATTERN:
		/*
		 * Reinitialize free blocks in case malloc() is called prior
		 * to mallocctl().
		 */
		if (value && !(debugopt & cmd)) {
			reinit++;
			debugopt |= cmd;
			reinit_cpu_list();
		}
		/*FALLTHRU*/
	case MTDOUBLEFREE:
	case MTINITBUFFER:
		if (value)
			debugopt |= cmd;
		else
			debugopt &= ~cmd;
		break;
	case MTCHUNKSIZE:
		if (value >= MINSIZE && value <= MAXSIZE)
			requestsize = value;
		break;
	default:
		break;
	}
}

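/*
 * Illustrative mallocctl() usage from application code (any MTCHUNKSIZE
 * value within MINSIZE..MAXSIZE is accepted; 32 is just an example):
 *
 *	mallocctl(MTDEBUGPATTERN, 1);	-- fill freed buffers with FREEPATTERN
 *	mallocctl(MTINITBUFFER, 1);	-- fill new buffers with INITPATTERN
 *	mallocctl(MTCHUNKSIZE, 32);	-- sbrk 32 hunks per new cache
 */
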
/*
 * if this function is changed, update the fallback code in setup_caches to
 * set ncpus to the number of possible return values. (currently 1)
 */
static uint_t
fallback_curcpu(void)
{
	return (0);
}

/*
 * Returns non-zero on success, zero on failure.
 *
 * This carefully doesn't set cpu_list until initialization is finished.
 */
static int
setup_caches(void)
{
	static mutex_t init_lock = DEFAULTMUTEX;

	uintptr_t oldbrk;
	uintptr_t newbrk;

	size_t cache_space_needed;
	size_t padding;

	curcpu_func new_curcpu;
	uint_t new_cpu_mask;
	percpu_t *new_cpu_list;

	uint_t i, j;
	uintptr_t list_addr;

	(void) mutex_lock(&init_lock);
	if (cpu_list != NULL) {
		(void) mutex_unlock(&init_lock);
		return (1);		/* success -- already initialized */
	}

	new_curcpu = get_curcpu_func();
	if (new_curcpu == NULL) {
		new_curcpu = fallback_curcpu;
		ncpus = 1;
	} else {
		if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0)
			ncpus = 4; /* decent default value */
	}
	assert(ncpus > 0);

	/* round ncpus up to a power of 2 */
	while (ncpus & (ncpus - 1))
		ncpus++;

	new_cpu_mask = ncpus - 1;	/* create the cpu mask */

	/*
	 * We now do some magic with the brk. What we want to get in the
	 * end is a bunch of well-aligned stuff in a big initial allocation.
	 * Along the way, we do sanity checks to make sure no one else has
	 * touched the brk (which shouldn't happen, but it's always good to
	 * check).
	 *
	 * First, make sure sbrk is sane, and store the current brk in oldbrk.
	 */
	oldbrk = (uintptr_t)sbrk(0);
	if ((void *)oldbrk == (void *)-1) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* sbrk is broken -- we're doomed. */
	}

	/*
	 * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
	 * the percpu structures and cache lists will be properly aligned.
	 *
	 * All hunks will be page-aligned, assuming HUNKSIZE >= PAGESIZE,
	 * so they can be paged out individually.
	 */
	newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT);
	if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* someone else sbrked */
	}

	/*
	 * For each cpu, there is one percpu_t and a list of caches
	 */
	cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE);

	new_cpu_list = (percpu_t *)sbrk(cache_space_needed);

	if (new_cpu_list == (percpu_t *)-1 ||
	    (uintptr_t)new_cpu_list != newbrk) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* someone else sbrked */
	}

	/*
	 * Finally, align the brk to HUNKSIZE so that all hunks are
	 * page-aligned, to avoid edge-effects.
	 */

	newbrk = (uintptr_t)new_cpu_list + cache_space_needed;

	padding = ALIGN(newbrk, HUNKSIZE) - newbrk;

	if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk) {
		(void) mutex_unlock(&init_lock);
		return (0);	/* someone else sbrked */
	}

	list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus));

	/* initialize the percpu list */
	for (i = 0; i < ncpus; i++) {
		new_cpu_list[i].mt_caches = (cache_head_t *)list_addr;
		for (j = 0; j < NUM_CACHES; j++) {
			new_cpu_list[i].mt_caches[j].mt_cache = NULL;
			new_cpu_list[i].mt_caches[j].mt_hint = NULL;
		}

		bzero(&new_cpu_list[i].mt_parent_lock, sizeof (mutex_t));

		/* get the correct cache list alignment */
		list_addr += CACHELIST_SIZE;
	}

	/*
	 * Initialize oversize listhead
	 */
	oversize_list.next_bysize = &oversize_list;
	oversize_list.prev_bysize = &oversize_list;
	oversize_list.next_byaddr = &oversize_list;
	oversize_list.prev_byaddr = &oversize_list;
	oversize_list.addr = NULL;
	oversize_list.size = 0;		/* sentinel */

	/*
	 * now install the global variables, leaving cpu_list for last, so that
	 * there aren't any race conditions.
	 */
	curcpu = new_curcpu;
	cpu_mask = new_cpu_mask;
	cpu_list = new_cpu_list;

	(void) mutex_unlock(&init_lock);

	return (1);
}

static void
create_cache(cache_t *cp, size_t size, uint_t chunksize)
{
	long nblocks;

	bzero(&cp->mt_cache_lock, sizeof (mutex_t));
	cp->mt_size = size;
	cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t));
	cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t);
	cp->mt_hunks = chunksize;
	/*
	 * rough calculation. We will need to adjust later.
	 */
	nblocks = cp->mt_span / cp->mt_size;
	nblocks >>= 3;
	if (nblocks == 0) { /* less than 8 free blocks in this pool */
		int32_t numblocks = 0;
		long i = cp->mt_span;
		size_t sub = cp->mt_size;
		uchar_t mask = 0;

		while (i > sub) {
			numblocks++;
			i -= sub;
		}
		nblocks = numblocks;
		cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8);
		cp->mt_nfree = numblocks;
		while (numblocks--) {
			mask |= 0x80 >> numblocks;
		}
		*(cp->mt_freelist) = mask;
	} else {
		cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist +
		    nblocks, 32);
		/* recompute nblocks */
		nblocks = (uintptr_t)((caddr_t)cp->mt_freelist +
		    cp->mt_span - cp->mt_arena) / cp->mt_size;
		cp->mt_nfree = ((nblocks >> 3) << 3);
		/* Set everything to free */
		(void) memset(cp->mt_freelist, 0xff, nblocks >> 3);
	}

	if (debugopt & MTDEBUGPATTERN)
		copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks);

	cp->mt_next = NULL;
}
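
/*
 * Layout of a cache chunk as built by create_cache() (illustrative):
 *
 *	+---------+------------------+---------------------------------+
 *	| cache_t | freelist bitmask | arena:  nblocks * mt_size bytes |
 *	+---------+------------------+---------------------------------+
 *
 * mt_freelist points just past the cache_t, the arena starts at the first
 * suitably aligned address after the bitmask, and each set bit in the
 * bitmask marks the corresponding arena block free.
 */
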
static void
reinit_cpu_list(void)
{
	oversize_t *wp = oversize_list.next_bysize;
	percpu_t *cpuptr;
	cache_t *thiscache;
	cache_head_t *cachehead;

	if (wp == NULL || cpu_list == NULL) {
		reinit = 0;
		return;
	}

	/* Reinitialize free oversize blocks. */
	(void) mutex_lock(&oversize_lock);
	if (debugopt & MTDEBUGPATTERN)
		for (; wp != &oversize_list; wp = wp->next_bysize)
			copy_pattern(FREEPATTERN, wp->addr, wp->size);
	(void) mutex_unlock(&oversize_lock);

	/* Reinitialize free blocks. */
	for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
		(void) mutex_lock(&cpuptr->mt_parent_lock);
		for (cachehead = &cpuptr->mt_caches[0]; cachehead <
		    &cpuptr->mt_caches[NUM_CACHES]; cachehead++) {
			for (thiscache = cachehead->mt_cache; thiscache != NULL;
			    thiscache = thiscache->mt_next) {
				(void) mutex_lock(&thiscache->mt_cache_lock);
				if (thiscache->mt_nfree == 0) {
					(void) mutex_unlock(
					    &thiscache->mt_cache_lock);
					continue;
				}
				if (thiscache != NULL)
					reinit_cache(thiscache);
				(void) mutex_unlock(&thiscache->mt_cache_lock);
			}
		}
		(void) mutex_unlock(&cpuptr->mt_parent_lock);
	}
	reinit = 0;
}

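/*
 * Walk one cache's freelist bitmask and re-stamp FREEPATTERN into every block
 * that is currently free.  Called from reinit_cpu_list() (with the cache
 * locked) when MTDEBUGPATTERN is first enabled, so that buffers freed before
 * the option was set also carry the pattern.
 */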
static void
reinit_cache(cache_t *thiscache)
{
	uint32_t *freeblocks; /* not a uintptr_t on purpose */
	int32_t i, n;
	caddr_t ret;

	freeblocks = (uint32_t *)thiscache->mt_freelist;
	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
		if (*freeblocks & 0xffffffff) {
			for (i = 0; i < 32; i++) {
				if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) {
					n = (uintptr_t)(((freeblocks -
					    (uint32_t *)thiscache->mt_freelist)
					    << 5) + i) * thiscache->mt_size;
					ret = thiscache->mt_arena + n;
					ret += OVERHEAD;
					copy_pattern(FREEPATTERN, ret,
					    thiscache->mt_size);
				}
			}
		}
		freeblocks++;
	}
}

static void *
malloc_internal(size_t size, percpu_t *cpuptr)
{
	cache_head_t *cachehead;
	cache_t *thiscache, *hintcache;
	int32_t i, n, logsz, bucket;
	uint32_t index;
	uint32_t *freeblocks; /* not a uintptr_t on purpose */
	caddr_t ret;

	logsz = MIN_CACHED_SHIFT;

	while (size > (1 << logsz))
		logsz++;

	bucket = logsz - MIN_CACHED_SHIFT;

	(void) mutex_lock(&cpuptr->mt_parent_lock);

	/*
	 * Find a cache of the appropriate size with free buffers.
	 *
	 * We don't need to lock each cache as we check their mt_nfree count,
	 * since:
	 *	1.  We are only looking for caches with mt_nfree > 0.  If a
	 *	    free happens during our search, it will increment mt_nfree,
	 *	    which will not affect the test.
	 *	2.  Allocations can decrement mt_nfree, but they can't happen
	 *	    as long as we hold mt_parent_lock.
	 */

	cachehead = &cpuptr->mt_caches[bucket];

	/* Search through the list, starting at the mt_hint */
	thiscache = cachehead->mt_hint;

	while (thiscache != NULL && thiscache->mt_nfree == 0)
		thiscache = thiscache->mt_next;

	if (thiscache == NULL) {
		/* wrap around -- search up to the hint */
		thiscache = cachehead->mt_cache;
		hintcache = cachehead->mt_hint;

		while (thiscache != NULL && thiscache != hintcache &&
		    thiscache->mt_nfree == 0)
			thiscache = thiscache->mt_next;

		if (thiscache == hintcache)
			thiscache = NULL;
	}


	if (thiscache == NULL) { /* there are no free caches */
		int32_t thisrequest = requestsize;
		int32_t buffer_size = (1 << logsz) + OVERHEAD;

		thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE);

		if (thiscache == (cache_t *)-1) {
			(void) mutex_unlock(&cpuptr->mt_parent_lock);
			errno = EAGAIN;
			return (NULL);
		}
		create_cache(thiscache, buffer_size, thisrequest);

		/* link in the new block at the beginning of the list */
		thiscache->mt_next = cachehead->mt_cache;
		cachehead->mt_cache = thiscache;
	}

	/* update the hint to the cache we found or created */
	cachehead->mt_hint = thiscache;

	/* thiscache now points to a cache with available space */
	(void) mutex_lock(&thiscache->mt_cache_lock);

	freeblocks = (uint32_t *)thiscache->mt_freelist;
	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
		if (*freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
		    *freeblocks & 0xffffffff)
			break;
		freeblocks++;
	}

	/*
	 * the offset from mt_freelist to freeblocks is the offset into
	 * the arena. Be sure to include the offset into freeblocks
	 * of the bitmask. n is the offset.
	 */
	for (i = 0; i < 32; ) {
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
			break;
	}
	index = 0x80000000 >> --i;


	*freeblocks &= FLIP_EM(~index);

	thiscache->mt_nfree--;

	(void) mutex_unlock(&thiscache->mt_cache_lock);
	(void) mutex_unlock(&cpuptr->mt_parent_lock);

	n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5)
	    + i) * thiscache->mt_size;
	/*
	 * Now you have the offset in n, you've changed the free mask
	 * in the freelist. Nothing left to do but find the block
	 * in the arena and put the value of thiscache in the word
	 * ahead of the handed out address and return the memory
	 * back to the user.
	 */
	ret = thiscache->mt_arena + n;

	/* Store the cache addr for this buf. Makes free go fast. */
	*(uintptr_t *)ret = (uintptr_t)thiscache;

	/*
	 * This assert makes sure we don't hand out memory that is not
	 * owned by this cache.
	 */
	assert(ret + thiscache->mt_size <= thiscache->mt_freelist +
	    thiscache->mt_span);

	ret += OVERHEAD;

	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */

	if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
		if (verify_pattern(FREEPATTERN, ret, size))
			abort();	/* reference after free */

	if (debugopt & MTINITBUFFER)
		copy_pattern(INITPATTERN, ret, size);
	return ((void *)ret);
}

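/*
 * Grow the heap by "bytes" via sbrk(), splitting the request into LONG_MAX
 * sized pieces when it is too large for a single signed sbrk() argument.
 * Returns the previous break on success or (void *)-1 on failure, as sbrk()
 * does.
 */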
static void *
morecore(size_t bytes)
{
	void * ret;

	if (bytes > LONG_MAX) {
		intptr_t wad;
		/*
		 * The request size is too big. We need to do this in
		 * chunks. sbrk only takes a signed argument.
		 */
		if (bytes == ULONG_MAX)
			return ((void *)-1);

		ret = sbrk(0);
		wad = LONG_MAX;
		while (wad > 0) {
			if (sbrk(wad) == (void *)-1) {
				if (ret != sbrk(0))
					(void) sbrk(-LONG_MAX);
				return ((void *)-1);
			}
			bytes -= LONG_MAX;
			wad = bytes;
		}
	} else
		ret = sbrk(bytes);

	return (ret);
}


static void *
oversize(size_t size)
{
	caddr_t ret;
	oversize_t *big;
	int bucket;

	/* make sure we will not overflow */
	if (size > MAX_MTMALLOC) {
		errno = ENOMEM;
		return (NULL);
	}

	/*
	 * Since we ensure every address we hand back is
	 * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
	 * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
	 * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN,
	 * particularly where coalescing occurs.
	 */
	size = ALIGN(size, MTMALLOC_MIN_ALIGN);

	/*
	 * The idea with the global lock is that we are sure to
	 * block in the kernel anyway since given an oversize alloc
	 * we are sure to have to call morecore();
	 */
	(void) mutex_lock(&oversize_lock);

	if ((big = find_oversize(size)) != NULL) {
		if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
			if (verify_pattern(FREEPATTERN, big->addr, size))
				abort();	/* reference after free */
	} else {
		/* Get more 8-byte aligned memory from heap */
		ret = morecore(size + OVSZ_HEADER_SIZE);
		if (ret == (caddr_t)-1) {
			(void) mutex_unlock(&oversize_lock);
			errno = ENOMEM;
			return (NULL);
		}
		big = oversize_header_alloc((uintptr_t)ret, size);
	}
	ret = big->addr;

	/* Add big to the hash table at the head of the relevant bucket. */
	bucket = HASH_OVERSIZE(ret);
	big->hash_next = ovsz_hashtab[bucket];
	ovsz_hashtab[bucket] = big;

	if (debugopt & MTINITBUFFER)
		copy_pattern(INITPATTERN, ret, size);

	(void) mutex_unlock(&oversize_lock);
	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
	return ((void *)ret);
}

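/*
 * The oversize freelist is threaded twice through the same oversize_t
 * headers:  once ordered by size (next_bysize/prev_bysize, searched by
 * find_oversize()) and once ordered by address (next_byaddr/prev_byaddr,
 * used for coalescing in add_oversize()).  oversize_list itself is a
 * sentinel with size 0 that terminates both rings.
 */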
static void
insert_oversize(oversize_t *op, oversize_t *nx)
{
	oversize_t *sp;

	/* locate correct insertion point in size-ordered list */
	for (sp = oversize_list.next_bysize;
	    sp != &oversize_list && (op->size > sp->size);
	    sp = sp->next_bysize)
		;

	/* link into size-ordered list */
	op->next_bysize = sp;
	op->prev_bysize = sp->prev_bysize;
	op->prev_bysize->next_bysize = op;
	op->next_bysize->prev_bysize = op;

	/*
	 * link item into address-ordered list
	 * (caller provides insertion point as an optimization)
	 */
	op->next_byaddr = nx;
	op->prev_byaddr = nx->prev_byaddr;
	op->prev_byaddr->next_byaddr = op;
	op->next_byaddr->prev_byaddr = op;

}

static void
unlink_oversize(oversize_t *lp)
{
	/* unlink from address list */
	lp->prev_byaddr->next_byaddr = lp->next_byaddr;
	lp->next_byaddr->prev_byaddr = lp->prev_byaddr;

	/* unlink from size list */
	lp->prev_bysize->next_bysize = lp->next_bysize;
	lp->next_bysize->prev_bysize = lp->prev_bysize;
}

static void
position_oversize_by_size(oversize_t *op)
{
	oversize_t *sp;

	if (op->size > op->next_bysize->size ||
	    op->size < op->prev_bysize->size) {

		/* unlink from size list */
		op->prev_bysize->next_bysize = op->next_bysize;
		op->next_bysize->prev_bysize = op->prev_bysize;

		/* locate correct insertion point in size-ordered list */
		for (sp = oversize_list.next_bysize;
		    sp != &oversize_list && (op->size > sp->size);
		    sp = sp->next_bysize)
			;

		/* link into size-ordered list */
		op->next_bysize = sp;
		op->prev_bysize = sp->prev_bysize;
		op->prev_bysize->next_bysize = op;
		op->next_bysize->prev_bysize = op;
	}
}

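/*
 * Coalescing cases handled below (illustrative); lp is the chunk being
 * returned, pv and nx its address-ordered neighbours:
 *
 *	[ pv ][ lp ]		COALESCE_LEFT:   pv absorbs lp
 *	      [ lp ][ nx ]	COALESCE_RIGHT:  lp absorbs nx
 *	[ pv ][ lp ][ nx ]	COALESCE_WITH_BOTH_SIDES:  pv absorbs both
 */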
static void
add_oversize(oversize_t *lp)
{
	int merge_flags = INSERT_ONLY;
	oversize_t *nx;		/* ptr to item right of insertion point */
	oversize_t *pv;		/* ptr to item left of insertion point */
	uint_t size_lp, size_pv, size_nx;
	uintptr_t endp_lp, endp_pv, endp_nx;

	/*
	 * Locate insertion point in address-ordered list
	 */

	for (nx = oversize_list.next_byaddr;
	    nx != &oversize_list && (lp->addr > nx->addr);
	    nx = nx->next_byaddr)
		;

	/*
	 * Determine how to add chunk to oversize freelist
	 */

	size_lp = OVSZ_HEADER_SIZE + lp->size;
	endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN);
	size_lp = endp_lp - (uintptr_t)lp;

	pv = nx->prev_byaddr;

	if (pv->size) {

		size_pv = OVSZ_HEADER_SIZE + pv->size;
		endp_pv = ALIGN((uintptr_t)pv + size_pv,
		    MTMALLOC_MIN_ALIGN);
		size_pv = endp_pv - (uintptr_t)pv;

		/* Check for adjacency with left chunk */
		if ((uintptr_t)lp == endp_pv)
			merge_flags |= COALESCE_LEFT;
	}

	if (nx->size) {

		/* Check for adjacency with right chunk */
		if ((uintptr_t)nx == endp_lp) {
			size_nx = OVSZ_HEADER_SIZE + nx->size;
			endp_nx = ALIGN((uintptr_t)nx + size_nx,
			    MTMALLOC_MIN_ALIGN);
			size_nx = endp_nx - (uintptr_t)nx;
			merge_flags |= COALESCE_RIGHT;
		}
	}

	/*
	 * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with
	 * FREEPATTERN for lp->size bytes. If we can merge, the oversize
	 * header(s) that will also become part of the memory available for
	 * reallocation (ie lp and/or nx) must also be overwritten with
	 * FREEPATTERN or we will SIGABRT when this memory is next reallocated.
	 */
	switch (merge_flags) {

	case INSERT_ONLY:		/* Coalescing not possible */
		insert_oversize(lp, nx);
		break;
	case COALESCE_LEFT:
		pv->size += size_lp;
		position_oversize_by_size(pv);
		if (debugopt & MTDEBUGPATTERN)
			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
		break;
	case COALESCE_RIGHT:
		unlink_oversize(nx);
		lp->size += size_nx;
		insert_oversize(lp, pv->next_byaddr);
		if (debugopt & MTDEBUGPATTERN)
			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
		break;
	case COALESCE_WITH_BOTH_SIDES:	/* Merge (with right) to the left */
		pv->size += size_lp + size_nx;
		unlink_oversize(nx);
		position_oversize_by_size(pv);
		if (debugopt & MTDEBUGPATTERN) {
			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
		}
		break;
	}
}

/*
 * Find memory on our list that is at least size big. If we find a block that
 * is big enough, we break it up and return the associated oversize_t struct
 * back to the calling client. Any leftover piece of that block is returned
 * to the freelist.
 */
static oversize_t *
find_oversize(size_t size)
{
	oversize_t *wp = oversize_list.next_bysize;
	while (wp != &oversize_list && size > wp->size)
		wp = wp->next_bysize;

	if (wp == &oversize_list) /* empty list or nothing big enough */
		return (NULL);
	/* breaking up a chunk of memory */
	if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN)))
	    > MAX_CACHED) {
		caddr_t off;
		oversize_t *np;
		size_t osize;
		off = (caddr_t)ALIGN(wp->addr + size,
		    MTMALLOC_MIN_ALIGN);
		osize = wp->size;
		wp->size = (size_t)(off - wp->addr);
		np = oversize_header_alloc((uintptr_t)off,
		    osize - (wp->size + OVSZ_HEADER_SIZE));
		if ((long)np->size < 0)
			abort();
		unlink_oversize(wp);
		add_oversize(np);
	} else {
		unlink_oversize(wp);
	}
	return (wp);
}

static void
copy_pattern(uint32_t pattern, void *buf_arg, size_t size)
{
	uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
	uint32_t *buf = buf_arg;

	while (buf < bufend - 3) {
		buf[3] = buf[2] = buf[1] = buf[0] = pattern;
		buf += 4;
	}
	while (buf < bufend)
		*buf++ = pattern;
}

static void *
verify_pattern(uint32_t pattern, void *buf_arg, size_t size)
{
	uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
	uint32_t *buf;

	for (buf = buf_arg; buf < bufend; buf++)
		if (*buf != pattern)
			return (buf);
	return (NULL);
}

static void
free_oversize(oversize_t *ovp)
{
	assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */
	assert(ovp->size > MAX_CACHED);

	ovp->next_bysize = ovp->prev_bysize = NULL;
	ovp->next_byaddr = ovp->prev_byaddr = NULL;
	(void) mutex_lock(&oversize_lock);
	add_oversize(ovp);
	(void) mutex_unlock(&oversize_lock);
}

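/*
 * Layout built by oversize_header_alloc() (illustrative):
 *
 *	+-------------------------+--------------------------+-------------
 *	| oversize_t (padded to   | MTMALLOC_OVERSIZE_MAGIC  | user data...
 *	| OVSZ_SIZE)              | (the OVERHEAD word)      |
 *	+-------------------------+--------------------------+-------------
 *	^ mem                     ^ mem + OVSZ_SIZE          ^ ovsz_hdr->addr
 */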
static oversize_t *
oversize_header_alloc(uintptr_t mem, size_t size)
{
	oversize_t *ovsz_hdr;

	assert(size > MAX_CACHED);

	ovsz_hdr = (oversize_t *)mem;
	ovsz_hdr->prev_bysize = NULL;
	ovsz_hdr->next_bysize = NULL;
	ovsz_hdr->prev_byaddr = NULL;
	ovsz_hdr->next_byaddr = NULL;
	ovsz_hdr->hash_next = NULL;
	ovsz_hdr->size = size;
	mem += OVSZ_SIZE;
	*(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC;
	mem += OVERHEAD;
	assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */
	ovsz_hdr->addr = (caddr_t)mem;
	return (ovsz_hdr);
}