1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <mtmalloc.h>
28 #include "mtmalloc_impl.h"
29 #include <unistd.h>
30 #include <synch.h>
31 #include <thread.h>
32 #include <pthread.h>
33 #include <stdio.h>
34 #include <limits.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <strings.h>
38 #include <sys/param.h>
39 #include <sys/sysmacros.h>
40
41 /*
42 * To turn on the asserts just compile -DDEBUG
43 */
44
45 #ifndef DEBUG
46 #define NDEBUG
47 #endif
48
49 #include <assert.h>
50
51 /*
52 * The MT hot malloc implementation contained herein is designed to be
53 * plug-compatible with the libc version of malloc. It is not intended
54 * to replace that implementation until we decide that it is ok to break
55 * customer apps (Solaris 3.0).
56 *
57 * For requests up to 2^^16, the allocator initializes itself into NCPUS
58 * worth of chains of caches. When a memory request is made, the calling thread
59 * is vectored into one of NCPUS worth of caches. The LWP id gives us a cheap,
60 * contention-reducing index to use, eventually, this should be replaced with
61 * the actual CPU sequence number, when an interface to get it is available.
62 *
63 * Once the thread is vectored into one of the list of caches the real
64 * allocation of the memory begins. The size is determined to figure out which
65 * bucket the allocation should be satisfied from. The management of free
66 * buckets is done via a bitmask. A free bucket is represented by a 1. The
67 * first free bit represents the first free bucket. The position of the bit,
68 * represents the position of the bucket in the arena.
69 *
70 * When the memory from the arena is handed out, the address of the cache
71 * control structure is written in the word preceding the returned memory.
72 * This cache control address is used during free() to mark the buffer free
73 * in the cache control structure.
74 *
75 * When all available memory in a cache has been depleted, a new chunk of memory
76 * is allocated via sbrk(). The new cache is allocated from this chunk of memory
77 * and initialized in the function create_cache(). New caches are installed at
78 * the front of a singly linked list of the same size memory pools. This helps
79 * to ensure that there will tend to be available memory in the beginning of the
80 * list.
81 *
82 * Long linked lists hurt performance. To decrease this effect, there is a
83 * tunable, requestsize, that bumps up the sbrk allocation size and thus
84 * increases the number of available blocks within an arena. We also keep
85 * a "hint" for each cache list, which is the last cache in the list allocated
86 * from. This lowers the cost of searching if there are a lot of fully
87 * allocated blocks at the front of the list.
88 *
89 * For requests greater than 2^^16 (oversize allocations), there are two pieces
90 * of overhead. There is the OVERHEAD used to hold the cache addr
91 * (&oversize_list), plus an oversize_t structure to further describe the block.
92 *
93 * The oversize list is kept as defragmented as possible by coalescing
94 * freed oversized allocations with adjacent neighbors.
95 *
96 * Addresses handed out are stored in a hash table, and are aligned on
97 * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded-up
98 * where necessary in order to achieve this. This eases the implementation of
99 * MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
100 *
101 * A memalign allocation takes memalign header overhead. There's two
102 * types of memalign headers distinguished by MTMALLOC_MEMALIGN_MAGIC
103 * and MTMALLOC_MEMALIGN_MIN_MAGIC. When the size of memory taken to
104 * get to the aligned address from malloc'ed address is the minimum size
105 * OVERHEAD, we create a header taking only one OVERHEAD space with magic
106 * number MTMALLOC_MEMALIGN_MIN_MAGIC, and we know by subtracting OVERHEAD
107 * from memaligned address, we can get to the malloc'ed address. Otherwise,
108 * we create a memalign header taking two OVERHEAD space, one stores
109 * MTMALLOC_MEMALIGN_MAGIC magic number, the other one points back to the
110 * malloc'ed address.
111 */
112
113 #if defined(__i386) || defined(__amd64)
114 #include <arpa/inet.h> /* for htonl() */
115 #endif
116
/*
 * Forward declarations for the file-local helpers that the public
 * allocator entry points in this file call.
 */
static void * morecore(size_t);
static void create_cache(cache_t *, size_t bufsize, uint_t hunks);
static void * malloc_internal(size_t, percpu_t *);
static void * oversize(size_t);
static oversize_t *find_oversize(size_t);
static void add_oversize(oversize_t *);
static void copy_pattern(uint32_t, void *, size_t);
static void * verify_pattern(uint32_t, void *, size_t);
static void reinit_cpu_list(void);
static void reinit_cache(cache_t *);
static void free_oversize(oversize_t *);
static oversize_t *oversize_header_alloc(uintptr_t, size_t);
129
/*
 * oversize hash table stuff
 *
 * Each bucket is a singly linked chain of oversize_t headers threaded
 * through hash_next; the bucket index is HASH_OVERSIZE() of the header's
 * addr field.
 */
#define	NUM_BUCKETS	67	/* must be prime */
#define	HASH_OVERSIZE(caddr)	((uintptr_t)(caddr) % NUM_BUCKETS)
oversize_t *ovsz_hashtab[NUM_BUCKETS];

/*
 * Round x up to a multiple of a.  The mask arithmetic only produces that
 * result when a is a power of two.
 */
#define	ALIGN(x, a)	((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
	& ~((uintptr_t)(a) - 1)))

/* need this to deal with little endianness of x86 */
#if defined(__i386) || defined(__amd64)
#define	FLIP_EM(x)	htonl((x))
#else
#define	FLIP_EM(x)	(x)
#endif

#define	INSERT_ONLY			0
#define	COALESCE_LEFT			0x00000001
#define	COALESCE_RIGHT			0x00000002
#define	COALESCE_WITH_BOTH_SIDES	(COALESCE_LEFT | COALESCE_RIGHT)

#define	OVERHEAD	8	/* size needed to write cache addr */
#define	HUNKSIZE	8192	/* just a multiplier */

#define	MAX_CACHED_SHIFT	16	/* 64K is the max cached size */
#define	MAX_CACHED		(1 << MAX_CACHED_SHIFT)
#define	MIN_CACHED_SHIFT	4	/* smaller requests rounded up */
#define	MTMALLOC_MIN_ALIGN	8	/* min guaranteed alignment */

/*
 * maximum size before overflow
 * (OVSZ_HEADER_SIZE is defined further down; that is fine because a macro
 * body is only expanded where the macro is used.)
 */
#define	MAX_MTMALLOC	(SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \
	- OVSZ_HEADER_SIZE)

/* one cache list per power of two from 2^MIN_CACHED_SHIFT to 2^MAX_CACHED_SHIFT */
#define	NUM_CACHES	(MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
#define	CACHELIST_SIZE	ALIGN(NUM_CACHES * sizeof (cache_head_t), \
	CACHE_COHERENCY_UNIT)

#define	MINSIZE		9	/* for requestsize, tunable */
#define	MAXSIZE		256	/* arbitrary, big enough, for requestsize */

#define	FREEPATTERN	0xdeadbeef	/* debug fill pattern for free buf */
#define	INITPATTERN	0xbaddcafe	/* debug fill pattern for new buf */

/* non-zero when p is not a multiple of sizeof (int) */
#define	misaligned(p)	((unsigned)(p) & (sizeof (int) - 1))
/*
 * True when MAX_CACHED < x < y.  The (x) < (y) test also rejects an x that
 * was produced by an unsigned subtraction that wrapped around.
 */
#define	IS_OVERSIZE(x, y)	(((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))

static long requestsize = MINSIZE; /* 9 pages per cache; tunable; 9 is min */

static uint_t cpu_mask;
static curcpu_func curcpu;

static int32_t debugopt;
static int32_t reinit;

static percpu_t *cpu_list;
static oversize_t oversize_list;
static mutex_t oversize_lock = DEFAULTMUTEX;

static int ncpus = 0;

/*
 * Tag values stored in the OVERHEAD-sized word in front of a buffer.  They
 * are derived from the address of oversize_list; realloc() treats any word
 * outside the range [OVERSIZE_MAGIC, MEMALIGN_MIN_MAGIC] as a cache_t
 * pointer.
 */
#define	MTMALLOC_OVERSIZE_MAGIC		((uintptr_t)&oversize_list)
#define	MTMALLOC_MEMALIGN_MAGIC		((uintptr_t)&oversize_list + 1)
#define	MTMALLOC_MEMALIGN_MIN_MAGIC	((uintptr_t)&oversize_list + 2)

/*
 * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
 * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
 * this is achieved.
 */
#define	OVSZ_SIZE		(ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
#define	OVSZ_HEADER_SIZE	(OVSZ_SIZE + OVERHEAD)
202
/*
 * memalign header takes 2 OVERHEAD space. One for memalign magic, and the
 * other one points back to the start address of originally allocated space.
 *
 * MEMALIGN_HEADER_SIZE is fully parenthesized so that it keeps its value
 * when it appears inside a larger expression.
 *
 * MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr) writes the memalign header
 * in the bytes immediately below the aligned address x:
 *   - shift == OVERHEAD: a single word holding MTMALLOC_MEMALIGN_MIN_MAGIC;
 *   - otherwise: MTMALLOC_MEMALIGN_MAGIC in the word below x, and
 *     malloc_addr in the word below that.
 * The macro is wrapped in do { } while (0) so that it behaves as one
 * statement (safe in an unbraced if/else); invoke it with a trailing ';'.
 * Note that x is evaluated more than once.
 */
#define	MEMALIGN_HEADER_SIZE	(2 * OVERHEAD)
#define	MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)			\
	do {								\
		if ((shift) == OVERHEAD) {				\
			*((uintptr_t *)((caddr_t)(x) - OVERHEAD)) =	\
			    MTMALLOC_MEMALIGN_MIN_MAGIC;		\
		} else {						\
			*((uintptr_t *)((caddr_t)(x) - OVERHEAD)) =	\
			    MTMALLOC_MEMALIGN_MAGIC;			\
			*((uintptr_t *)((caddr_t)(x) - 2 * OVERHEAD)) =	\
			    (uintptr_t)(malloc_addr);			\
		}							\
	} while (0)
218
219 /*
220 * Add big to the oversize hash table at the head of the relevant bucket.
221 */
222 static void
insert_hash(oversize_t * big)223 insert_hash(oversize_t *big)
224 {
225 caddr_t ret = big->addr;
226 int bucket = HASH_OVERSIZE(ret);
227
228 assert(MUTEX_HELD(&oversize_lock));
229 big->hash_next = ovsz_hashtab[bucket];
230 ovsz_hashtab[bucket] = big;
231 }
232
233 void *
malloc(size_t bytes)234 malloc(size_t bytes)
235 {
236 percpu_t *list_rotor;
237 uint_t list_index;
238
239 if (bytes > MAX_CACHED)
240 return (oversize(bytes));
241
242 list_index = (curcpu() & cpu_mask);
243
244 list_rotor = &cpu_list[list_index];
245
246 return (malloc_internal(bytes, list_rotor));
247 }
248
249 void *
realloc(void * ptr,size_t bytes)250 realloc(void * ptr, size_t bytes)
251 {
252 void *new, *data_ptr;
253 cache_t *cacheptr;
254 caddr_t mem;
255 size_t shift = 0;
256
257 if (ptr == NULL)
258 return (malloc(bytes));
259
260 if (bytes == 0) {
261 free(ptr);
262 return (NULL);
263 }
264
265 data_ptr = ptr;
266 mem = (caddr_t)ptr - OVERHEAD;
267
268 /*
269 * Optimization possibility :
270 * p = malloc(64);
271 * q = realloc(p, 64);
272 * q can be same as p.
273 * Apply this optimization for the normal
274 * sized caches for now.
275 */
276 if (*(uintptr_t *)mem < MTMALLOC_OVERSIZE_MAGIC ||
277 *(uintptr_t *)mem > MTMALLOC_MEMALIGN_MIN_MAGIC) {
278 cacheptr = (cache_t *)*(uintptr_t *)mem;
279 if (bytes <= (cacheptr->mt_size - OVERHEAD))
280 return (ptr);
281 }
282
283 new = malloc(bytes);
284
285 if (new == NULL)
286 return (NULL);
287
288 /*
289 * If new == ptr, ptr has previously been freed. Passing a freed pointer
290 * to realloc() is not allowed - unless the caller specifically states
291 * otherwise, in which case we must avoid freeing ptr (ie new) before we
292 * return new. There is (obviously) no requirement to memcpy() ptr to
293 * new before we return.
294 */
295 if (new == ptr) {
296 if (!(debugopt & MTDOUBLEFREE))
297 abort();
298 return (new);
299 }
300
301 if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
302 mem -= OVERHEAD;
303 ptr = (void *)*(uintptr_t *)mem;
304 mem = (caddr_t)ptr - OVERHEAD;
305 shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr);
306 } else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
307 ptr = (void *) mem;
308 mem -= OVERHEAD;
309 shift = OVERHEAD;
310 }
311
312 if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
313 oversize_t *old;
314
315 old = (oversize_t *)(mem - OVSZ_SIZE);
316 (void) memcpy(new, data_ptr, MIN(bytes, old->size - shift));
317 free(ptr);
318 return (new);
319 }
320
321 cacheptr = (cache_t *)*(uintptr_t *)mem;
322
323 (void) memcpy(new, data_ptr,
324 MIN(cacheptr->mt_size - OVERHEAD - shift, bytes));
325 free(ptr);
326
327 return (new);
328 }
329
/*
 * Allocate zero-filled storage for nelem elements of bytes bytes each.
 * If either count is zero the request is passed to malloc() as size 0.
 * Returns NULL with errno set to ENOMEM when nelem * bytes overflows,
 * and NULL when malloc() fails.
 */
void *
calloc(size_t nelem, size_t bytes)
{
	size_t total = 0;
	void *buf;

	if (nelem != 0 && bytes != 0) {
		total = nelem * bytes;

		/* a wrapped product no longer divides back to bytes */
		if ((total / nelem) != bytes) {
			errno = ENOMEM;
			return (NULL);
		}
	}

	buf = malloc(total);
	if (buf != NULL)
		(void) memset(buf, 0, total);

	return (buf);
}
355
356 void
free(void * ptr)357 free(void * ptr)
358 {
359 cache_t *cacheptr;
360 caddr_t mem;
361 int32_t i;
362 caddr_t freeblocks;
363 uintptr_t offset;
364 uchar_t mask;
365 int32_t which_bit, num_bytes;
366
367 if (ptr == NULL)
368 return;
369
370 mem = (caddr_t)ptr - OVERHEAD;
371
372 if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
373 mem -= OVERHEAD;
374 ptr = (void *)*(uintptr_t *)mem;
375 mem = (caddr_t)ptr - OVERHEAD;
376 } else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
377 ptr = (void *) mem;
378 mem -= OVERHEAD;
379 }
380
381 if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
382 oversize_t *big, **opp;
383 int bucket;
384
385 big = (oversize_t *)(mem - OVSZ_SIZE);
386 (void) mutex_lock(&oversize_lock);
387
388 bucket = HASH_OVERSIZE(big->addr);
389 for (opp = &ovsz_hashtab[bucket]; *opp != NULL;
390 opp = &(*opp)->hash_next)
391 if (*opp == big)
392 break;
393
394 if (*opp == NULL) {
395 if (!(debugopt & MTDOUBLEFREE))
396 abort();
397 (void) mutex_unlock(&oversize_lock);
398 return;
399 }
400
401 *opp = big->hash_next; /* remove big from the hash table */
402 big->hash_next = NULL;
403
404 if (debugopt & MTDEBUGPATTERN)
405 copy_pattern(FREEPATTERN, ptr, big->size);
406 add_oversize(big);
407 (void) mutex_unlock(&oversize_lock);
408 return;
409 }
410
411 cacheptr = (cache_t *)*(uintptr_t *)mem;
412 freeblocks = cacheptr->mt_freelist;
413
414 /*
415 * This is the distance measured in bits into the arena.
416 * The value of offset is in bytes but there is a 1-1 correlation
417 * between distance into the arena and distance into the
418 * freelist bitmask.
419 */
420 offset = mem - cacheptr->mt_arena;
421
422 /*
423 * i is total number of bits to offset into freelist bitmask.
424 */
425
426 i = offset / cacheptr->mt_size;
427
428 num_bytes = i >> 3;
429
430 /*
431 * which_bit is the bit offset into the byte in the freelist.
432 * if our freelist bitmask looks like 0xf3 and we are freeing
433 * block 5 (ie: the 6th block) our mask will be 0xf7 after
434 * the free. Things go left to right that's why the mask is 0x80
435 * and not 0x01.
436 */
437 which_bit = i - (num_bytes << 3);
438
439 mask = 0x80 >> which_bit;
440
441 freeblocks += num_bytes;
442
443 if (debugopt & MTDEBUGPATTERN)
444 copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD);
445
446 (void) mutex_lock(&cacheptr->mt_cache_lock);
447
448 if (*freeblocks & mask) {
449 if (!(debugopt & MTDOUBLEFREE))
450 abort();
451 } else {
452 *freeblocks |= mask;
453 cacheptr->mt_nfree++;
454 }
455
456 (void) mutex_unlock(&cacheptr->mt_cache_lock);
457 }
458
/*
 * Allocate size bytes whose address is a multiple of alignment.
 *
 * Returns NULL with errno set to EINVAL when size or alignment is zero,
 * when alignment is not a multiple of sizeof (int), or when alignment is
 * not a power of two.  Returns NULL with errno set to ENOMEM when the
 * padded request size overflows, and NULL when the underlying malloc()
 * fails.
 *
 * Alignments up to MTMALLOC_MIN_ALIGN are satisfied by malloc() directly.
 * Otherwise size + alignment - MTMALLOC_MIN_ALIGN bytes are allocated and
 * an aligned address inside that buffer is returned.  When the aligned
 * address differs from the malloc'ed address, either a memalign header is
 * written just below the returned address, or (oversize allocations only)
 * the buffer is split so that the returned address gets its own oversize
 * header.  For oversize allocations, leading and trailing fragments larger
 * than MAX_CACHED are split off and handed to free_oversize().
 */
void *
memalign(size_t alignment, size_t size)
{
	size_t alloc_size;
	uintptr_t offset;
	void *alloc_buf;
	void *ret_buf;

	if (size == 0 || alignment == 0 || misaligned(alignment) ||
	    (alignment & (alignment - 1)) != 0) {
		errno = EINVAL;
		return (NULL);
	}

	/* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
	if (alignment <= MTMALLOC_MIN_ALIGN)
		return (malloc(size));

	alloc_size = size + alignment - MTMALLOC_MIN_ALIGN;

	if (alloc_size < size) { /* overflow */
		errno = ENOMEM;
		return (NULL);
	}

	alloc_buf = malloc(alloc_size);

	if (alloc_buf == NULL)
		/* malloc sets errno */
		return (NULL);

	/*
	 * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
	 * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since
	 * we will use alloc_size to return the excess fragments to the free
	 * list, we also round-up alloc_size if necessary.
	 */
	if ((alloc_size > MAX_CACHED) &&
	    (alloc_size & (MTMALLOC_MIN_ALIGN - 1)))
		alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN);

	if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) {
		/* aligned correctly */

		/*
		 * Unsigned arithmetic: if this subtraction wraps, frag_size
		 * is not < alloc_size and IS_OVERSIZE() below rejects it.
		 */
		size_t frag_size = alloc_size -
		    (size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

		/*
		 * If the leftover piece of the memory > MAX_CACHED,
		 * split off the piece and return it back to the freelist.
		 */
		if (IS_OVERSIZE(frag_size, alloc_size)) {
			oversize_t *orig, *tail;
			uintptr_t taddr;
			size_t data_size;
			taddr = ALIGN((uintptr_t)alloc_buf + size,
			    MTMALLOC_MIN_ALIGN);
			data_size = taddr - (uintptr_t)alloc_buf;
			orig = (oversize_t *)((uintptr_t)alloc_buf -
			    OVSZ_HEADER_SIZE);
			frag_size = orig->size - data_size -
			    OVSZ_HEADER_SIZE;
			orig->size = data_size;
			tail = oversize_header_alloc(taddr, frag_size);
			free_oversize(tail);
		}
		ret_buf = alloc_buf;
	} else {
		uchar_t oversize_bits = 0;
		size_t head_sz, data_sz, tail_sz;
		uintptr_t ret_addr, taddr, shift, tshift;
		oversize_t *orig, *tail, *big;
		size_t tsize;

		/* needs to be aligned */
		shift = alignment - offset;

		assert(shift >= MTMALLOC_MIN_ALIGN);

		ret_addr = ((uintptr_t)alloc_buf + shift);
		ret_buf = (void *)ret_addr;

		/* cached buffer: a memalign header is all that is needed */
		if (alloc_size <= MAX_CACHED) {
			MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf);
			return (ret_buf);
		}

		/*
		 * Only check for the fragments when the memory is allocated
		 * from oversize_list.  Split off a fragment and return it
		 * to the oversize freelist when it's > MAX_CACHED.
		 */

		head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE);

		tail_sz = alloc_size -
		    (shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

		/*
		 * Build a bitmask recording which of head, data and tail are
		 * oversize; the case labels below name each combination.
		 */
		oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) |
		    IS_OVERSIZE(size, alloc_size) << DATA_SHIFT |
		    IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT;

		switch (oversize_bits) {
			case NONE_OVERSIZE:
			case DATA_OVERSIZE:
				MEMALIGN_HEADER_ALLOC(ret_addr, shift,
				    alloc_buf);
				break;
			case HEAD_OVERSIZE:
				/*
				 * If we can extend data > MAX_CACHED and have
				 * head still > MAX_CACHED, we split head-end
				 * as the case of head-end and data oversized,
				 * otherwise just create memalign header.
				 */
				tsize = (shift + size) - (MAX_CACHED + 8 +
				    MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);

				if (!IS_OVERSIZE(tsize, alloc_size)) {
					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
					    alloc_buf);
					break;
				} else {
					/*
					 * The data piece will start at taddr
					 * (below ret_addr); the memalign
					 * header written here points from the
					 * returned address back to taddr.
					 */
					tsize += OVSZ_HEADER_SIZE;
					taddr = ALIGN((uintptr_t)alloc_buf +
					    tsize, MTMALLOC_MIN_ALIGN);
					tshift = ret_addr - taddr;
					MEMALIGN_HEADER_ALLOC(ret_addr, tshift,
					    taddr);
					ret_addr = taddr;
					shift = ret_addr - (uintptr_t)alloc_buf;
				}
				/* FALLTHROUGH */
			case HEAD_AND_DATA_OVERSIZE:
				/*
				 * Split off the head fragment and
				 * return it back to oversize freelist.
				 * Create oversize header for the piece
				 * of (data + tail fragment).
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				big = oversize_header_alloc(ret_addr -
				    OVSZ_HEADER_SIZE, (orig->size - shift));
				(void) mutex_lock(&oversize_lock);
				insert_hash(big);
				(void) mutex_unlock(&oversize_lock);
				orig->size = shift - OVSZ_HEADER_SIZE;

				/* free up the head fragment */
				free_oversize(orig);
				break;
			case TAIL_OVERSIZE:
				/*
				 * If we can extend data > MAX_CACHED and have
				 * tail-end still > MAX_CACHED, we split tail
				 * end, otherwise just create memalign header.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size - (MAX_CACHED + 8 +
				    shift + OVSZ_HEADER_SIZE +
				    MTMALLOC_MIN_ALIGN);
				if (!IS_OVERSIZE(tsize, alloc_size)) {
					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
					    alloc_buf);
					break;
				} else {
					size = MAX_CACHED + 8;
				}
				/* FALLTHROUGH */
			case DATA_AND_TAIL_OVERSIZE:
				/*
				 * Split off the tail fragment and
				 * return it back to oversize freelist.
				 * Create memalign header and adjust
				 * the size for the piece of
				 * (head fragment + data).
				 */
				taddr = ALIGN(ret_addr + size,
				    MTMALLOC_MIN_ALIGN);
				data_sz = (size_t)(taddr -
				    (uintptr_t)alloc_buf);
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size - data_sz;
				orig->size = data_sz;
				MEMALIGN_HEADER_ALLOC(ret_buf, shift,
				    alloc_buf);
				tsize -= OVSZ_HEADER_SIZE;
				tail = oversize_header_alloc(taddr, tsize);
				free_oversize(tail);
				break;
			case HEAD_AND_TAIL_OVERSIZE:
				/*
				 * Split off the head fragment.
				 * We try to free up tail-end when we can
				 * extend data size to (MAX_CACHED + 8)
				 * and remain tail-end oversized.
				 * The bottom line is all split pieces
				 * should be oversize in size.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				tsize = orig->size - (MAX_CACHED + 8 +
				    OVSZ_HEADER_SIZE + shift +
				    MTMALLOC_MIN_ALIGN);

				if (!IS_OVERSIZE(tsize, alloc_size)) {
					/*
					 * If the chunk is not big enough
					 * to make both data and tail oversize
					 * we just keep them as one piece.
					 */
					big = oversize_header_alloc(ret_addr -
					    OVSZ_HEADER_SIZE,
					    orig->size - shift);
					(void) mutex_lock(&oversize_lock);
					insert_hash(big);
					(void) mutex_unlock(&oversize_lock);
					orig->size = shift - OVSZ_HEADER_SIZE;
					free_oversize(orig);
					break;
				} else {
					/*
					 * extend data size > MAX_CACHED
					 * and handle it as head, data, tail
					 * are all oversized.
					 */
					size = MAX_CACHED + 8;
				}
				/* FALLTHROUGH */
			case ALL_OVERSIZE:
				/*
				 * split off the head and tail fragments,
				 * return them back to the oversize freelist.
				 * Alloc oversize header for data seg.
				 */
				orig = (oversize_t *)((uintptr_t)alloc_buf -
				    OVSZ_HEADER_SIZE);
				/* remember the full size before shrinking orig */
				tsize = orig->size;
				orig->size = shift - OVSZ_HEADER_SIZE;
				free_oversize(orig);

				taddr = ALIGN(ret_addr + size,
				    MTMALLOC_MIN_ALIGN);
				data_sz = taddr - ret_addr;
				assert(tsize > (shift + data_sz +
				    OVSZ_HEADER_SIZE));
				tail_sz = tsize -
				    (shift + data_sz + OVSZ_HEADER_SIZE);

				/* create oversize header for data seg */
				big = oversize_header_alloc(ret_addr -
				    OVSZ_HEADER_SIZE, data_sz);
				(void) mutex_lock(&oversize_lock);
				insert_hash(big);
				(void) mutex_unlock(&oversize_lock);

				/* create oversize header for tail fragment */
				tail = oversize_header_alloc(taddr, tail_sz);
				free_oversize(tail);
				break;
			default:
				/* should not reach here */
				assert(0);
		}
	}
	return (ret_buf);
}
729
730
/*
 * Allocate size bytes aligned on a page boundary by calling memalign()
 * with the system page size.  A zero-byte request returns NULL.  The page
 * size is fetched with sysconf() while the cached value is still zero.
 */
void *
valloc(size_t size)
{
	static unsigned cached_pgsz;

	if (size != 0) {
		if (cached_pgsz == 0)
			cached_pgsz = sysconf(_SC_PAGESIZE);

		return (memalign(cached_pgsz, size));
	}

	return (NULL);
}
744
745 void
mallocctl(int cmd,long value)746 mallocctl(int cmd, long value)
747 {
748 switch (cmd) {
749
750 case MTDEBUGPATTERN:
751 /*
752 * Reinitialize free blocks in case malloc() is called prior
753 * to mallocctl().
754 */
755 if (value && !(debugopt & cmd)) {
756 reinit++;
757 debugopt |= cmd;
758 reinit_cpu_list();
759 }
760 /*FALLTHRU*/
761 case MTDOUBLEFREE:
762 case MTINITBUFFER:
763 if (value)
764 debugopt |= cmd;
765 else
766 debugopt &= ~cmd;
767 break;
768 case MTCHUNKSIZE:
769 if (value >= MINSIZE && value <= MAXSIZE)
770 requestsize = value;
771 break;
772 default:
773 break;
774 }
775 }
776
/*
 * Initialization function, called from the init section of the library.
 * No locking is required here because we are single-threaded during
 * library initialization.
 *
 * Sizes the per-cpu list (ncpus, cpu_mask), carves the percpu_t array and
 * the per-cpu cache lists out of the brk with sbrk(), initializes the
 * oversize list head, and finally publishes curcpu, cpu_mask and cpu_list.
 * Aborts if sbrk() fails or does not return the expected address.
 */
static void
setup_caches(void)
{
	uintptr_t oldbrk;
	uintptr_t newbrk;

	size_t cache_space_needed;
	size_t padding;

	curcpu_func new_curcpu;
	uint_t new_cpu_mask;
	percpu_t *new_cpu_list;

	uint_t i, j;
	uintptr_t list_addr;

	/*
	 * Get a decent "current cpu identifier", to be used to reduce
	 * contention.  Eventually, this should be replaced by an interface
	 * to get the actual CPU sequence number in libthread/liblwp.
	 */
	new_curcpu = (curcpu_func)thr_self;
	if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0)
		ncpus = 4; /* decent default value */

	/* round ncpus up to a power of 2 */
	while (ncpus & (ncpus - 1))
		ncpus++;

	new_cpu_mask = ncpus - 1;	/* create the cpu mask */

	/*
	 * We now do some magic with the brk.  What we want to get in the
	 * end is a bunch of well-aligned stuff in a big initial allocation.
	 * Along the way, we do sanity checks to make sure no one else has
	 * touched the brk (which shouldn't happen, but it's always good to
	 * check)
	 *
	 * First, make sure sbrk is sane, and store the current brk in oldbrk.
	 */
	oldbrk = (uintptr_t)sbrk(0);
	if ((void *)oldbrk == (void *)-1)
		abort();	/* sbrk is broken -- we're doomed. */

	/*
	 * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
	 * the percpu structures and cache lists will be properly aligned.
	 *
	 * (Further down, the brk is aligned to HUNKSIZE so that all hunks
	 * are page-aligned, assuming HUNKSIZE >= PAGESIZE, so they can be
	 * paged out individually.)
	 */
	newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT);
	if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk)
		abort();	/* sbrk is broken -- we're doomed. */

	/*
	 * For each cpu, there is one percpu_t and a list of caches
	 */
	cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE);

	new_cpu_list = (percpu_t *)sbrk(cache_space_needed);

	if (new_cpu_list == (percpu_t *)-1 ||
	    (uintptr_t)new_cpu_list != newbrk)
		abort();	/* sbrk is broken -- we're doomed. */

	/*
	 * Finally, align the brk to HUNKSIZE so that all hunks are
	 * page-aligned, to avoid edge-effects.
	 */

	newbrk = (uintptr_t)new_cpu_list + cache_space_needed;

	padding = ALIGN(newbrk, HUNKSIZE) - newbrk;

	if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk)
		abort();	/* sbrk is broken -- we're doomed. */

	/* the cache lists are laid out after the array of ncpus percpu_t's */
	list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus));

	/* initialize the percpu list */
	for (i = 0; i < ncpus; i++) {
		new_cpu_list[i].mt_caches = (cache_head_t *)list_addr;
		for (j = 0; j < NUM_CACHES; j++) {
			new_cpu_list[i].mt_caches[j].mt_cache = NULL;
			new_cpu_list[i].mt_caches[j].mt_hint = NULL;
		}

		(void) mutex_init(&new_cpu_list[i].mt_parent_lock,
		    USYNC_THREAD, NULL);

		/* get the correct cache list alignment */
		list_addr += CACHELIST_SIZE;
	}

	/*
	 * Initialize oversize listhead: an empty circular list in which the
	 * head points at itself in both the by-size and by-address orders.
	 */
	oversize_list.next_bysize = &oversize_list;
	oversize_list.prev_bysize = &oversize_list;
	oversize_list.next_byaddr = &oversize_list;
	oversize_list.prev_byaddr = &oversize_list;
	oversize_list.addr = NULL;
	oversize_list.size = 0;		/* sentinel */

	/*
	 * Now install the global variables.
	 */
	curcpu = new_curcpu;
	cpu_mask = new_cpu_mask;
	cpu_list = new_cpu_list;
}
894
895 static void
create_cache(cache_t * cp,size_t size,uint_t chunksize)896 create_cache(cache_t *cp, size_t size, uint_t chunksize)
897 {
898 long nblocks;
899
900 (void) mutex_init(&cp->mt_cache_lock, USYNC_THREAD, NULL);
901 cp->mt_size = size;
902 cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t));
903 cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t);
904 cp->mt_hunks = chunksize;
905 /*
906 * rough calculation. We will need to adjust later.
907 */
908 nblocks = cp->mt_span / cp->mt_size;
909 nblocks >>= 3;
910 if (nblocks == 0) { /* less than 8 free blocks in this pool */
911 int32_t numblocks = 0;
912 long i = cp->mt_span;
913 size_t sub = cp->mt_size;
914 uchar_t mask = 0;
915
916 while (i > sub) {
917 numblocks++;
918 i -= sub;
919 }
920 nblocks = numblocks;
921 cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8);
922 cp->mt_nfree = numblocks;
923 while (numblocks--) {
924 mask |= 0x80 >> numblocks;
925 }
926 *(cp->mt_freelist) = mask;
927 } else {
928 cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist +
929 nblocks, 32);
930 /* recompute nblocks */
931 nblocks = (uintptr_t)((caddr_t)cp->mt_freelist +
932 cp->mt_span - cp->mt_arena) / cp->mt_size;
933 cp->mt_nfree = ((nblocks >> 3) << 3);
934 /* Set everything to free */
935 (void) memset(cp->mt_freelist, 0xff, nblocks >> 3);
936 }
937
938 if (debugopt & MTDEBUGPATTERN)
939 copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks);
940
941 cp->mt_next = NULL;
942 }
943
944 static void
reinit_cpu_list(void)945 reinit_cpu_list(void)
946 {
947 oversize_t *wp = oversize_list.next_bysize;
948 percpu_t *cpuptr;
949 cache_t *thiscache;
950 cache_head_t *cachehead;
951
952 /* Reinitialize free oversize blocks. */
953 (void) mutex_lock(&oversize_lock);
954 if (debugopt & MTDEBUGPATTERN)
955 for (; wp != &oversize_list; wp = wp->next_bysize)
956 copy_pattern(FREEPATTERN, wp->addr, wp->size);
957 (void) mutex_unlock(&oversize_lock);
958
959 /* Reinitialize free blocks. */
960 for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
961 (void) mutex_lock(&cpuptr->mt_parent_lock);
962 for (cachehead = &cpuptr->mt_caches[0]; cachehead <
963 &cpuptr->mt_caches[NUM_CACHES]; cachehead++) {
964 for (thiscache = cachehead->mt_cache; thiscache != NULL;
965 thiscache = thiscache->mt_next) {
966 (void) mutex_lock(&thiscache->mt_cache_lock);
967 if (thiscache->mt_nfree == 0) {
968 (void) mutex_unlock(
969 &thiscache->mt_cache_lock);
970 continue;
971 }
972 if (thiscache != NULL)
973 reinit_cache(thiscache);
974 (void) mutex_unlock(&thiscache->mt_cache_lock);
975 }
976 }
977 (void) mutex_unlock(&cpuptr->mt_parent_lock);
978 }
979 reinit = 0;
980 }
981
982 static void
reinit_cache(cache_t * thiscache)983 reinit_cache(cache_t *thiscache)
984 {
985 uint32_t *freeblocks; /* not a uintptr_t on purpose */
986 int32_t i, n;
987 caddr_t ret;
988
989 freeblocks = (uint32_t *)thiscache->mt_freelist;
990 while (freeblocks < (uint32_t *)thiscache->mt_arena) {
991 if (*freeblocks & 0xffffffff) {
992 for (i = 0; i < 32; i++) {
993 if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) {
994 n = (uintptr_t)(((freeblocks -
995 (uint32_t *)thiscache->mt_freelist)
996 << 5) + i) * thiscache->mt_size;
997 ret = thiscache->mt_arena + n;
998 ret += OVERHEAD;
999 copy_pattern(FREEPATTERN, ret,
1000 thiscache->mt_size);
1001 }
1002 }
1003 }
1004 freeblocks++;
1005 }
1006 }
1007
1008 static void *
malloc_internal(size_t size,percpu_t * cpuptr)1009 malloc_internal(size_t size, percpu_t *cpuptr)
1010 {
1011 cache_head_t *cachehead;
1012 cache_t *thiscache, *hintcache;
1013 int32_t i, n, logsz, bucket;
1014 uint32_t index;
1015 uint32_t *freeblocks; /* not a uintptr_t on purpose */
1016 caddr_t ret;
1017
1018 logsz = MIN_CACHED_SHIFT;
1019
1020 while (size > (1 << logsz))
1021 logsz++;
1022
1023 bucket = logsz - MIN_CACHED_SHIFT;
1024
1025 (void) mutex_lock(&cpuptr->mt_parent_lock);
1026
1027 /*
1028 * Find a cache of the appropriate size with free buffers.
1029 *
1030 * We don't need to lock each cache as we check their mt_nfree count,
1031 * since:
1032 * 1. We are only looking for caches with mt_nfree > 0. If a
1033 * free happens during our search, it will increment mt_nfree,
1034 * which will not effect the test.
1035 * 2. Allocations can decrement mt_nfree, but they can't happen
1036 * as long as we hold mt_parent_lock.
1037 */
1038
1039 cachehead = &cpuptr->mt_caches[bucket];
1040
1041 /* Search through the list, starting at the mt_hint */
1042 thiscache = cachehead->mt_hint;
1043
1044 while (thiscache != NULL && thiscache->mt_nfree == 0)
1045 thiscache = thiscache->mt_next;
1046
1047 if (thiscache == NULL) {
1048 /* wrap around -- search up to the hint */
1049 thiscache = cachehead->mt_cache;
1050 hintcache = cachehead->mt_hint;
1051
1052 while (thiscache != NULL && thiscache != hintcache &&
1053 thiscache->mt_nfree == 0)
1054 thiscache = thiscache->mt_next;
1055
1056 if (thiscache == hintcache)
1057 thiscache = NULL;
1058 }
1059
1060
1061 if (thiscache == NULL) { /* there are no free caches */
1062 int32_t thisrequest = requestsize;
1063 int32_t buffer_size = (1 << logsz) + OVERHEAD;
1064
1065 thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE);
1066
1067 if (thiscache == (cache_t *)-1) {
1068 (void) mutex_unlock(&cpuptr->mt_parent_lock);
1069 errno = EAGAIN;
1070 return (NULL);
1071 }
1072 create_cache(thiscache, buffer_size, thisrequest);
1073
1074 /* link in the new block at the beginning of the list */
1075 thiscache->mt_next = cachehead->mt_cache;
1076 cachehead->mt_cache = thiscache;
1077 }
1078
1079 /* update the hint to the cache we found or created */
1080 cachehead->mt_hint = thiscache;
1081
1082 /* thiscache now points to a cache with available space */
1083 (void) mutex_lock(&thiscache->mt_cache_lock);
1084
1085 freeblocks = (uint32_t *)thiscache->mt_freelist;
1086 while (freeblocks < (uint32_t *)thiscache->mt_arena) {
1087 if (*freeblocks & 0xffffffff)
1088 break;
1089 freeblocks++;
1090 if (freeblocks < (uint32_t *)thiscache->mt_arena &&
1091 *freeblocks & 0xffffffff)
1092 break;
1093 freeblocks++;
1094 if (freeblocks < (uint32_t *)thiscache->mt_arena &&
1095 *freeblocks & 0xffffffff)
1096 break;
1097 freeblocks++;
1098 if (freeblocks < (uint32_t *)thiscache->mt_arena &&
1099 *freeblocks & 0xffffffff)
1100 break;
1101 freeblocks++;
1102 }
1103
1104 /*
1105 * the offset from mt_freelist to freeblocks is the offset into
1106 * the arena. Be sure to include the offset into freeblocks
1107 * of the bitmask. n is the offset.
1108 */
1109 for (i = 0; i < 32; ) {
1110 if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
1111 break;
1112 if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
1113 break;
1114 if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
1115 break;
1116 if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
1117 break;
1118 }
1119 index = 0x80000000 >> --i;
1120
1121
1122 *freeblocks &= FLIP_EM(~index);
1123
1124 thiscache->mt_nfree--;
1125
1126 (void) mutex_unlock(&thiscache->mt_cache_lock);
1127 (void) mutex_unlock(&cpuptr->mt_parent_lock);
1128
1129 n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5)
1130 + i) * thiscache->mt_size;
1131 /*
1132 * Now you have the offset in n, you've changed the free mask
1133 * in the freelist. Nothing left to do but find the block
1134 * in the arena and put the value of thiscache in the word
1135 * ahead of the handed out address and return the memory
1136 * back to the user.
1137 */
1138 ret = thiscache->mt_arena + n;
1139
1140 /* Store the cache addr for this buf. Makes free go fast. */
1141 *(uintptr_t *)ret = (uintptr_t)thiscache;
1142
1143 /*
1144 * This assert makes sure we don't hand out memory that is not
1145 * owned by this cache.
1146 */
1147 assert(ret + thiscache->mt_size <= thiscache->mt_freelist +
1148 thiscache->mt_span);
1149
1150 ret += OVERHEAD;
1151
1152 assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
1153
1154 if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
1155 if (verify_pattern(FREEPATTERN, ret, size))
1156 abort(); /* reference after free */
1157
1158 if (debugopt & MTINITBUFFER)
1159 copy_pattern(INITPATTERN, ret, size);
1160 return ((void *)ret);
1161 }
1162
/*
 * Grow the heap by `bytes' via sbrk() and return the previous break,
 * i.e. the start of the newly obtained region, or (void *)-1 on failure.
 *
 * Requests above LONG_MAX cannot be passed to sbrk() in one call, so they
 * are satisfied in LONG_MAX-sized pieces.  A request of ULONG_MAX is
 * refused outright.
 */
static void *
morecore(size_t bytes)
{
	void *base;
	intptr_t chunk;

	/* Common case: the whole request fits in a single sbrk() call. */
	if (bytes <= LONG_MAX)
		return (sbrk(bytes));

	if (bytes == ULONG_MAX)
		return ((void *)-1);

	base = sbrk(0);

	/*
	 * Take LONG_MAX first, then whatever is left.  Once the remainder
	 * has been taken the subtraction wraps, `chunk' turns non-positive
	 * and the loop ends.
	 */
	for (chunk = LONG_MAX; chunk > 0; chunk = bytes) {
		if (sbrk(chunk) == (void *)-1) {
			/* Give back the first piece if it was obtained. */
			if (base != sbrk(0))
				(void) sbrk(-LONG_MAX);
			return ((void *)-1);
		}
		bytes -= LONG_MAX;
	}

	return (base);
}
1193
1194
1195 static void *
oversize(size_t size)1196 oversize(size_t size)
1197 {
1198 caddr_t ret;
1199 oversize_t *big;
1200
1201 /* make sure we will not overflow */
1202 if (size > MAX_MTMALLOC) {
1203 errno = ENOMEM;
1204 return (NULL);
1205 }
1206
1207 /*
1208 * Since we ensure every address we hand back is
1209 * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
1210 * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
1211 * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN,
1212 * particularly where coalescing occurs.
1213 */
1214 size = ALIGN(size, MTMALLOC_MIN_ALIGN);
1215
1216 /*
1217 * The idea with the global lock is that we are sure to
1218 * block in the kernel anyway since given an oversize alloc
1219 * we are sure to have to call morecore();
1220 */
1221 (void) mutex_lock(&oversize_lock);
1222
1223 if ((big = find_oversize(size)) != NULL) {
1224 if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
1225 if (verify_pattern(FREEPATTERN, big->addr, size))
1226 abort(); /* reference after free */
1227 } else {
1228 /* Get more 8-byte aligned memory from heap */
1229 ret = morecore(size + OVSZ_HEADER_SIZE);
1230 if (ret == (caddr_t)-1) {
1231 (void) mutex_unlock(&oversize_lock);
1232 errno = ENOMEM;
1233 return (NULL);
1234 }
1235 big = oversize_header_alloc((uintptr_t)ret, size);
1236 }
1237 ret = big->addr;
1238
1239 insert_hash(big);
1240
1241 if (debugopt & MTINITBUFFER)
1242 copy_pattern(INITPATTERN, ret, size);
1243
1244 (void) mutex_unlock(&oversize_lock);
1245 assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
1246 return ((void *)ret);
1247 }
1248
1249 static void
insert_oversize(oversize_t * op,oversize_t * nx)1250 insert_oversize(oversize_t *op, oversize_t *nx)
1251 {
1252 oversize_t *sp;
1253
1254 /* locate correct insertion point in size-ordered list */
1255 for (sp = oversize_list.next_bysize;
1256 sp != &oversize_list && (op->size > sp->size);
1257 sp = sp->next_bysize)
1258 ;
1259
1260 /* link into size-ordered list */
1261 op->next_bysize = sp;
1262 op->prev_bysize = sp->prev_bysize;
1263 op->prev_bysize->next_bysize = op;
1264 op->next_bysize->prev_bysize = op;
1265
1266 /*
1267 * link item into address-ordered list
1268 * (caller provides insertion point as an optimization)
1269 */
1270 op->next_byaddr = nx;
1271 op->prev_byaddr = nx->prev_byaddr;
1272 op->prev_byaddr->next_byaddr = op;
1273 op->next_byaddr->prev_byaddr = op;
1274
1275 }
1276
1277 static void
unlink_oversize(oversize_t * lp)1278 unlink_oversize(oversize_t *lp)
1279 {
1280 /* unlink from address list */
1281 lp->prev_byaddr->next_byaddr = lp->next_byaddr;
1282 lp->next_byaddr->prev_byaddr = lp->prev_byaddr;
1283
1284 /* unlink from size list */
1285 lp->prev_bysize->next_bysize = lp->next_bysize;
1286 lp->next_bysize->prev_bysize = lp->prev_bysize;
1287 }
1288
1289 static void
position_oversize_by_size(oversize_t * op)1290 position_oversize_by_size(oversize_t *op)
1291 {
1292 oversize_t *sp;
1293
1294 if (op->size > op->next_bysize->size ||
1295 op->size < op->prev_bysize->size) {
1296
1297 /* unlink from size list */
1298 op->prev_bysize->next_bysize = op->next_bysize;
1299 op->next_bysize->prev_bysize = op->prev_bysize;
1300
1301 /* locate correct insertion point in size-ordered list */
1302 for (sp = oversize_list.next_bysize;
1303 sp != &oversize_list && (op->size > sp->size);
1304 sp = sp->next_bysize)
1305 ;
1306
1307 /* link into size-ordered list */
1308 op->next_bysize = sp;
1309 op->prev_bysize = sp->prev_bysize;
1310 op->prev_bysize->next_bysize = op;
1311 op->next_bysize->prev_bysize = op;
1312 }
1313 }
1314
1315 static void
add_oversize(oversize_t * lp)1316 add_oversize(oversize_t *lp)
1317 {
1318 int merge_flags = INSERT_ONLY;
1319 oversize_t *nx; /* ptr to item right of insertion point */
1320 oversize_t *pv; /* ptr to item left of insertion point */
1321 uint_t size_lp, size_pv, size_nx;
1322 uintptr_t endp_lp, endp_pv, endp_nx;
1323
1324 /*
1325 * Locate insertion point in address-ordered list
1326 */
1327
1328 for (nx = oversize_list.next_byaddr;
1329 nx != &oversize_list && (lp->addr > nx->addr);
1330 nx = nx->next_byaddr)
1331 ;
1332
1333 /*
1334 * Determine how to add chunk to oversize freelist
1335 */
1336
1337 size_lp = OVSZ_HEADER_SIZE + lp->size;
1338 endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN);
1339 size_lp = endp_lp - (uintptr_t)lp;
1340
1341 pv = nx->prev_byaddr;
1342
1343 if (pv->size) {
1344
1345 size_pv = OVSZ_HEADER_SIZE + pv->size;
1346 endp_pv = ALIGN((uintptr_t)pv + size_pv,
1347 MTMALLOC_MIN_ALIGN);
1348 size_pv = endp_pv - (uintptr_t)pv;
1349
1350 /* Check for adjacency with left chunk */
1351 if ((uintptr_t)lp == endp_pv)
1352 merge_flags |= COALESCE_LEFT;
1353 }
1354
1355 if (nx->size) {
1356
1357 /* Check for adjacency with right chunk */
1358 if ((uintptr_t)nx == endp_lp) {
1359 size_nx = OVSZ_HEADER_SIZE + nx->size;
1360 endp_nx = ALIGN((uintptr_t)nx + size_nx,
1361 MTMALLOC_MIN_ALIGN);
1362 size_nx = endp_nx - (uintptr_t)nx;
1363 merge_flags |= COALESCE_RIGHT;
1364 }
1365 }
1366
1367 /*
1368 * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with
1369 * FREEPATTERN for lp->size bytes. If we can merge, the oversize
1370 * header(s) that will also become part of the memory available for
1371 * reallocation (ie lp and/or nx) must also be overwritten with
1372 * FREEPATTERN or we will SIGABRT when this memory is next reallocated.
1373 */
1374 switch (merge_flags) {
1375
1376 case INSERT_ONLY: /* Coalescing not possible */
1377 insert_oversize(lp, nx);
1378 break;
1379 case COALESCE_LEFT:
1380 pv->size += size_lp;
1381 position_oversize_by_size(pv);
1382 if (debugopt & MTDEBUGPATTERN)
1383 copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
1384 break;
1385 case COALESCE_RIGHT:
1386 unlink_oversize(nx);
1387 lp->size += size_nx;
1388 insert_oversize(lp, pv->next_byaddr);
1389 if (debugopt & MTDEBUGPATTERN)
1390 copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
1391 break;
1392 case COALESCE_WITH_BOTH_SIDES: /* Merge (with right) to the left */
1393 pv->size += size_lp + size_nx;
1394 unlink_oversize(nx);
1395 position_oversize_by_size(pv);
1396 if (debugopt & MTDEBUGPATTERN) {
1397 copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
1398 copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
1399 }
1400 break;
1401 }
1402 }
1403
1404 /*
1405 * Find memory on our list that is at least size big. If we find a block that is
1406 * big enough, we break it up and return the associated oversize_t struct back
1407 * to the calling client. Any leftover piece of that block is returned to the
1408 * freelist.
1409 */
1410 static oversize_t *
find_oversize(size_t size)1411 find_oversize(size_t size)
1412 {
1413 oversize_t *wp = oversize_list.next_bysize;
1414 while (wp != &oversize_list && size > wp->size)
1415 wp = wp->next_bysize;
1416
1417 if (wp == &oversize_list) /* empty list or nothing big enough */
1418 return (NULL);
1419 /* breaking up a chunk of memory */
1420 if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN)))
1421 > MAX_CACHED) {
1422 caddr_t off;
1423 oversize_t *np;
1424 size_t osize;
1425 off = (caddr_t)ALIGN(wp->addr + size,
1426 MTMALLOC_MIN_ALIGN);
1427 osize = wp->size;
1428 wp->size = (size_t)(off - wp->addr);
1429 np = oversize_header_alloc((uintptr_t)off,
1430 osize - (wp->size + OVSZ_HEADER_SIZE));
1431 if ((long)np->size < 0)
1432 abort();
1433 unlink_oversize(wp);
1434 add_oversize(np);
1435 } else {
1436 unlink_oversize(wp);
1437 }
1438 return (wp);
1439 }
1440
/*
 * Fill the first `size' bytes of `buf_arg' with repetitions of the 32-bit
 * `pattern'.
 *
 * The fill is driven by a word count rather than by end-pointer
 * comparison, so no pointer outside the buffer is ever formed.  When
 * `size' is not a multiple of four, the trailing 1-3 bytes receive the
 * leading bytes of the pattern as it is laid out in memory, and nothing
 * beyond `size' bytes is written.
 */
static void
copy_pattern(uint32_t pattern, void *buf_arg, size_t size)
{
	uint32_t *buf = buf_arg;
	size_t nwords = size / sizeof (uint32_t);
	size_t ntail = size % sizeof (uint32_t);

	/* Bulk of the buffer, four words per iteration. */
	for (; nwords >= 4; nwords -= 4, buf += 4)
		buf[3] = buf[2] = buf[1] = buf[0] = pattern;

	/* Up to three whole words left over. */
	for (; nwords > 0; nwords--)
		*buf++ = pattern;

	/* Partial trailing word: copy only the bytes that were asked for. */
	if (ntail != 0) {
		const unsigned char *src = (const unsigned char *)&pattern;
		unsigned char *dst = (unsigned char *)buf;

		while (ntail-- > 0)
			*dst++ = *src++;
	}
}
1454
/*
 * Check that `buf_arg' holds repetitions of the 32-bit `pattern' for
 * `size' bytes, comparing one word at a time.  Returns the address of the
 * first word that differs, or NULL when every word matches.
 */
static void *
verify_pattern(uint32_t pattern, void *buf_arg, size_t size)
{
	uint32_t *cur = buf_arg;
	uint32_t *limit = (uint32_t *)((char *)buf_arg + size);

	while (cur < limit) {
		if (*cur != pattern)
			return (cur);
		cur++;
	}

	return (NULL);
}
1466
1467 static void
free_oversize(oversize_t * ovp)1468 free_oversize(oversize_t *ovp)
1469 {
1470 assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */
1471 assert(ovp->size > MAX_CACHED);
1472
1473 ovp->next_bysize = ovp->prev_bysize = NULL;
1474 ovp->next_byaddr = ovp->prev_byaddr = NULL;
1475 (void) mutex_lock(&oversize_lock);
1476 add_oversize(ovp);
1477 (void) mutex_unlock(&oversize_lock);
1478 }
1479
1480 static oversize_t *
oversize_header_alloc(uintptr_t mem,size_t size)1481 oversize_header_alloc(uintptr_t mem, size_t size)
1482 {
1483 oversize_t *ovsz_hdr;
1484
1485 assert(size > MAX_CACHED);
1486
1487 ovsz_hdr = (oversize_t *)mem;
1488 ovsz_hdr->prev_bysize = NULL;
1489 ovsz_hdr->next_bysize = NULL;
1490 ovsz_hdr->prev_byaddr = NULL;
1491 ovsz_hdr->next_byaddr = NULL;
1492 ovsz_hdr->hash_next = NULL;
1493 ovsz_hdr->size = size;
1494 mem += OVSZ_SIZE;
1495 *(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC;
1496 mem += OVERHEAD;
1497 assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */
1498 ovsz_hdr->addr = (caddr_t)mem;
1499 return (ovsz_hdr);
1500 }
1501
1502 static void
malloc_prepare()1503 malloc_prepare()
1504 {
1505 percpu_t *cpuptr;
1506 cache_head_t *cachehead;
1507 cache_t *thiscache;
1508
1509 (void) mutex_lock(&oversize_lock);
1510 for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
1511 (void) mutex_lock(&cpuptr->mt_parent_lock);
1512 for (cachehead = &cpuptr->mt_caches[0];
1513 cachehead < &cpuptr->mt_caches[NUM_CACHES];
1514 cachehead++) {
1515 for (thiscache = cachehead->mt_cache;
1516 thiscache != NULL;
1517 thiscache = thiscache->mt_next) {
1518 (void) mutex_lock(
1519 &thiscache->mt_cache_lock);
1520 }
1521 }
1522 }
1523 }
1524
1525 static void
malloc_release()1526 malloc_release()
1527 {
1528 percpu_t *cpuptr;
1529 cache_head_t *cachehead;
1530 cache_t *thiscache;
1531
1532 for (cpuptr = &cpu_list[ncpus - 1]; cpuptr >= &cpu_list[0]; cpuptr--) {
1533 for (cachehead = &cpuptr->mt_caches[NUM_CACHES - 1];
1534 cachehead >= &cpuptr->mt_caches[0];
1535 cachehead--) {
1536 for (thiscache = cachehead->mt_cache;
1537 thiscache != NULL;
1538 thiscache = thiscache->mt_next) {
1539 (void) mutex_unlock(
1540 &thiscache->mt_cache_lock);
1541 }
1542 }
1543 (void) mutex_unlock(&cpuptr->mt_parent_lock);
1544 }
1545 (void) mutex_unlock(&oversize_lock);
1546 }
1547
1548 #pragma init(malloc_init)
1549 static void
malloc_init(void)1550 malloc_init(void)
1551 {
1552 /*
1553 * This works in the init section for this library
1554 * because setup_caches() doesn't call anything in libc
1555 * that calls malloc(). If it did, disaster would ensue.
1556 *
1557 * For this to work properly, this library must be the first
1558 * one to have its init section called (after libc) by the
1559 * dynamic linker. If some other library's init section
1560 * ran first and called malloc(), disaster would ensue.
1561 * Because this is an interposer library for malloc(), the
1562 * dynamic linker arranges for its init section to run first.
1563 */
1564 (void) setup_caches();
1565
1566 (void) pthread_atfork(malloc_prepare, malloc_release, malloc_release);
1567 }
1568