xref: /titanic_50/usr/src/lib/libmtmalloc/common/mtmalloc.c (revision 1d53067866b073ea6710000ba4dd448441361988)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*1d530678Sraf  * Common Development and Distribution License (the "License").
6*1d530678Sraf  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
21*1d530678Sraf 
227c478bd9Sstevel@tonic-gate /*
23*1d530678Sraf  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bd9Sstevel@tonic-gate 
29*1d530678Sraf #include <c_synonyms.h>
307c478bd9Sstevel@tonic-gate #include <mtmalloc.h>
317c478bd9Sstevel@tonic-gate #include "mtmalloc_impl.h"
327c478bd9Sstevel@tonic-gate #include <unistd.h>
337c478bd9Sstevel@tonic-gate #include <synch.h>
347c478bd9Sstevel@tonic-gate #include <thread.h>
35*1d530678Sraf #include <pthread.h>
367c478bd9Sstevel@tonic-gate #include <stdio.h>
377c478bd9Sstevel@tonic-gate #include <limits.h>
387c478bd9Sstevel@tonic-gate #include <errno.h>
397c478bd9Sstevel@tonic-gate #include <string.h>
407c478bd9Sstevel@tonic-gate #include <strings.h>
417c478bd9Sstevel@tonic-gate #include <sys/param.h>
427c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
437c478bd9Sstevel@tonic-gate 
447c478bd9Sstevel@tonic-gate /*
457c478bd9Sstevel@tonic-gate  * To turn on the asserts just compile -DDEBUG
467c478bd9Sstevel@tonic-gate  */
477c478bd9Sstevel@tonic-gate 
487c478bd9Sstevel@tonic-gate #ifndef	DEBUG
497c478bd9Sstevel@tonic-gate #define	NDEBUG
507c478bd9Sstevel@tonic-gate #endif
517c478bd9Sstevel@tonic-gate 
527c478bd9Sstevel@tonic-gate #include <assert.h>
537c478bd9Sstevel@tonic-gate 
547c478bd9Sstevel@tonic-gate /*
557c478bd9Sstevel@tonic-gate  * The MT hot malloc implementation contained herein is designed to be
567c478bd9Sstevel@tonic-gate  * plug-compatible with the libc version of malloc. It is not intended
577c478bd9Sstevel@tonic-gate  * to replace that implementation until we decide that it is ok to break
587c478bd9Sstevel@tonic-gate  * customer apps (Solaris 3.0).
597c478bd9Sstevel@tonic-gate  *
607c478bd9Sstevel@tonic-gate  * For requests up to 2^^16, the allocator initializes itself into NCPUS
617c478bd9Sstevel@tonic-gate  * worth of chains of caches. When a memory request is made, the calling thread
627c478bd9Sstevel@tonic-gate  * is vectored into one of NCPUS worth of caches.  The LWP id gives us a cheap,
637c478bd9Sstevel@tonic-gate  * contention-reducing index to use, eventually, this should be replaced with
647c478bd9Sstevel@tonic-gate  * the actual CPU sequence number, when an interface to get it is available.
657c478bd9Sstevel@tonic-gate  *
667c478bd9Sstevel@tonic-gate  * Once the thread is vectored into one of the list of caches the real
677c478bd9Sstevel@tonic-gate  * allocation of the memory begins. The size is determined to figure out which
687c478bd9Sstevel@tonic-gate  * bucket the allocation should be satisfied from. The management of free
697c478bd9Sstevel@tonic-gate  * buckets is done via a bitmask. A free bucket is represented by a 1. The
707c478bd9Sstevel@tonic-gate  * first free bit represents the first free bucket. The position of the bit,
717c478bd9Sstevel@tonic-gate  * represents the position of the bucket in the arena.
727c478bd9Sstevel@tonic-gate  *
737c478bd9Sstevel@tonic-gate  * When the memory from the arena is handed out, the address of the cache
747c478bd9Sstevel@tonic-gate  * control structure is written in the word preceding the returned memory.
757c478bd9Sstevel@tonic-gate  * This cache control address is used during free() to mark the buffer free
767c478bd9Sstevel@tonic-gate  * in the cache control structure.
777c478bd9Sstevel@tonic-gate  *
787c478bd9Sstevel@tonic-gate  * When all available memory in a cache has been depleted, a new chunk of memory
797c478bd9Sstevel@tonic-gate  * is allocated via sbrk(). The new cache is allocated from this chunk of memory
807c478bd9Sstevel@tonic-gate  * and initialized in the function create_cache(). New caches are installed at
817c478bd9Sstevel@tonic-gate  * the front of a singly linked list of the same size memory pools. This helps
827c478bd9Sstevel@tonic-gate  * to ensure that there will tend to be available memory in the beginning of the
837c478bd9Sstevel@tonic-gate  * list.
847c478bd9Sstevel@tonic-gate  *
857c478bd9Sstevel@tonic-gate  * Long linked lists hurt performance. To decrease this effect, there is a
867c478bd9Sstevel@tonic-gate  * tunable, requestsize, that bumps up the sbrk allocation size and thus
877c478bd9Sstevel@tonic-gate  * increases the number of available blocks within an arena.  We also keep
887c478bd9Sstevel@tonic-gate  * a "hint" for each cache list, which is the last cache in the list allocated
897c478bd9Sstevel@tonic-gate  * from.  This lowers the cost of searching if there are a lot of fully
907c478bd9Sstevel@tonic-gate  * allocated blocks at the front of the list.
917c478bd9Sstevel@tonic-gate  *
927c478bd9Sstevel@tonic-gate  * For requests greater than 2^^16 (oversize allocations), there are two pieces
937c478bd9Sstevel@tonic-gate  * of overhead. There is the OVERHEAD used to hold the cache addr
947c478bd9Sstevel@tonic-gate  * (&oversize_list), plus an oversize_t structure to further describe the block.
957c478bd9Sstevel@tonic-gate  *
967c478bd9Sstevel@tonic-gate  * The oversize list is kept as defragmented as possible by coalescing
977c478bd9Sstevel@tonic-gate  * freed oversized allocations with adjacent neighbors.
987c478bd9Sstevel@tonic-gate  *
997c478bd9Sstevel@tonic-gate  * Addresses handed out are stored in a hash table, and are aligned on
1007c478bd9Sstevel@tonic-gate  * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded-up
1017c478bd9Sstevel@tonic-gate  * where necessary in order to achieve this. This eases the implementation of
1027c478bd9Sstevel@tonic-gate  * MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
1037c478bd9Sstevel@tonic-gate  *
1047c478bd9Sstevel@tonic-gate  * A memalign allocation takes memalign header overhead.  There are two
1057c478bd9Sstevel@tonic-gate  * types of memalign headers distinguished by MTMALLOC_MEMALIGN_MAGIC
1067c478bd9Sstevel@tonic-gate  * and MTMALLOC_MEMALIGN_MIN_MAGIC.  When the size of memory taken to
1077c478bd9Sstevel@tonic-gate  * get to the aligned address from malloc'ed address is the minimum size
1087c478bd9Sstevel@tonic-gate  * OVERHEAD, we create a header taking only one OVERHEAD space with magic
1097c478bd9Sstevel@tonic-gate  * number MTMALLOC_MEMALIGN_MIN_MAGIC, and we know by subtracting OVERHEAD
1107c478bd9Sstevel@tonic-gate  * from memaligned address, we can get to the malloc'ed address. Otherwise,
1117c478bd9Sstevel@tonic-gate  * we create a memalign header taking two OVERHEAD space, one stores
1127c478bd9Sstevel@tonic-gate  * MTMALLOC_MEMALIGN_MAGIC magic number, the other one points back to the
1137c478bd9Sstevel@tonic-gate  * malloc'ed address.
1147c478bd9Sstevel@tonic-gate  */
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate #if defined(__i386) || defined(__amd64)
1177c478bd9Sstevel@tonic-gate #include <arpa/inet.h>	/* for htonl() */
1187c478bd9Sstevel@tonic-gate #endif
1197c478bd9Sstevel@tonic-gate 
/*
 * Forward declarations of this file's static helper routines.
 */
1207c478bd9Sstevel@tonic-gate static void * morecore(size_t);
1217c478bd9Sstevel@tonic-gate static void create_cache(cache_t *, size_t bufsize, uint_t hunks);
1227c478bd9Sstevel@tonic-gate static void * malloc_internal(size_t, percpu_t *);
1237c478bd9Sstevel@tonic-gate static void * oversize(size_t);
1247c478bd9Sstevel@tonic-gate static oversize_t *find_oversize(size_t);
1257c478bd9Sstevel@tonic-gate static void add_oversize(oversize_t *);
1267c478bd9Sstevel@tonic-gate static void copy_pattern(uint32_t, void *, size_t);
1277c478bd9Sstevel@tonic-gate static void * verify_pattern(uint32_t, void *, size_t);
1287c478bd9Sstevel@tonic-gate static void reinit_cpu_list(void);
1297c478bd9Sstevel@tonic-gate static void reinit_cache(cache_t *);
1307c478bd9Sstevel@tonic-gate static void free_oversize(oversize_t *);
1317c478bd9Sstevel@tonic-gate static oversize_t *oversize_header_alloc(uintptr_t, size_t);
1327c478bd9Sstevel@tonic-gate 
1337c478bd9Sstevel@tonic-gate /*
1347c478bd9Sstevel@tonic-gate  * oversize hash table stuff
 *
 * ovsz_hashtab[] holds NUM_BUCKETS chains of oversize_t headers linked
 * through hash_next.  HASH_OVERSIZE() picks the chain from an address by
 * taking it modulo NUM_BUCKETS.  free() walks the chain selected by
 * HASH_OVERSIZE(big->addr) while holding oversize_lock.
1357c478bd9Sstevel@tonic-gate  */
1367c478bd9Sstevel@tonic-gate #define	NUM_BUCKETS	67	/* must be prime */
1377c478bd9Sstevel@tonic-gate #define	HASH_OVERSIZE(caddr)	((uintptr_t)(caddr) % NUM_BUCKETS)
1387c478bd9Sstevel@tonic-gate oversize_t *ovsz_hashtab[NUM_BUCKETS];
1397c478bd9Sstevel@tonic-gate 
/*
 * ALIGN(x, a) rounds x up to the next multiple of a and yields a uintptr_t.
 * The add-then-mask arithmetic is only correct when a is a power of two.
 */
1407c478bd9Sstevel@tonic-gate #define	ALIGN(x, a)	((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
1417c478bd9Sstevel@tonic-gate 			& ~((uintptr_t)(a) - 1)))
1427c478bd9Sstevel@tonic-gate 
1437c478bd9Sstevel@tonic-gate /* need this to deal with little endianness of x86: htonl() there, identity elsewhere */
1447c478bd9Sstevel@tonic-gate #if defined(__i386) || defined(__amd64)
1457c478bd9Sstevel@tonic-gate #define	FLIP_EM(x)	htonl((x))
1467c478bd9Sstevel@tonic-gate #else
1477c478bd9Sstevel@tonic-gate #define	FLIP_EM(x)	(x)
1487c478bd9Sstevel@tonic-gate #endif
1497c478bd9Sstevel@tonic-gate 
/*
 * Coalescing flags.  COALESCE_WITH_BOTH_SIDES is the OR of the left and
 * right bits; INSERT_ONLY is the absence of both.
 * NOTE(review): presumably consumed by the oversize free-list insertion
 * code -- confirm against add_oversize().
 */
1507c478bd9Sstevel@tonic-gate #define	INSERT_ONLY			0
1517c478bd9Sstevel@tonic-gate #define	COALESCE_LEFT			0x00000001
1527c478bd9Sstevel@tonic-gate #define	COALESCE_RIGHT			0x00000002
1537c478bd9Sstevel@tonic-gate #define	COALESCE_WITH_BOTH_SIDES	(COALESCE_LEFT | COALESCE_RIGHT)
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate #define	OVERHEAD	8	/* size needed to write cache addr */
1567c478bd9Sstevel@tonic-gate #define	HUNKSIZE	8192	/* just a multiplier */
1577c478bd9Sstevel@tonic-gate 
/* malloc() sends any request larger than MAX_CACHED to oversize(). */
1587c478bd9Sstevel@tonic-gate #define	MAX_CACHED_SHIFT	16	/* 64K is the max cached size */
1597c478bd9Sstevel@tonic-gate #define	MAX_CACHED		(1 << MAX_CACHED_SHIFT)
1607c478bd9Sstevel@tonic-gate #define	MIN_CACHED_SHIFT	4	/* smaller requests rounded up */
1617c478bd9Sstevel@tonic-gate #define	MTMALLOC_MIN_ALIGN	8	/* min guaranteed alignment */
1627c478bd9Sstevel@tonic-gate 
16370911a0dSrm88369 /* maximum size before overflow */
16470911a0dSrm88369 #define	MAX_MTMALLOC	(SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \
16570911a0dSrm88369 			- OVSZ_HEADER_SIZE)
16670911a0dSrm88369 
/*
 * NUM_CACHES counts the shift values from MIN_CACHED_SHIFT through
 * MAX_CACHED_SHIFT inclusive.  CACHELIST_SIZE is that many cache_head_t
 * entries, rounded up to a multiple of CACHE_COHERENCY_UNIT.
 */
1677c478bd9Sstevel@tonic-gate #define	NUM_CACHES	(MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
1687c478bd9Sstevel@tonic-gate #define	CACHELIST_SIZE	ALIGN(NUM_CACHES * sizeof (cache_head_t), \
1697c478bd9Sstevel@tonic-gate     CACHE_COHERENCY_UNIT)
1707c478bd9Sstevel@tonic-gate 
1717c478bd9Sstevel@tonic-gate #define	MINSIZE		9	/* for requestsize, tunable */
1727c478bd9Sstevel@tonic-gate #define	MAXSIZE		256	/* arbitrary, big enough, for requestsize */
1737c478bd9Sstevel@tonic-gate 
1747c478bd9Sstevel@tonic-gate #define	FREEPATTERN	0xdeadbeef /* debug fill pattern for free buf */
1757c478bd9Sstevel@tonic-gate #define	INITPATTERN	0xbaddcafe /* debug fill pattern for new buf */
1767c478bd9Sstevel@tonic-gate 
/*
 * misaligned(p) is non-zero when p is not a multiple of sizeof (int).
 *
 * IS_OVERSIZE(x, y) is 1 when x is larger than MAX_CACHED and also smaller
 * than y, else 0.  memalign() passes a fragment size computed by unsigned
 * subtraction as x and the whole allocation size as y, so the x < y test
 * rejects a fragment size that wrapped around.
 */
1777c478bd9Sstevel@tonic-gate #define	misaligned(p)	((unsigned)(p) & (sizeof (int) - 1))
1787c478bd9Sstevel@tonic-gate #define	IS_OVERSIZE(x, y)	(((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))
1797c478bd9Sstevel@tonic-gate 
1807c478bd9Sstevel@tonic-gate static long requestsize = MINSIZE; /* 9 pages per cache; tunable; 9 is min */
1817c478bd9Sstevel@tonic-gate 
/* malloc() indexes cpu_list[] with (curcpu() & cpu_mask). */
1827c478bd9Sstevel@tonic-gate static uint_t cpu_mask;
1837c478bd9Sstevel@tonic-gate static curcpu_func curcpu;
1847c478bd9Sstevel@tonic-gate 
/* debugopt is a bit mask; this file tests MTDOUBLEFREE and MTDEBUGPATTERN. */
1857c478bd9Sstevel@tonic-gate static int32_t debugopt;
1867c478bd9Sstevel@tonic-gate static int32_t reinit;
1877c478bd9Sstevel@tonic-gate 
/*
 * oversize_lock is held by free() while it searches ovsz_hashtab[] and
 * passes an oversize block to add_oversize().
 */
1887c478bd9Sstevel@tonic-gate static percpu_t *cpu_list;
1897c478bd9Sstevel@tonic-gate static oversize_t oversize_list;
190*1d530678Sraf static mutex_t oversize_lock = DEFAULTMUTEX;
1917c478bd9Sstevel@tonic-gate 
192*1d530678Sraf static int ncpus = 0;
1937c478bd9Sstevel@tonic-gate 
/*
 * Tag values kept in the OVERHEAD-sized word just below a user pointer.
 * All three are derived from the address of oversize_list.  free() and
 * realloc() treat any other value found in that word as a cache_t pointer.
 */
1947c478bd9Sstevel@tonic-gate #define	MTMALLOC_OVERSIZE_MAGIC		((uintptr_t)&oversize_list)
1957c478bd9Sstevel@tonic-gate #define	MTMALLOC_MEMALIGN_MAGIC		((uintptr_t)&oversize_list + 1)
1967c478bd9Sstevel@tonic-gate #define	MTMALLOC_MEMALIGN_MIN_MAGIC	((uintptr_t)&oversize_list + 2)
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate /*
1997c478bd9Sstevel@tonic-gate  * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
2007c478bd9Sstevel@tonic-gate  * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
2017c478bd9Sstevel@tonic-gate  * this is achieved.
 *
 * OVSZ_HEADER_SIZE is the distance from the start of an oversize_t header
 * to the user pointer: the rounded header plus the OVERHEAD tag word.
2027c478bd9Sstevel@tonic-gate  */
2037c478bd9Sstevel@tonic-gate #define	OVSZ_SIZE		(ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
2047c478bd9Sstevel@tonic-gate #define	OVSZ_HEADER_SIZE	(OVSZ_SIZE + OVERHEAD)
2057c478bd9Sstevel@tonic-gate 
/*
 * memalign header takes 2 OVERHEAD space.  One for memalign magic, and the
 * other one points back to the start address of originally allocated space.
 *
 * MEMALIGN_HEADER_SIZE is parenthesized so that it can be used safely
 * inside larger expressions.
 *
 * MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr) writes the memalign header
 * immediately below the aligned address x:
 *
 *   shift == OVERHEAD	one word, MTMALLOC_MEMALIGN_MIN_MAGIC; the malloc'ed
 *			address is implicitly x - OVERHEAD.
 *   otherwise		two words: MTMALLOC_MEMALIGN_MAGIC at x - OVERHEAD
 *			and malloc_addr at x - 2 * OVERHEAD.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (it must be followed by a semicolon) and cannot capture
 * a caller's else clause.  Arguments are parenthesized; x is still
 * evaluated more than once, so it must not have side effects.
 */
#define	MEMALIGN_HEADER_SIZE	(2 * OVERHEAD)
#define	MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
	do {\
		if ((shift) == OVERHEAD) {\
			*((uintptr_t *)((caddr_t)(x) - OVERHEAD)) = \
				MTMALLOC_MEMALIGN_MIN_MAGIC; \
		} else {\
			*((uintptr_t *)((caddr_t)(x) - OVERHEAD)) = \
				MTMALLOC_MEMALIGN_MAGIC; \
			*((uintptr_t *)((caddr_t)(x) - 2 * OVERHEAD)) = \
				(uintptr_t)(malloc_addr); \
		}\
	} while (0)
2217c478bd9Sstevel@tonic-gate 
2227c478bd9Sstevel@tonic-gate void *
2237c478bd9Sstevel@tonic-gate malloc(size_t bytes)
2247c478bd9Sstevel@tonic-gate {
2257c478bd9Sstevel@tonic-gate 	percpu_t *list_rotor;
2267c478bd9Sstevel@tonic-gate 	uint_t	list_index;
2277c478bd9Sstevel@tonic-gate 
2287c478bd9Sstevel@tonic-gate 	if (bytes > MAX_CACHED)
2297c478bd9Sstevel@tonic-gate 		return (oversize(bytes));
2307c478bd9Sstevel@tonic-gate 
2317c478bd9Sstevel@tonic-gate 	list_index = (curcpu() & cpu_mask);
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate 	list_rotor = &cpu_list[list_index];
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate 	return (malloc_internal(bytes, list_rotor));
2367c478bd9Sstevel@tonic-gate }
2377c478bd9Sstevel@tonic-gate 
2387c478bd9Sstevel@tonic-gate void *
2397c478bd9Sstevel@tonic-gate realloc(void * ptr, size_t bytes)
2407c478bd9Sstevel@tonic-gate {
2417c478bd9Sstevel@tonic-gate 	void *new, *data_ptr;
2427c478bd9Sstevel@tonic-gate 	cache_t *cacheptr;
2437c478bd9Sstevel@tonic-gate 	caddr_t mem;
2447c478bd9Sstevel@tonic-gate 	size_t shift = 0;
2457c478bd9Sstevel@tonic-gate 
2467c478bd9Sstevel@tonic-gate 	if (ptr == NULL)
2477c478bd9Sstevel@tonic-gate 		return (malloc(bytes));
2487c478bd9Sstevel@tonic-gate 
2497c478bd9Sstevel@tonic-gate 	if (bytes == 0) {
2507c478bd9Sstevel@tonic-gate 		free(ptr);
2517c478bd9Sstevel@tonic-gate 		return (NULL);
2527c478bd9Sstevel@tonic-gate 	}
2537c478bd9Sstevel@tonic-gate 
2547c478bd9Sstevel@tonic-gate 	data_ptr = ptr;
2557c478bd9Sstevel@tonic-gate 	mem = (caddr_t)ptr - OVERHEAD;
2567c478bd9Sstevel@tonic-gate 
2577c478bd9Sstevel@tonic-gate 	new = malloc(bytes);
2587c478bd9Sstevel@tonic-gate 
2597c478bd9Sstevel@tonic-gate 	if (new == NULL)
2607c478bd9Sstevel@tonic-gate 		return (NULL);
2617c478bd9Sstevel@tonic-gate 
2627c478bd9Sstevel@tonic-gate 	/*
2637c478bd9Sstevel@tonic-gate 	 * If new == ptr, ptr has previously been freed. Passing a freed pointer
2647c478bd9Sstevel@tonic-gate 	 * to realloc() is not allowed - unless the caller specifically states
2657c478bd9Sstevel@tonic-gate 	 * otherwise, in which case we must avoid freeing ptr (ie new) before we
2667c478bd9Sstevel@tonic-gate 	 * return new. There is (obviously) no requirement to memcpy() ptr to
2677c478bd9Sstevel@tonic-gate 	 * new before we return.
2687c478bd9Sstevel@tonic-gate 	 */
2697c478bd9Sstevel@tonic-gate 	if (new == ptr) {
2707c478bd9Sstevel@tonic-gate 		if (!(debugopt & MTDOUBLEFREE))
2717c478bd9Sstevel@tonic-gate 			abort();
2727c478bd9Sstevel@tonic-gate 		return (new);
2737c478bd9Sstevel@tonic-gate 	}
2747c478bd9Sstevel@tonic-gate 
2757c478bd9Sstevel@tonic-gate 	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
2767c478bd9Sstevel@tonic-gate 		mem -= OVERHEAD;
2777c478bd9Sstevel@tonic-gate 		ptr = (void *)*(uintptr_t *)mem;
2787c478bd9Sstevel@tonic-gate 		mem = (caddr_t)ptr - OVERHEAD;
2797c478bd9Sstevel@tonic-gate 		shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr);
2807c478bd9Sstevel@tonic-gate 	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
2817c478bd9Sstevel@tonic-gate 		ptr = (void *) mem;
2827c478bd9Sstevel@tonic-gate 		mem -= OVERHEAD;
2837c478bd9Sstevel@tonic-gate 		shift = OVERHEAD;
2847c478bd9Sstevel@tonic-gate 	}
2857c478bd9Sstevel@tonic-gate 
2867c478bd9Sstevel@tonic-gate 	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
2877c478bd9Sstevel@tonic-gate 		oversize_t *old;
2887c478bd9Sstevel@tonic-gate 
2897c478bd9Sstevel@tonic-gate 		old = (oversize_t *)(mem - OVSZ_SIZE);
2907c478bd9Sstevel@tonic-gate 		(void) memcpy(new, data_ptr, MIN(bytes, old->size - shift));
2917c478bd9Sstevel@tonic-gate 		free(ptr);
2927c478bd9Sstevel@tonic-gate 		return (new);
2937c478bd9Sstevel@tonic-gate 	}
2947c478bd9Sstevel@tonic-gate 
2957c478bd9Sstevel@tonic-gate 	cacheptr = (cache_t *)*(uintptr_t *)mem;
2967c478bd9Sstevel@tonic-gate 
2977c478bd9Sstevel@tonic-gate 	(void) memcpy(new, data_ptr,
2987c478bd9Sstevel@tonic-gate 		MIN(cacheptr->mt_size - OVERHEAD - shift, bytes));
2997c478bd9Sstevel@tonic-gate 	free(ptr);
3007c478bd9Sstevel@tonic-gate 
3017c478bd9Sstevel@tonic-gate 	return (new);
3027c478bd9Sstevel@tonic-gate }
3037c478bd9Sstevel@tonic-gate 
/*
 * calloc() - allocate zero-filled storage for an array.
 *
 *	nelem	number of elements
 *	bytes	size of each element
 *
 * Returns a pointer to nelem * bytes bytes obtained from malloc() and
 * cleared to zero, or NULL if malloc() fails.
 *
 * If nelem * bytes cannot be represented in a size_t the request is
 * refused: errno is set to ENOMEM and NULL is returned.  Without this
 * check the product silently wraps and the caller is handed a buffer far
 * smaller than the array it asked for.
 */
void *
calloc(size_t nelem, size_t bytes)
{
	void *ptr;
	size_t size = nelem * bytes;

	/* unsigned multiplication wraps; dividing back exposes the wrap */
	if (nelem != 0 && size / nelem != bytes) {
		errno = ENOMEM;
		return (NULL);
	}

	ptr = malloc(size);
	if (ptr == NULL)
		return (NULL);
	(void) memset(ptr, 0, size);

	return (ptr);
}
3177c478bd9Sstevel@tonic-gate 
3187c478bd9Sstevel@tonic-gate void
3197c478bd9Sstevel@tonic-gate free(void * ptr)
3207c478bd9Sstevel@tonic-gate {
3217c478bd9Sstevel@tonic-gate 	cache_t *cacheptr;
3227c478bd9Sstevel@tonic-gate 	caddr_t mem;
3237c478bd9Sstevel@tonic-gate 	int32_t i;
3247c478bd9Sstevel@tonic-gate 	caddr_t freeblocks;
3257c478bd9Sstevel@tonic-gate 	uintptr_t offset;
3267c478bd9Sstevel@tonic-gate 	uchar_t mask;
3277c478bd9Sstevel@tonic-gate 	int32_t which_bit, num_bytes;
3287c478bd9Sstevel@tonic-gate 
3297c478bd9Sstevel@tonic-gate 	if (ptr == NULL)
3307c478bd9Sstevel@tonic-gate 		return;
3317c478bd9Sstevel@tonic-gate 
3327c478bd9Sstevel@tonic-gate 	mem = (caddr_t)ptr - OVERHEAD;
3337c478bd9Sstevel@tonic-gate 
3347c478bd9Sstevel@tonic-gate 	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
3357c478bd9Sstevel@tonic-gate 		mem -= OVERHEAD;
3367c478bd9Sstevel@tonic-gate 		ptr = (void *)*(uintptr_t *)mem;
3377c478bd9Sstevel@tonic-gate 		mem = (caddr_t)ptr - OVERHEAD;
3387c478bd9Sstevel@tonic-gate 	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
3397c478bd9Sstevel@tonic-gate 		ptr = (void *) mem;
3407c478bd9Sstevel@tonic-gate 		mem -= OVERHEAD;
3417c478bd9Sstevel@tonic-gate 	}
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate 	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
3447c478bd9Sstevel@tonic-gate 		oversize_t *big, **opp;
3457c478bd9Sstevel@tonic-gate 		int bucket;
3467c478bd9Sstevel@tonic-gate 
3477c478bd9Sstevel@tonic-gate 		big = (oversize_t *)(mem - OVSZ_SIZE);
3487c478bd9Sstevel@tonic-gate 		(void) mutex_lock(&oversize_lock);
3497c478bd9Sstevel@tonic-gate 
3507c478bd9Sstevel@tonic-gate 		bucket = HASH_OVERSIZE(big->addr);
3517c478bd9Sstevel@tonic-gate 		for (opp = &ovsz_hashtab[bucket]; *opp != NULL;
3527c478bd9Sstevel@tonic-gate 		    opp = &(*opp)->hash_next)
3537c478bd9Sstevel@tonic-gate 			if (*opp == big)
3547c478bd9Sstevel@tonic-gate 				break;
3557c478bd9Sstevel@tonic-gate 
3567c478bd9Sstevel@tonic-gate 		if (*opp == NULL) {
3577c478bd9Sstevel@tonic-gate 			if (!(debugopt & MTDOUBLEFREE))
3587c478bd9Sstevel@tonic-gate 				abort();
3597c478bd9Sstevel@tonic-gate 			(void) mutex_unlock(&oversize_lock);
3607c478bd9Sstevel@tonic-gate 			return;
3617c478bd9Sstevel@tonic-gate 		}
3627c478bd9Sstevel@tonic-gate 
3637c478bd9Sstevel@tonic-gate 		*opp = big->hash_next;	/* remove big from the hash table */
3647c478bd9Sstevel@tonic-gate 		big->hash_next = NULL;
3657c478bd9Sstevel@tonic-gate 
3667c478bd9Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN)
3677c478bd9Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, ptr, big->size);
3687c478bd9Sstevel@tonic-gate 		add_oversize(big);
3697c478bd9Sstevel@tonic-gate 		(void) mutex_unlock(&oversize_lock);
3707c478bd9Sstevel@tonic-gate 		return;
3717c478bd9Sstevel@tonic-gate 	}
3727c478bd9Sstevel@tonic-gate 
3737c478bd9Sstevel@tonic-gate 	cacheptr = (cache_t *)*(uintptr_t *)mem;
3747c478bd9Sstevel@tonic-gate 	freeblocks = cacheptr->mt_freelist;
3757c478bd9Sstevel@tonic-gate 
3767c478bd9Sstevel@tonic-gate 	/*
3777c478bd9Sstevel@tonic-gate 	 * This is the distance measured in bits into the arena.
3787c478bd9Sstevel@tonic-gate 	 * The value of offset is in bytes but there is a 1-1 correlation
3797c478bd9Sstevel@tonic-gate 	 * between distance into the arena and distance into the
3807c478bd9Sstevel@tonic-gate 	 * freelist bitmask.
3817c478bd9Sstevel@tonic-gate 	 */
3827c478bd9Sstevel@tonic-gate 	offset = mem - cacheptr->mt_arena;
3837c478bd9Sstevel@tonic-gate 
3847c478bd9Sstevel@tonic-gate 	/*
3857c478bd9Sstevel@tonic-gate 	 * i is total number of bits to offset into freelist bitmask.
3867c478bd9Sstevel@tonic-gate 	 */
3877c478bd9Sstevel@tonic-gate 
3887c478bd9Sstevel@tonic-gate 	i = offset / cacheptr->mt_size;
3897c478bd9Sstevel@tonic-gate 
3907c478bd9Sstevel@tonic-gate 	num_bytes = i >> 3;
3917c478bd9Sstevel@tonic-gate 
3927c478bd9Sstevel@tonic-gate 	/*
3937c478bd9Sstevel@tonic-gate 	 * which_bit is the bit offset into the byte in the freelist.
3947c478bd9Sstevel@tonic-gate 	 * if our freelist bitmask looks like 0xf3 and we are freeing
3957c478bd9Sstevel@tonic-gate 	 * block 5 (ie: the 6th block) our mask will be 0xf7 after
3967c478bd9Sstevel@tonic-gate 	 * the free. Things go left to right that's why the mask is 0x80
3977c478bd9Sstevel@tonic-gate 	 * and not 0x01.
3987c478bd9Sstevel@tonic-gate 	 */
3997c478bd9Sstevel@tonic-gate 	which_bit = i - (num_bytes << 3);
4007c478bd9Sstevel@tonic-gate 
4017c478bd9Sstevel@tonic-gate 	mask = 0x80 >> which_bit;
4027c478bd9Sstevel@tonic-gate 
4037c478bd9Sstevel@tonic-gate 	freeblocks += num_bytes;
4047c478bd9Sstevel@tonic-gate 
4057c478bd9Sstevel@tonic-gate 	if (debugopt & MTDEBUGPATTERN)
4067c478bd9Sstevel@tonic-gate 		copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD);
4077c478bd9Sstevel@tonic-gate 
4087c478bd9Sstevel@tonic-gate 	(void) mutex_lock(&cacheptr->mt_cache_lock);
4097c478bd9Sstevel@tonic-gate 
4107c478bd9Sstevel@tonic-gate 	if (*freeblocks & mask) {
4117c478bd9Sstevel@tonic-gate 		if (!(debugopt & MTDOUBLEFREE))
4127c478bd9Sstevel@tonic-gate 			abort();
4137c478bd9Sstevel@tonic-gate 	} else {
4147c478bd9Sstevel@tonic-gate 		*freeblocks |= mask;
4157c478bd9Sstevel@tonic-gate 		cacheptr->mt_nfree++;
4167c478bd9Sstevel@tonic-gate 	}
4177c478bd9Sstevel@tonic-gate 
4187c478bd9Sstevel@tonic-gate 	(void) mutex_unlock(&cacheptr->mt_cache_lock);
4197c478bd9Sstevel@tonic-gate }
4207c478bd9Sstevel@tonic-gate 
4217c478bd9Sstevel@tonic-gate void *
4227c478bd9Sstevel@tonic-gate memalign(size_t alignment, size_t size)
4237c478bd9Sstevel@tonic-gate {
4247c478bd9Sstevel@tonic-gate 	size_t alloc_size;
4257c478bd9Sstevel@tonic-gate 	uintptr_t offset;
4267c478bd9Sstevel@tonic-gate 	void *alloc_buf;
4277c478bd9Sstevel@tonic-gate 	void *ret_buf;
4287c478bd9Sstevel@tonic-gate 
4297c478bd9Sstevel@tonic-gate 	if (size == 0 || alignment == 0 ||
4307c478bd9Sstevel@tonic-gate 		misaligned(alignment) ||
4317c478bd9Sstevel@tonic-gate 		(alignment & (alignment - 1)) != 0) {
4327c478bd9Sstevel@tonic-gate 		errno = EINVAL;
4337c478bd9Sstevel@tonic-gate 		return (NULL);
4347c478bd9Sstevel@tonic-gate 	}
4357c478bd9Sstevel@tonic-gate 
4367c478bd9Sstevel@tonic-gate 	/* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
4377c478bd9Sstevel@tonic-gate 	if (alignment <= MTMALLOC_MIN_ALIGN)
4387c478bd9Sstevel@tonic-gate 		return (malloc(size));
4397c478bd9Sstevel@tonic-gate 
4407c478bd9Sstevel@tonic-gate 	alloc_size = size + alignment - MTMALLOC_MIN_ALIGN;
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 	if (alloc_size < size) { /* overflow */
4437c478bd9Sstevel@tonic-gate 		errno = ENOMEM;
4447c478bd9Sstevel@tonic-gate 		return (NULL);
4457c478bd9Sstevel@tonic-gate 	}
4467c478bd9Sstevel@tonic-gate 
4477c478bd9Sstevel@tonic-gate 	alloc_buf = malloc(alloc_size);
4487c478bd9Sstevel@tonic-gate 
4497c478bd9Sstevel@tonic-gate 	if (alloc_buf == NULL)
4507c478bd9Sstevel@tonic-gate 		/* malloc sets errno */
4517c478bd9Sstevel@tonic-gate 		return (NULL);
4527c478bd9Sstevel@tonic-gate 
4537c478bd9Sstevel@tonic-gate 	/*
4547c478bd9Sstevel@tonic-gate 	 * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
4557c478bd9Sstevel@tonic-gate 	 * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since
4567c478bd9Sstevel@tonic-gate 	 * we will use alloc_size to return the excess fragments to the free
4577c478bd9Sstevel@tonic-gate 	 * list, we also round-up alloc_size if necessary.
4587c478bd9Sstevel@tonic-gate 	 */
4597c478bd9Sstevel@tonic-gate 	if ((alloc_size > MAX_CACHED) &&
4607c478bd9Sstevel@tonic-gate 	    (alloc_size & (MTMALLOC_MIN_ALIGN - 1)))
4617c478bd9Sstevel@tonic-gate 		alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN);
4627c478bd9Sstevel@tonic-gate 
4637c478bd9Sstevel@tonic-gate 	if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) {
4647c478bd9Sstevel@tonic-gate 		/* aligned correctly */
4657c478bd9Sstevel@tonic-gate 
4667c478bd9Sstevel@tonic-gate 		size_t frag_size = alloc_size -
4677c478bd9Sstevel@tonic-gate 			(size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
4687c478bd9Sstevel@tonic-gate 
4697c478bd9Sstevel@tonic-gate 		/*
4707c478bd9Sstevel@tonic-gate 		 * If the leftover piece of the memory > MAX_CACHED,
4717c478bd9Sstevel@tonic-gate 		 * split off the piece and return it back to the freelist.
4727c478bd9Sstevel@tonic-gate 		 */
4737c478bd9Sstevel@tonic-gate 		if (IS_OVERSIZE(frag_size, alloc_size)) {
4747c478bd9Sstevel@tonic-gate 			oversize_t *orig, *tail;
4757c478bd9Sstevel@tonic-gate 			uintptr_t taddr;
4767c478bd9Sstevel@tonic-gate 			size_t data_size;
4777c478bd9Sstevel@tonic-gate 			taddr = ALIGN((uintptr_t)alloc_buf + size,
4787c478bd9Sstevel@tonic-gate 					MTMALLOC_MIN_ALIGN);
4797c478bd9Sstevel@tonic-gate 			data_size = taddr - (uintptr_t)alloc_buf;
4807c478bd9Sstevel@tonic-gate 			orig = (oversize_t *)((uintptr_t)alloc_buf -
4817c478bd9Sstevel@tonic-gate 					OVSZ_HEADER_SIZE);
4827c478bd9Sstevel@tonic-gate 			frag_size = orig->size - data_size -
4837c478bd9Sstevel@tonic-gate 					OVSZ_HEADER_SIZE;
4847c478bd9Sstevel@tonic-gate 			orig->size = data_size;
4857c478bd9Sstevel@tonic-gate 			tail = oversize_header_alloc(taddr, frag_size);
4867c478bd9Sstevel@tonic-gate 			free_oversize(tail);
4877c478bd9Sstevel@tonic-gate 		}
4887c478bd9Sstevel@tonic-gate 		ret_buf = alloc_buf;
4897c478bd9Sstevel@tonic-gate 	} else {
4907c478bd9Sstevel@tonic-gate 		uchar_t	oversize_bits = 0;
4917c478bd9Sstevel@tonic-gate 		size_t	head_sz, data_sz, tail_sz;
4927c478bd9Sstevel@tonic-gate 		uintptr_t ret_addr, taddr, shift, tshift;
4937c478bd9Sstevel@tonic-gate 		oversize_t *orig, *tail;
4947c478bd9Sstevel@tonic-gate 		size_t tsize;
4957c478bd9Sstevel@tonic-gate 
4967c478bd9Sstevel@tonic-gate 		/* needs to be aligned */
4977c478bd9Sstevel@tonic-gate 		shift = alignment - offset;
4987c478bd9Sstevel@tonic-gate 
4997c478bd9Sstevel@tonic-gate 		assert(shift >= MTMALLOC_MIN_ALIGN);
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 		ret_addr = ((uintptr_t)alloc_buf + shift);
5027c478bd9Sstevel@tonic-gate 		ret_buf = (void *)ret_addr;
5037c478bd9Sstevel@tonic-gate 
5047c478bd9Sstevel@tonic-gate 		if (alloc_size <= MAX_CACHED) {
5057c478bd9Sstevel@tonic-gate 			MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf);
5067c478bd9Sstevel@tonic-gate 			return (ret_buf);
5077c478bd9Sstevel@tonic-gate 		}
5087c478bd9Sstevel@tonic-gate 
5097c478bd9Sstevel@tonic-gate 		/*
5107c478bd9Sstevel@tonic-gate 		 * Only check for the fragments when the memory is allocted
5117c478bd9Sstevel@tonic-gate 		 * from oversize_list.  Split off a fragment and return it
5127c478bd9Sstevel@tonic-gate 		 * to the oversize freelist when it's > MAX_CACHED.
5137c478bd9Sstevel@tonic-gate 		 */
5147c478bd9Sstevel@tonic-gate 
5157c478bd9Sstevel@tonic-gate 		head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE);
5167c478bd9Sstevel@tonic-gate 
5177c478bd9Sstevel@tonic-gate 		tail_sz = alloc_size -
5187c478bd9Sstevel@tonic-gate 			(shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
5197c478bd9Sstevel@tonic-gate 
5207c478bd9Sstevel@tonic-gate 		oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) |
5217c478bd9Sstevel@tonic-gate 				IS_OVERSIZE(size, alloc_size) << DATA_SHIFT |
5227c478bd9Sstevel@tonic-gate 				IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT;
5237c478bd9Sstevel@tonic-gate 
5247c478bd9Sstevel@tonic-gate 		switch (oversize_bits) {
5257c478bd9Sstevel@tonic-gate 			case NONE_OVERSIZE:
5267c478bd9Sstevel@tonic-gate 			case DATA_OVERSIZE:
5277c478bd9Sstevel@tonic-gate 				MEMALIGN_HEADER_ALLOC(ret_addr, shift,
5287c478bd9Sstevel@tonic-gate 					alloc_buf);
5297c478bd9Sstevel@tonic-gate 				break;
5307c478bd9Sstevel@tonic-gate 			case HEAD_OVERSIZE:
5317c478bd9Sstevel@tonic-gate 				/*
5327c478bd9Sstevel@tonic-gate 				 * If we can extend data > MAX_CACHED and have
5337c478bd9Sstevel@tonic-gate 				 * head still > MAX_CACHED, we split head-end
5347c478bd9Sstevel@tonic-gate 				 * as the case of head-end and data oversized,
5357c478bd9Sstevel@tonic-gate 				 * otherwise just create memalign header.
5367c478bd9Sstevel@tonic-gate 				 */
5377c478bd9Sstevel@tonic-gate 				tsize = (shift + size) - (MAX_CACHED + 8 +
5387c478bd9Sstevel@tonic-gate 					MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
5397c478bd9Sstevel@tonic-gate 
5407c478bd9Sstevel@tonic-gate 				if (!IS_OVERSIZE(tsize, alloc_size)) {
5417c478bd9Sstevel@tonic-gate 					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
5427c478bd9Sstevel@tonic-gate 						alloc_buf);
5437c478bd9Sstevel@tonic-gate 					break;
5447c478bd9Sstevel@tonic-gate 				} else {
5457c478bd9Sstevel@tonic-gate 					tsize += OVSZ_HEADER_SIZE;
5467c478bd9Sstevel@tonic-gate 					taddr = ALIGN((uintptr_t)alloc_buf +
5477c478bd9Sstevel@tonic-gate 						tsize, MTMALLOC_MIN_ALIGN);
5487c478bd9Sstevel@tonic-gate 					tshift = ret_addr - taddr;
5497c478bd9Sstevel@tonic-gate 					MEMALIGN_HEADER_ALLOC(ret_addr, tshift,
5507c478bd9Sstevel@tonic-gate 						taddr);
5517c478bd9Sstevel@tonic-gate 					ret_addr = taddr;
5527c478bd9Sstevel@tonic-gate 					shift = ret_addr - (uintptr_t)alloc_buf;
5537c478bd9Sstevel@tonic-gate 				}
5547c478bd9Sstevel@tonic-gate 				/* FALLTHROUGH */
5557c478bd9Sstevel@tonic-gate 			case HEAD_AND_DATA_OVERSIZE:
5567c478bd9Sstevel@tonic-gate 				/*
5577c478bd9Sstevel@tonic-gate 				 * Split off the head fragment and
5587c478bd9Sstevel@tonic-gate 				 * return it back to oversize freelist.
5597c478bd9Sstevel@tonic-gate 				 * Create oversize header for the piece
5607c478bd9Sstevel@tonic-gate 				 * of (data + tail fragment).
5617c478bd9Sstevel@tonic-gate 				 */
5627c478bd9Sstevel@tonic-gate 				orig = (oversize_t *)((uintptr_t)alloc_buf -
5637c478bd9Sstevel@tonic-gate 						OVSZ_HEADER_SIZE);
5647c478bd9Sstevel@tonic-gate 				(void) oversize_header_alloc(ret_addr -
5657c478bd9Sstevel@tonic-gate 						OVSZ_HEADER_SIZE,
5667c478bd9Sstevel@tonic-gate 						(orig->size - shift));
5677c478bd9Sstevel@tonic-gate 				orig->size = shift - OVSZ_HEADER_SIZE;
5687c478bd9Sstevel@tonic-gate 
5697c478bd9Sstevel@tonic-gate 				/* free up the head fragment */
5707c478bd9Sstevel@tonic-gate 				free_oversize(orig);
5717c478bd9Sstevel@tonic-gate 				break;
5727c478bd9Sstevel@tonic-gate 			case TAIL_OVERSIZE:
5737c478bd9Sstevel@tonic-gate 				/*
5747c478bd9Sstevel@tonic-gate 				 * If we can extend data > MAX_CACHED and have
5757c478bd9Sstevel@tonic-gate 				 * tail-end still > MAX_CACHED, we split tail
5767c478bd9Sstevel@tonic-gate 				 * end, otherwise just create memalign header.
5777c478bd9Sstevel@tonic-gate 				 */
5787c478bd9Sstevel@tonic-gate 				orig = (oversize_t *)((uintptr_t)alloc_buf -
5797c478bd9Sstevel@tonic-gate 						OVSZ_HEADER_SIZE);
5807c478bd9Sstevel@tonic-gate 				tsize =  orig->size - (MAX_CACHED + 8 +
5817c478bd9Sstevel@tonic-gate 					shift + OVSZ_HEADER_SIZE +
5827c478bd9Sstevel@tonic-gate 					MTMALLOC_MIN_ALIGN);
5837c478bd9Sstevel@tonic-gate 				if (!IS_OVERSIZE(tsize, alloc_size)) {
5847c478bd9Sstevel@tonic-gate 					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
5857c478bd9Sstevel@tonic-gate 						alloc_buf);
5867c478bd9Sstevel@tonic-gate 					break;
5877c478bd9Sstevel@tonic-gate 				} else {
5887c478bd9Sstevel@tonic-gate 					size = MAX_CACHED + 8;
5897c478bd9Sstevel@tonic-gate 				}
5907c478bd9Sstevel@tonic-gate 				/* FALLTHROUGH */
5917c478bd9Sstevel@tonic-gate 			case DATA_AND_TAIL_OVERSIZE:
5927c478bd9Sstevel@tonic-gate 				/*
5937c478bd9Sstevel@tonic-gate 				 * Split off the tail fragment and
5947c478bd9Sstevel@tonic-gate 				 * return it back to oversize freelist.
5957c478bd9Sstevel@tonic-gate 				 * Create memalign header and adjust
5967c478bd9Sstevel@tonic-gate 				 * the size for the piece of
5977c478bd9Sstevel@tonic-gate 				 * (head fragment + data).
5987c478bd9Sstevel@tonic-gate 				 */
5997c478bd9Sstevel@tonic-gate 				taddr = ALIGN(ret_addr + size,
6007c478bd9Sstevel@tonic-gate 						MTMALLOC_MIN_ALIGN);
6017c478bd9Sstevel@tonic-gate 				data_sz = (size_t)(taddr -
6027c478bd9Sstevel@tonic-gate 						(uintptr_t)alloc_buf);
6037c478bd9Sstevel@tonic-gate 				orig = (oversize_t *)((uintptr_t)alloc_buf -
6047c478bd9Sstevel@tonic-gate 						OVSZ_HEADER_SIZE);
6057c478bd9Sstevel@tonic-gate 				tsize = orig->size - data_sz;
6067c478bd9Sstevel@tonic-gate 				orig->size = data_sz;
6077c478bd9Sstevel@tonic-gate 				MEMALIGN_HEADER_ALLOC(ret_buf, shift,
6087c478bd9Sstevel@tonic-gate 					alloc_buf);
6097c478bd9Sstevel@tonic-gate 				tsize -= OVSZ_HEADER_SIZE;
6107c478bd9Sstevel@tonic-gate 				tail = oversize_header_alloc(taddr,  tsize);
6117c478bd9Sstevel@tonic-gate 				free_oversize(tail);
6127c478bd9Sstevel@tonic-gate 				break;
6137c478bd9Sstevel@tonic-gate 			case HEAD_AND_TAIL_OVERSIZE:
6147c478bd9Sstevel@tonic-gate 				/*
6157c478bd9Sstevel@tonic-gate 				 * Split off the head fragment.
6167c478bd9Sstevel@tonic-gate 				 * We try to free up tail-end when we can
6177c478bd9Sstevel@tonic-gate 				 * extend data size to (MAX_CACHED + 8)
6187c478bd9Sstevel@tonic-gate 				 * and remain tail-end oversized.
6197c478bd9Sstevel@tonic-gate 				 * The bottom line is all split pieces
6207c478bd9Sstevel@tonic-gate 				 * should be oversize in size.
6217c478bd9Sstevel@tonic-gate 				 */
6227c478bd9Sstevel@tonic-gate 				orig = (oversize_t *)((uintptr_t)alloc_buf -
6237c478bd9Sstevel@tonic-gate 					OVSZ_HEADER_SIZE);
6247c478bd9Sstevel@tonic-gate 				tsize =  orig->size - (MAX_CACHED + 8 +
6257c478bd9Sstevel@tonic-gate 					OVSZ_HEADER_SIZE + shift +
6267c478bd9Sstevel@tonic-gate 					MTMALLOC_MIN_ALIGN);
6277c478bd9Sstevel@tonic-gate 
6287c478bd9Sstevel@tonic-gate 				if (!IS_OVERSIZE(tsize, alloc_size)) {
6297c478bd9Sstevel@tonic-gate 					/*
6307c478bd9Sstevel@tonic-gate 					 * If the chunk is not big enough
6317c478bd9Sstevel@tonic-gate 					 * to make both data and tail oversize
6327c478bd9Sstevel@tonic-gate 					 * we just keep them as one piece.
6337c478bd9Sstevel@tonic-gate 					 */
6347c478bd9Sstevel@tonic-gate 					(void) oversize_header_alloc(ret_addr -
6357c478bd9Sstevel@tonic-gate 						OVSZ_HEADER_SIZE,
6367c478bd9Sstevel@tonic-gate 						orig->size - shift);
6377c478bd9Sstevel@tonic-gate 					orig->size = shift -
6387c478bd9Sstevel@tonic-gate 						OVSZ_HEADER_SIZE;
6397c478bd9Sstevel@tonic-gate 					free_oversize(orig);
6407c478bd9Sstevel@tonic-gate 					break;
6417c478bd9Sstevel@tonic-gate 				} else {
6427c478bd9Sstevel@tonic-gate 					/*
6437c478bd9Sstevel@tonic-gate 					 * extend data size > MAX_CACHED
6447c478bd9Sstevel@tonic-gate 					 * and handle it as head, data, tail
6457c478bd9Sstevel@tonic-gate 					 * are all oversized.
6467c478bd9Sstevel@tonic-gate 					 */
6477c478bd9Sstevel@tonic-gate 					size = MAX_CACHED + 8;
6487c478bd9Sstevel@tonic-gate 				}
6497c478bd9Sstevel@tonic-gate 				/* FALLTHROUGH */
6507c478bd9Sstevel@tonic-gate 			case ALL_OVERSIZE:
6517c478bd9Sstevel@tonic-gate 				/*
6527c478bd9Sstevel@tonic-gate 				 * split off the head and tail fragments,
6537c478bd9Sstevel@tonic-gate 				 * return them back to the oversize freelist.
6547c478bd9Sstevel@tonic-gate 				 * Alloc oversize header for data seg.
6557c478bd9Sstevel@tonic-gate 				 */
6567c478bd9Sstevel@tonic-gate 				orig = (oversize_t *)((uintptr_t)alloc_buf -
6577c478bd9Sstevel@tonic-gate 					OVSZ_HEADER_SIZE);
6587c478bd9Sstevel@tonic-gate 				tsize = orig->size;
6597c478bd9Sstevel@tonic-gate 				orig->size = shift - OVSZ_HEADER_SIZE;
6607c478bd9Sstevel@tonic-gate 				free_oversize(orig);
6617c478bd9Sstevel@tonic-gate 
6627c478bd9Sstevel@tonic-gate 				taddr = ALIGN(ret_addr + size,
6637c478bd9Sstevel@tonic-gate 					MTMALLOC_MIN_ALIGN);
6647c478bd9Sstevel@tonic-gate 				data_sz = taddr - ret_addr;
6657c478bd9Sstevel@tonic-gate 				assert(tsize > (shift + data_sz +
6667c478bd9Sstevel@tonic-gate 					OVSZ_HEADER_SIZE));
6677c478bd9Sstevel@tonic-gate 				tail_sz = tsize -
6687c478bd9Sstevel@tonic-gate 					(shift + data_sz + OVSZ_HEADER_SIZE);
6697c478bd9Sstevel@tonic-gate 
6707c478bd9Sstevel@tonic-gate 				/* create oversize header for data seg */
6717c478bd9Sstevel@tonic-gate 				(void) oversize_header_alloc(ret_addr -
6727c478bd9Sstevel@tonic-gate 					OVSZ_HEADER_SIZE, data_sz);
6737c478bd9Sstevel@tonic-gate 
6747c478bd9Sstevel@tonic-gate 				/* create oversize header for tail fragment */
6757c478bd9Sstevel@tonic-gate 				tail = oversize_header_alloc(taddr, tail_sz);
6767c478bd9Sstevel@tonic-gate 				free_oversize(tail);
6777c478bd9Sstevel@tonic-gate 				break;
6787c478bd9Sstevel@tonic-gate 			default:
6797c478bd9Sstevel@tonic-gate 				/* should not reach here */
6807c478bd9Sstevel@tonic-gate 				assert(0);
6817c478bd9Sstevel@tonic-gate 		}
6827c478bd9Sstevel@tonic-gate 	}
6837c478bd9Sstevel@tonic-gate 	return (ret_buf);
6847c478bd9Sstevel@tonic-gate }
6857c478bd9Sstevel@tonic-gate 
6867c478bd9Sstevel@tonic-gate 
/*
 * valloc - allocate size bytes aligned on a page boundary.
 *
 * A zero-byte request yields NULL.  Otherwise the request is handed to
 * memalign() with the system page size as the alignment.  The page size
 * is obtained from sysconf(_SC_PAGESIZE) on first use and remembered in
 * a function-local static for later calls.
 */
void *
valloc(size_t size)
{
	static unsigned cached_pagesize;

	if (size == 0) {
		return (NULL);
	}

	/* Ask the system for the page size only once. */
	if (cached_pagesize == 0) {
		cached_pagesize = sysconf(_SC_PAGESIZE);
	}

	return (memalign(cached_pagesize, size));
}
7007c478bd9Sstevel@tonic-gate 
7017c478bd9Sstevel@tonic-gate void
7027c478bd9Sstevel@tonic-gate mallocctl(int cmd, long value)
7037c478bd9Sstevel@tonic-gate {
7047c478bd9Sstevel@tonic-gate 	switch (cmd) {
7057c478bd9Sstevel@tonic-gate 
7067c478bd9Sstevel@tonic-gate 	case MTDEBUGPATTERN:
7077c478bd9Sstevel@tonic-gate 		/*
7087c478bd9Sstevel@tonic-gate 		 * Reinitialize free blocks in case malloc() is called prior
7097c478bd9Sstevel@tonic-gate 		 * to mallocctl().
7107c478bd9Sstevel@tonic-gate 		 */
7117c478bd9Sstevel@tonic-gate 		if (value && !(debugopt & cmd)) {
7127c478bd9Sstevel@tonic-gate 			reinit++;
7137c478bd9Sstevel@tonic-gate 			debugopt |= cmd;
7147c478bd9Sstevel@tonic-gate 			reinit_cpu_list();
7157c478bd9Sstevel@tonic-gate 		}
7167c478bd9Sstevel@tonic-gate 		/*FALLTHRU*/
7177c478bd9Sstevel@tonic-gate 	case MTDOUBLEFREE:
7187c478bd9Sstevel@tonic-gate 	case MTINITBUFFER:
7197c478bd9Sstevel@tonic-gate 		if (value)
7207c478bd9Sstevel@tonic-gate 			debugopt |= cmd;
7217c478bd9Sstevel@tonic-gate 		else
7227c478bd9Sstevel@tonic-gate 			debugopt &= ~cmd;
7237c478bd9Sstevel@tonic-gate 		break;
7247c478bd9Sstevel@tonic-gate 	case MTCHUNKSIZE:
7257c478bd9Sstevel@tonic-gate 		if (value >= MINSIZE && value <= MAXSIZE)
7267c478bd9Sstevel@tonic-gate 			requestsize = value;
7277c478bd9Sstevel@tonic-gate 		break;
7287c478bd9Sstevel@tonic-gate 	default:
7297c478bd9Sstevel@tonic-gate 		break;
7307c478bd9Sstevel@tonic-gate 	}
7317c478bd9Sstevel@tonic-gate }
7327c478bd9Sstevel@tonic-gate 
7337c478bd9Sstevel@tonic-gate /*
734*1d530678Sraf  * Initialization function, called from the init section of the library.
735*1d530678Sraf  * No locking is required here because we are single-threaded during
736*1d530678Sraf  * library initialization.
7377c478bd9Sstevel@tonic-gate  */
738*1d530678Sraf static void
7397c478bd9Sstevel@tonic-gate setup_caches(void)
7407c478bd9Sstevel@tonic-gate {
7417c478bd9Sstevel@tonic-gate 	uintptr_t oldbrk;
7427c478bd9Sstevel@tonic-gate 	uintptr_t newbrk;
7437c478bd9Sstevel@tonic-gate 
7447c478bd9Sstevel@tonic-gate 	size_t cache_space_needed;
7457c478bd9Sstevel@tonic-gate 	size_t padding;
7467c478bd9Sstevel@tonic-gate 
7477c478bd9Sstevel@tonic-gate 	curcpu_func new_curcpu;
7487c478bd9Sstevel@tonic-gate 	uint_t new_cpu_mask;
7497c478bd9Sstevel@tonic-gate 	percpu_t *new_cpu_list;
7507c478bd9Sstevel@tonic-gate 
7517c478bd9Sstevel@tonic-gate 	uint_t i, j;
7527c478bd9Sstevel@tonic-gate 	uintptr_t list_addr;
7537c478bd9Sstevel@tonic-gate 
754*1d530678Sraf 	/*
755*1d530678Sraf 	 * Get a decent "current cpu identifier", to be used to reduce
756*1d530678Sraf 	 * contention.  Eventually, this should be replaced by an interface
757*1d530678Sraf 	 * to get the actual CPU sequence number in libthread/liblwp.
758*1d530678Sraf 	 */
759*1d530678Sraf 	new_curcpu = (curcpu_func)thr_self;
7607c478bd9Sstevel@tonic-gate 	if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0)
7617c478bd9Sstevel@tonic-gate 		ncpus = 4; /* decent default value */
7627c478bd9Sstevel@tonic-gate 
7637c478bd9Sstevel@tonic-gate 	/* round ncpus up to a power of 2 */
7647c478bd9Sstevel@tonic-gate 	while (ncpus & (ncpus - 1))
7657c478bd9Sstevel@tonic-gate 		ncpus++;
7667c478bd9Sstevel@tonic-gate 
7677c478bd9Sstevel@tonic-gate 	new_cpu_mask = ncpus - 1;	/* create the cpu mask */
7687c478bd9Sstevel@tonic-gate 
7697c478bd9Sstevel@tonic-gate 	/*
7707c478bd9Sstevel@tonic-gate 	 * We now do some magic with the brk.  What we want to get in the
7717c478bd9Sstevel@tonic-gate 	 * end is a bunch of well-aligned stuff in a big initial allocation.
7727c478bd9Sstevel@tonic-gate 	 * Along the way, we do sanity checks to make sure no one else has
7737c478bd9Sstevel@tonic-gate 	 * touched the brk (which shouldn't happen, but it's always good to
7747c478bd9Sstevel@tonic-gate 	 * check)
7757c478bd9Sstevel@tonic-gate 	 *
7767c478bd9Sstevel@tonic-gate 	 * First, make sure sbrk is sane, and store the current brk in oldbrk.
7777c478bd9Sstevel@tonic-gate 	 */
7787c478bd9Sstevel@tonic-gate 	oldbrk = (uintptr_t)sbrk(0);
779*1d530678Sraf 	if ((void *)oldbrk == (void *)-1)
780*1d530678Sraf 		abort();	/* sbrk is broken -- we're doomed. */
7817c478bd9Sstevel@tonic-gate 
7827c478bd9Sstevel@tonic-gate 	/*
7837c478bd9Sstevel@tonic-gate 	 * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
7847c478bd9Sstevel@tonic-gate 	 * the percpu structures and cache lists will be properly aligned.
7857c478bd9Sstevel@tonic-gate 	 *
7867c478bd9Sstevel@tonic-gate 	 *   2.  All hunks will be page-aligned, assuming HUNKSIZE >= PAGESIZE,
7877c478bd9Sstevel@tonic-gate 	 *	so they can be paged out individually.
7887c478bd9Sstevel@tonic-gate 	 */
7897c478bd9Sstevel@tonic-gate 	newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT);
790*1d530678Sraf 	if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk)
791*1d530678Sraf 		abort();	/* sbrk is broken -- we're doomed. */
7927c478bd9Sstevel@tonic-gate 
7937c478bd9Sstevel@tonic-gate 	/*
7947c478bd9Sstevel@tonic-gate 	 * For each cpu, there is one percpu_t and a list of caches
7957c478bd9Sstevel@tonic-gate 	 */
7967c478bd9Sstevel@tonic-gate 	cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE);
7977c478bd9Sstevel@tonic-gate 
7987c478bd9Sstevel@tonic-gate 	new_cpu_list = (percpu_t *)sbrk(cache_space_needed);
7997c478bd9Sstevel@tonic-gate 
8007c478bd9Sstevel@tonic-gate 	if (new_cpu_list == (percpu_t *)-1 ||
801*1d530678Sraf 	    (uintptr_t)new_cpu_list != newbrk)
802*1d530678Sraf 		abort();	/* sbrk is broken -- we're doomed. */
8037c478bd9Sstevel@tonic-gate 
8047c478bd9Sstevel@tonic-gate 	/*
8057c478bd9Sstevel@tonic-gate 	 * Finally, align the brk to HUNKSIZE so that all hunks are
8067c478bd9Sstevel@tonic-gate 	 * page-aligned, to avoid edge-effects.
8077c478bd9Sstevel@tonic-gate 	 */
8087c478bd9Sstevel@tonic-gate 
8097c478bd9Sstevel@tonic-gate 	newbrk = (uintptr_t)new_cpu_list + cache_space_needed;
8107c478bd9Sstevel@tonic-gate 
8117c478bd9Sstevel@tonic-gate 	padding = ALIGN(newbrk, HUNKSIZE) - newbrk;
8127c478bd9Sstevel@tonic-gate 
813*1d530678Sraf 	if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk)
814*1d530678Sraf 		abort();	/* sbrk is broken -- we're doomed. */
8157c478bd9Sstevel@tonic-gate 
8167c478bd9Sstevel@tonic-gate 	list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus));
8177c478bd9Sstevel@tonic-gate 
8187c478bd9Sstevel@tonic-gate 	/* initialize the percpu list */
8197c478bd9Sstevel@tonic-gate 	for (i = 0; i < ncpus; i++) {
8207c478bd9Sstevel@tonic-gate 		new_cpu_list[i].mt_caches = (cache_head_t *)list_addr;
8217c478bd9Sstevel@tonic-gate 		for (j = 0; j < NUM_CACHES; j++) {
8227c478bd9Sstevel@tonic-gate 			new_cpu_list[i].mt_caches[j].mt_cache = NULL;
8237c478bd9Sstevel@tonic-gate 			new_cpu_list[i].mt_caches[j].mt_hint = NULL;
8247c478bd9Sstevel@tonic-gate 		}
8257c478bd9Sstevel@tonic-gate 
826*1d530678Sraf 		(void) mutex_init(&new_cpu_list[i].mt_parent_lock,
827*1d530678Sraf 		    USYNC_THREAD, NULL);
8287c478bd9Sstevel@tonic-gate 
8297c478bd9Sstevel@tonic-gate 		/* get the correct cache list alignment */
8307c478bd9Sstevel@tonic-gate 		list_addr += CACHELIST_SIZE;
8317c478bd9Sstevel@tonic-gate 	}
8327c478bd9Sstevel@tonic-gate 
8337c478bd9Sstevel@tonic-gate 	/*
8347c478bd9Sstevel@tonic-gate 	 * Initialize oversize listhead
8357c478bd9Sstevel@tonic-gate 	 */
8367c478bd9Sstevel@tonic-gate 	oversize_list.next_bysize = &oversize_list;
8377c478bd9Sstevel@tonic-gate 	oversize_list.prev_bysize = &oversize_list;
8387c478bd9Sstevel@tonic-gate 	oversize_list.next_byaddr = &oversize_list;
8397c478bd9Sstevel@tonic-gate 	oversize_list.prev_byaddr = &oversize_list;
8407c478bd9Sstevel@tonic-gate 	oversize_list.addr = NULL;
8417c478bd9Sstevel@tonic-gate 	oversize_list.size = 0;		/* sentinal */
8427c478bd9Sstevel@tonic-gate 
8437c478bd9Sstevel@tonic-gate 	/*
844*1d530678Sraf 	 * Now install the global variables.
8457c478bd9Sstevel@tonic-gate 	 */
8467c478bd9Sstevel@tonic-gate 	curcpu = new_curcpu;
8477c478bd9Sstevel@tonic-gate 	cpu_mask = new_cpu_mask;
8487c478bd9Sstevel@tonic-gate 	cpu_list = new_cpu_list;
8497c478bd9Sstevel@tonic-gate }
8507c478bd9Sstevel@tonic-gate 
8517c478bd9Sstevel@tonic-gate static void
8527c478bd9Sstevel@tonic-gate create_cache(cache_t *cp, size_t size, uint_t chunksize)
8537c478bd9Sstevel@tonic-gate {
8547c478bd9Sstevel@tonic-gate 	long nblocks;
8557c478bd9Sstevel@tonic-gate 
856*1d530678Sraf 	(void) mutex_init(&cp->mt_cache_lock, USYNC_THREAD, NULL);
8577c478bd9Sstevel@tonic-gate 	cp->mt_size = size;
8587c478bd9Sstevel@tonic-gate 	cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t));
8597c478bd9Sstevel@tonic-gate 	cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t);
8607c478bd9Sstevel@tonic-gate 	cp->mt_hunks = chunksize;
8617c478bd9Sstevel@tonic-gate 	/*
8627c478bd9Sstevel@tonic-gate 	 * rough calculation. We will need to adjust later.
8637c478bd9Sstevel@tonic-gate 	 */
8647c478bd9Sstevel@tonic-gate 	nblocks = cp->mt_span / cp->mt_size;
8657c478bd9Sstevel@tonic-gate 	nblocks >>= 3;
8667c478bd9Sstevel@tonic-gate 	if (nblocks == 0) { /* less than 8 free blocks in this pool */
8677c478bd9Sstevel@tonic-gate 		int32_t numblocks = 0;
8687c478bd9Sstevel@tonic-gate 		long i = cp->mt_span;
8697c478bd9Sstevel@tonic-gate 		size_t sub = cp->mt_size;
8707c478bd9Sstevel@tonic-gate 		uchar_t mask = 0;
8717c478bd9Sstevel@tonic-gate 
8727c478bd9Sstevel@tonic-gate 		while (i > sub) {
8737c478bd9Sstevel@tonic-gate 			numblocks++;
8747c478bd9Sstevel@tonic-gate 			i -= sub;
8757c478bd9Sstevel@tonic-gate 		}
8767c478bd9Sstevel@tonic-gate 		nblocks = numblocks;
8777c478bd9Sstevel@tonic-gate 		cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8);
8787c478bd9Sstevel@tonic-gate 		cp->mt_nfree = numblocks;
8797c478bd9Sstevel@tonic-gate 		while (numblocks--) {
8807c478bd9Sstevel@tonic-gate 			mask |= 0x80 >> numblocks;
8817c478bd9Sstevel@tonic-gate 		}
8827c478bd9Sstevel@tonic-gate 		*(cp->mt_freelist) = mask;
8837c478bd9Sstevel@tonic-gate 	} else {
8847c478bd9Sstevel@tonic-gate 		cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist +
8857c478bd9Sstevel@tonic-gate 			nblocks, 32);
8867c478bd9Sstevel@tonic-gate 		/* recompute nblocks */
8877c478bd9Sstevel@tonic-gate 		nblocks = (uintptr_t)((caddr_t)cp->mt_freelist +
8887c478bd9Sstevel@tonic-gate 			cp->mt_span - cp->mt_arena) / cp->mt_size;
8897c478bd9Sstevel@tonic-gate 		cp->mt_nfree = ((nblocks >> 3) << 3);
8907c478bd9Sstevel@tonic-gate 		/* Set everything to free */
8917c478bd9Sstevel@tonic-gate 		(void) memset(cp->mt_freelist, 0xff, nblocks >> 3);
8927c478bd9Sstevel@tonic-gate 	}
8937c478bd9Sstevel@tonic-gate 
8947c478bd9Sstevel@tonic-gate 	if (debugopt & MTDEBUGPATTERN)
8957c478bd9Sstevel@tonic-gate 		copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks);
8967c478bd9Sstevel@tonic-gate 
8977c478bd9Sstevel@tonic-gate 	cp->mt_next = NULL;
8987c478bd9Sstevel@tonic-gate }
8997c478bd9Sstevel@tonic-gate 
9007c478bd9Sstevel@tonic-gate static void
9017c478bd9Sstevel@tonic-gate reinit_cpu_list(void)
9027c478bd9Sstevel@tonic-gate {
9037c478bd9Sstevel@tonic-gate 	oversize_t *wp = oversize_list.next_bysize;
9047c478bd9Sstevel@tonic-gate 	percpu_t *cpuptr;
9057c478bd9Sstevel@tonic-gate 	cache_t *thiscache;
9067c478bd9Sstevel@tonic-gate 	cache_head_t *cachehead;
9077c478bd9Sstevel@tonic-gate 
9087c478bd9Sstevel@tonic-gate 	/* Reinitialize free oversize blocks. */
9097c478bd9Sstevel@tonic-gate 	(void) mutex_lock(&oversize_lock);
9107c478bd9Sstevel@tonic-gate 	if (debugopt & MTDEBUGPATTERN)
9117c478bd9Sstevel@tonic-gate 		for (; wp != &oversize_list; wp = wp->next_bysize)
9127c478bd9Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, wp->addr, wp->size);
9137c478bd9Sstevel@tonic-gate 	(void) mutex_unlock(&oversize_lock);
9147c478bd9Sstevel@tonic-gate 
9157c478bd9Sstevel@tonic-gate 	/* Reinitialize free blocks. */
9167c478bd9Sstevel@tonic-gate 	for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
9177c478bd9Sstevel@tonic-gate 		(void) mutex_lock(&cpuptr->mt_parent_lock);
9187c478bd9Sstevel@tonic-gate 		for (cachehead = &cpuptr->mt_caches[0]; cachehead <
9197c478bd9Sstevel@tonic-gate 			&cpuptr->mt_caches[NUM_CACHES]; cachehead++) {
9207c478bd9Sstevel@tonic-gate 			for (thiscache = cachehead->mt_cache; thiscache != NULL;
9217c478bd9Sstevel@tonic-gate 				thiscache = thiscache->mt_next) {
9227c478bd9Sstevel@tonic-gate 				(void) mutex_lock(&thiscache->mt_cache_lock);
9237c478bd9Sstevel@tonic-gate 				if (thiscache->mt_nfree == 0) {
9247c478bd9Sstevel@tonic-gate 					(void) mutex_unlock(
9257c478bd9Sstevel@tonic-gate 					    &thiscache->mt_cache_lock);
9267c478bd9Sstevel@tonic-gate 					continue;
9277c478bd9Sstevel@tonic-gate 				}
9287c478bd9Sstevel@tonic-gate 				if (thiscache != NULL)
9297c478bd9Sstevel@tonic-gate 					reinit_cache(thiscache);
9307c478bd9Sstevel@tonic-gate 				(void) mutex_unlock(&thiscache->mt_cache_lock);
9317c478bd9Sstevel@tonic-gate 			}
9327c478bd9Sstevel@tonic-gate 		}
9337c478bd9Sstevel@tonic-gate 		(void) mutex_unlock(&cpuptr->mt_parent_lock);
9347c478bd9Sstevel@tonic-gate 	}
9357c478bd9Sstevel@tonic-gate 	reinit = 0;
9367c478bd9Sstevel@tonic-gate }
9377c478bd9Sstevel@tonic-gate 
9387c478bd9Sstevel@tonic-gate static void
9397c478bd9Sstevel@tonic-gate reinit_cache(cache_t *thiscache)
9407c478bd9Sstevel@tonic-gate {
9417c478bd9Sstevel@tonic-gate 	uint32_t *freeblocks; /* not a uintptr_t on purpose */
9427c478bd9Sstevel@tonic-gate 	int32_t i, n;
9437c478bd9Sstevel@tonic-gate 	caddr_t ret;
9447c478bd9Sstevel@tonic-gate 
9457c478bd9Sstevel@tonic-gate 	freeblocks = (uint32_t *)thiscache->mt_freelist;
9467c478bd9Sstevel@tonic-gate 	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
9477c478bd9Sstevel@tonic-gate 		if (*freeblocks & 0xffffffff) {
9487c478bd9Sstevel@tonic-gate 		    for (i = 0; i < 32; i++) {
9497c478bd9Sstevel@tonic-gate 			if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) {
9507c478bd9Sstevel@tonic-gate 				n = (uintptr_t)(((freeblocks -
9517c478bd9Sstevel@tonic-gate 				    (uint32_t *)thiscache->mt_freelist) << 5)
9527c478bd9Sstevel@tonic-gate 				    + i) * thiscache->mt_size;
9537c478bd9Sstevel@tonic-gate 				ret = thiscache->mt_arena + n;
9547c478bd9Sstevel@tonic-gate 				ret += OVERHEAD;
9557c478bd9Sstevel@tonic-gate 				copy_pattern(FREEPATTERN, ret,
9567c478bd9Sstevel@tonic-gate 				    thiscache->mt_size);
9577c478bd9Sstevel@tonic-gate 			}
9587c478bd9Sstevel@tonic-gate 		    }
9597c478bd9Sstevel@tonic-gate 		}
9607c478bd9Sstevel@tonic-gate 		freeblocks++;
9617c478bd9Sstevel@tonic-gate 	}
9627c478bd9Sstevel@tonic-gate }
9637c478bd9Sstevel@tonic-gate 
9647c478bd9Sstevel@tonic-gate static void *
9657c478bd9Sstevel@tonic-gate malloc_internal(size_t size, percpu_t *cpuptr)
9667c478bd9Sstevel@tonic-gate {
9677c478bd9Sstevel@tonic-gate 	cache_head_t *cachehead;
9687c478bd9Sstevel@tonic-gate 	cache_t *thiscache, *hintcache;
9697c478bd9Sstevel@tonic-gate 	int32_t i, n, logsz, bucket;
9707c478bd9Sstevel@tonic-gate 	uint32_t index;
9717c478bd9Sstevel@tonic-gate 	uint32_t *freeblocks; /* not a uintptr_t on purpose */
9727c478bd9Sstevel@tonic-gate 	caddr_t ret;
9737c478bd9Sstevel@tonic-gate 
9747c478bd9Sstevel@tonic-gate 	logsz = MIN_CACHED_SHIFT;
9757c478bd9Sstevel@tonic-gate 
9767c478bd9Sstevel@tonic-gate 	while (size > (1 << logsz))
9777c478bd9Sstevel@tonic-gate 		logsz++;
9787c478bd9Sstevel@tonic-gate 
9797c478bd9Sstevel@tonic-gate 	bucket = logsz - MIN_CACHED_SHIFT;
9807c478bd9Sstevel@tonic-gate 
9817c478bd9Sstevel@tonic-gate 	(void) mutex_lock(&cpuptr->mt_parent_lock);
9827c478bd9Sstevel@tonic-gate 
9837c478bd9Sstevel@tonic-gate 	/*
9847c478bd9Sstevel@tonic-gate 	 * Find a cache of the appropriate size with free buffers.
9857c478bd9Sstevel@tonic-gate 	 *
9867c478bd9Sstevel@tonic-gate 	 * We don't need to lock each cache as we check their mt_nfree count,
9877c478bd9Sstevel@tonic-gate 	 * since:
9887c478bd9Sstevel@tonic-gate 	 *	1.  We are only looking for caches with mt_nfree > 0.  If a
9897c478bd9Sstevel@tonic-gate 	 *	   free happens during our search, it will increment mt_nfree,
9907c478bd9Sstevel@tonic-gate 	 *	   which will not effect the test.
9917c478bd9Sstevel@tonic-gate 	 *	2.  Allocations can decrement mt_nfree, but they can't happen
9927c478bd9Sstevel@tonic-gate 	 *	   as long as we hold mt_parent_lock.
9937c478bd9Sstevel@tonic-gate 	 */
9947c478bd9Sstevel@tonic-gate 
9957c478bd9Sstevel@tonic-gate 	cachehead = &cpuptr->mt_caches[bucket];
9967c478bd9Sstevel@tonic-gate 
9977c478bd9Sstevel@tonic-gate 	/* Search through the list, starting at the mt_hint */
9987c478bd9Sstevel@tonic-gate 	thiscache = cachehead->mt_hint;
9997c478bd9Sstevel@tonic-gate 
10007c478bd9Sstevel@tonic-gate 	while (thiscache != NULL && thiscache->mt_nfree == 0)
10017c478bd9Sstevel@tonic-gate 		thiscache = thiscache->mt_next;
10027c478bd9Sstevel@tonic-gate 
10037c478bd9Sstevel@tonic-gate 	if (thiscache == NULL) {
10047c478bd9Sstevel@tonic-gate 		/* wrap around -- search up to the hint */
10057c478bd9Sstevel@tonic-gate 		thiscache = cachehead->mt_cache;
10067c478bd9Sstevel@tonic-gate 		hintcache = cachehead->mt_hint;
10077c478bd9Sstevel@tonic-gate 
10087c478bd9Sstevel@tonic-gate 		while (thiscache != NULL && thiscache != hintcache &&
10097c478bd9Sstevel@tonic-gate 		    thiscache->mt_nfree == 0)
10107c478bd9Sstevel@tonic-gate 			thiscache = thiscache->mt_next;
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate 		if (thiscache == hintcache)
10137c478bd9Sstevel@tonic-gate 			thiscache = NULL;
10147c478bd9Sstevel@tonic-gate 	}
10157c478bd9Sstevel@tonic-gate 
10167c478bd9Sstevel@tonic-gate 
10177c478bd9Sstevel@tonic-gate 	if (thiscache == NULL) { /* there are no free caches */
10187c478bd9Sstevel@tonic-gate 		int32_t thisrequest = requestsize;
10197c478bd9Sstevel@tonic-gate 		int32_t buffer_size = (1 << logsz) + OVERHEAD;
10207c478bd9Sstevel@tonic-gate 
10217c478bd9Sstevel@tonic-gate 		thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE);
10227c478bd9Sstevel@tonic-gate 
10237c478bd9Sstevel@tonic-gate 		if (thiscache == (cache_t *)-1) {
10247c478bd9Sstevel@tonic-gate 		    (void) mutex_unlock(&cpuptr->mt_parent_lock);
10257c478bd9Sstevel@tonic-gate 		    errno = EAGAIN;
10267c478bd9Sstevel@tonic-gate 		    return (NULL);
10277c478bd9Sstevel@tonic-gate 		}
10287c478bd9Sstevel@tonic-gate 		create_cache(thiscache, buffer_size, thisrequest);
10297c478bd9Sstevel@tonic-gate 
10307c478bd9Sstevel@tonic-gate 		/* link in the new block at the beginning of the list */
10317c478bd9Sstevel@tonic-gate 		thiscache->mt_next = cachehead->mt_cache;
10327c478bd9Sstevel@tonic-gate 		cachehead->mt_cache = thiscache;
10337c478bd9Sstevel@tonic-gate 	}
10347c478bd9Sstevel@tonic-gate 
10357c478bd9Sstevel@tonic-gate 	/* update the hint to the cache we found or created */
10367c478bd9Sstevel@tonic-gate 	cachehead->mt_hint = thiscache;
10377c478bd9Sstevel@tonic-gate 
10387c478bd9Sstevel@tonic-gate 	/* thiscache now points to a cache with available space */
10397c478bd9Sstevel@tonic-gate 	(void) mutex_lock(&thiscache->mt_cache_lock);
10407c478bd9Sstevel@tonic-gate 
10417c478bd9Sstevel@tonic-gate 	freeblocks = (uint32_t *)thiscache->mt_freelist;
10427c478bd9Sstevel@tonic-gate 	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
10437c478bd9Sstevel@tonic-gate 		if (*freeblocks & 0xffffffff)
10447c478bd9Sstevel@tonic-gate 			break;
10457c478bd9Sstevel@tonic-gate 		freeblocks++;
10467c478bd9Sstevel@tonic-gate 		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
10477c478bd9Sstevel@tonic-gate 		    *freeblocks & 0xffffffff)
10487c478bd9Sstevel@tonic-gate 			break;
10497c478bd9Sstevel@tonic-gate 		freeblocks++;
10507c478bd9Sstevel@tonic-gate 		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
10517c478bd9Sstevel@tonic-gate 		    *freeblocks & 0xffffffff)
10527c478bd9Sstevel@tonic-gate 			break;
10537c478bd9Sstevel@tonic-gate 		freeblocks++;
10547c478bd9Sstevel@tonic-gate 		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
10557c478bd9Sstevel@tonic-gate 		    *freeblocks & 0xffffffff)
10567c478bd9Sstevel@tonic-gate 			break;
10577c478bd9Sstevel@tonic-gate 		freeblocks++;
10587c478bd9Sstevel@tonic-gate 	}
10597c478bd9Sstevel@tonic-gate 
10607c478bd9Sstevel@tonic-gate 	/*
10617c478bd9Sstevel@tonic-gate 	 * the offset from mt_freelist to freeblocks is the offset into
10627c478bd9Sstevel@tonic-gate 	 * the arena. Be sure to include the offset into freeblocks
10637c478bd9Sstevel@tonic-gate 	 * of the bitmask. n is the offset.
10647c478bd9Sstevel@tonic-gate 	 */
10657c478bd9Sstevel@tonic-gate 	for (i = 0; i < 32; ) {
10667c478bd9Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
10677c478bd9Sstevel@tonic-gate 			break;
10687c478bd9Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
10697c478bd9Sstevel@tonic-gate 			break;
10707c478bd9Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
10717c478bd9Sstevel@tonic-gate 			break;
10727c478bd9Sstevel@tonic-gate 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
10737c478bd9Sstevel@tonic-gate 			break;
10747c478bd9Sstevel@tonic-gate 	}
10757c478bd9Sstevel@tonic-gate 	index = 0x80000000 >> --i;
10767c478bd9Sstevel@tonic-gate 
10777c478bd9Sstevel@tonic-gate 
10787c478bd9Sstevel@tonic-gate 	*freeblocks &= FLIP_EM(~index);
10797c478bd9Sstevel@tonic-gate 
10807c478bd9Sstevel@tonic-gate 	thiscache->mt_nfree--;
10817c478bd9Sstevel@tonic-gate 
10827c478bd9Sstevel@tonic-gate 	(void) mutex_unlock(&thiscache->mt_cache_lock);
10837c478bd9Sstevel@tonic-gate 	(void) mutex_unlock(&cpuptr->mt_parent_lock);
10847c478bd9Sstevel@tonic-gate 
10857c478bd9Sstevel@tonic-gate 	n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5)
10867c478bd9Sstevel@tonic-gate 		+ i) * thiscache->mt_size;
10877c478bd9Sstevel@tonic-gate 	/*
10887c478bd9Sstevel@tonic-gate 	 * Now you have the offset in n, you've changed the free mask
10897c478bd9Sstevel@tonic-gate 	 * in the freelist. Nothing left to do but find the block
10907c478bd9Sstevel@tonic-gate 	 * in the arena and put the value of thiscache in the word
10917c478bd9Sstevel@tonic-gate 	 * ahead of the handed out address and return the memory
10927c478bd9Sstevel@tonic-gate 	 * back to the user.
10937c478bd9Sstevel@tonic-gate 	 */
10947c478bd9Sstevel@tonic-gate 	ret = thiscache->mt_arena + n;
10957c478bd9Sstevel@tonic-gate 
10967c478bd9Sstevel@tonic-gate 	/* Store the cache addr for this buf. Makes free go fast. */
10977c478bd9Sstevel@tonic-gate 	*(uintptr_t *)ret = (uintptr_t)thiscache;
10987c478bd9Sstevel@tonic-gate 
10997c478bd9Sstevel@tonic-gate 	/*
11007c478bd9Sstevel@tonic-gate 	 * This assert makes sure we don't hand out memory that is not
11017c478bd9Sstevel@tonic-gate 	 * owned by this cache.
11027c478bd9Sstevel@tonic-gate 	 */
11037c478bd9Sstevel@tonic-gate 	assert(ret + thiscache->mt_size <= thiscache->mt_freelist +
11047c478bd9Sstevel@tonic-gate 		thiscache->mt_span);
11057c478bd9Sstevel@tonic-gate 
11067c478bd9Sstevel@tonic-gate 	ret += OVERHEAD;
11077c478bd9Sstevel@tonic-gate 
11087c478bd9Sstevel@tonic-gate 	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate 	if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
11117c478bd9Sstevel@tonic-gate 		if (verify_pattern(FREEPATTERN, ret, size))
11127c478bd9Sstevel@tonic-gate 			abort();	/* reference after free */
11137c478bd9Sstevel@tonic-gate 
11147c478bd9Sstevel@tonic-gate 	if (debugopt & MTINITBUFFER)
11157c478bd9Sstevel@tonic-gate 		copy_pattern(INITPATTERN, ret, size);
11167c478bd9Sstevel@tonic-gate 	return ((void *)ret);
11177c478bd9Sstevel@tonic-gate }
11187c478bd9Sstevel@tonic-gate 
/*
 * Grow the heap by `bytes' using sbrk().
 *
 * Returns the start of the newly obtained region (the value sbrk()
 * reported before the growth), or (void *)-1 if the heap could not be
 * extended.  Requests larger than LONG_MAX are issued to sbrk() in
 * pieces because its increment argument is signed.
 */
static void *
morecore(size_t bytes)
{
	void *base;
	intptr_t step;

	/* Common case: the whole request fits in one sbrk() call. */
	if (bytes <= LONG_MAX)
		return (sbrk(bytes));

	if (bytes == ULONG_MAX)
		return ((void *)-1);

	/* Remember where the new region starts before growing piecewise. */
	base = sbrk(0);

	/*
	 * First piece is LONG_MAX, the next is whatever remains.  After the
	 * last piece the subtraction below leaves a value that is not
	 * positive when viewed as an intptr_t, which ends the loop.
	 */
	for (step = LONG_MAX; step > 0; step = bytes) {
		if (sbrk(step) == (void *)-1) {
			/* Give back the first piece if it was obtained. */
			if (base != sbrk(0))
				(void) sbrk(-LONG_MAX);
			return ((void *)-1);
		}
		bytes -= LONG_MAX;
	}

	return (base);
}
11497c478bd9Sstevel@tonic-gate 
11507c478bd9Sstevel@tonic-gate 
11517c478bd9Sstevel@tonic-gate static void *
11527c478bd9Sstevel@tonic-gate oversize(size_t size)
11537c478bd9Sstevel@tonic-gate {
11547c478bd9Sstevel@tonic-gate 	caddr_t ret;
11557c478bd9Sstevel@tonic-gate 	oversize_t *big;
11567c478bd9Sstevel@tonic-gate 	int bucket;
11577c478bd9Sstevel@tonic-gate 
115870911a0dSrm88369 	/* make sure we will not overflow */
115970911a0dSrm88369 	if (size > MAX_MTMALLOC) {
116070911a0dSrm88369 		errno = ENOMEM;
116170911a0dSrm88369 		return (NULL);
116270911a0dSrm88369 	}
11637c478bd9Sstevel@tonic-gate 
11647c478bd9Sstevel@tonic-gate 	/*
11657c478bd9Sstevel@tonic-gate 	 * Since we ensure every address we hand back is
11667c478bd9Sstevel@tonic-gate 	 * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
11677c478bd9Sstevel@tonic-gate 	 * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
11687c478bd9Sstevel@tonic-gate 	 * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN,
11697c478bd9Sstevel@tonic-gate 	 * particularly where coalescing occurs.
11707c478bd9Sstevel@tonic-gate 	 */
11717c478bd9Sstevel@tonic-gate 	size = ALIGN(size, MTMALLOC_MIN_ALIGN);
11727c478bd9Sstevel@tonic-gate 
117370911a0dSrm88369 	/*
117470911a0dSrm88369 	 * The idea with the global lock is that we are sure to
117570911a0dSrm88369 	 * block in the kernel anyway since given an oversize alloc
117670911a0dSrm88369 	 * we are sure to have to call morecore();
117770911a0dSrm88369 	 */
117870911a0dSrm88369 	(void) mutex_lock(&oversize_lock);
117970911a0dSrm88369 
11807c478bd9Sstevel@tonic-gate 	if ((big = find_oversize(size)) != NULL) {
11817c478bd9Sstevel@tonic-gate 		if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
11827c478bd9Sstevel@tonic-gate 			if (verify_pattern(FREEPATTERN, big->addr, size))
11837c478bd9Sstevel@tonic-gate 				abort();	/* reference after free */
11847c478bd9Sstevel@tonic-gate 	} else {
11857c478bd9Sstevel@tonic-gate 		/* Get more 8-byte aligned memory from heap */
11867c478bd9Sstevel@tonic-gate 		ret = morecore(size + OVSZ_HEADER_SIZE);
11877c478bd9Sstevel@tonic-gate 		if (ret == (caddr_t)-1) {
11887c478bd9Sstevel@tonic-gate 			(void) mutex_unlock(&oversize_lock);
11897c478bd9Sstevel@tonic-gate 			errno = ENOMEM;
11907c478bd9Sstevel@tonic-gate 			return (NULL);
11917c478bd9Sstevel@tonic-gate 		}
11927c478bd9Sstevel@tonic-gate 		big = oversize_header_alloc((uintptr_t)ret, size);
11937c478bd9Sstevel@tonic-gate 	}
11947c478bd9Sstevel@tonic-gate 	ret = big->addr;
11957c478bd9Sstevel@tonic-gate 
11967c478bd9Sstevel@tonic-gate 	/* Add big to the hash table at the head of the relevant bucket. */
11977c478bd9Sstevel@tonic-gate 	bucket = HASH_OVERSIZE(ret);
11987c478bd9Sstevel@tonic-gate 	big->hash_next = ovsz_hashtab[bucket];
11997c478bd9Sstevel@tonic-gate 	ovsz_hashtab[bucket] = big;
12007c478bd9Sstevel@tonic-gate 
12017c478bd9Sstevel@tonic-gate 	if (debugopt & MTINITBUFFER)
12027c478bd9Sstevel@tonic-gate 		copy_pattern(INITPATTERN, ret, size);
12037c478bd9Sstevel@tonic-gate 
12047c478bd9Sstevel@tonic-gate 	(void) mutex_unlock(&oversize_lock);
12057c478bd9Sstevel@tonic-gate 	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
12067c478bd9Sstevel@tonic-gate 	return ((void *)ret);
12077c478bd9Sstevel@tonic-gate }
12087c478bd9Sstevel@tonic-gate 
12097c478bd9Sstevel@tonic-gate static void
12107c478bd9Sstevel@tonic-gate insert_oversize(oversize_t *op, oversize_t *nx)
12117c478bd9Sstevel@tonic-gate {
12127c478bd9Sstevel@tonic-gate 	oversize_t *sp;
12137c478bd9Sstevel@tonic-gate 
12147c478bd9Sstevel@tonic-gate 	/* locate correct insertion point in size-ordered list */
12157c478bd9Sstevel@tonic-gate 	for (sp = oversize_list.next_bysize;
12167c478bd9Sstevel@tonic-gate 	    sp != &oversize_list && (op->size > sp->size);
12177c478bd9Sstevel@tonic-gate 	    sp = sp->next_bysize)
12187c478bd9Sstevel@tonic-gate 		;
12197c478bd9Sstevel@tonic-gate 
12207c478bd9Sstevel@tonic-gate 	/* link into size-ordered list */
12217c478bd9Sstevel@tonic-gate 	op->next_bysize = sp;
12227c478bd9Sstevel@tonic-gate 	op->prev_bysize = sp->prev_bysize;
12237c478bd9Sstevel@tonic-gate 	op->prev_bysize->next_bysize = op;
12247c478bd9Sstevel@tonic-gate 	op->next_bysize->prev_bysize = op;
12257c478bd9Sstevel@tonic-gate 
12267c478bd9Sstevel@tonic-gate 	/*
12277c478bd9Sstevel@tonic-gate 	 * link item into address-ordered list
12287c478bd9Sstevel@tonic-gate 	 * (caller provides insertion point as an optimization)
12297c478bd9Sstevel@tonic-gate 	 */
12307c478bd9Sstevel@tonic-gate 	op->next_byaddr = nx;
12317c478bd9Sstevel@tonic-gate 	op->prev_byaddr = nx->prev_byaddr;
12327c478bd9Sstevel@tonic-gate 	op->prev_byaddr->next_byaddr = op;
12337c478bd9Sstevel@tonic-gate 	op->next_byaddr->prev_byaddr = op;
12347c478bd9Sstevel@tonic-gate 
12357c478bd9Sstevel@tonic-gate }
12367c478bd9Sstevel@tonic-gate 
12377c478bd9Sstevel@tonic-gate static void
12387c478bd9Sstevel@tonic-gate unlink_oversize(oversize_t *lp)
12397c478bd9Sstevel@tonic-gate {
12407c478bd9Sstevel@tonic-gate 	/* unlink from address list */
12417c478bd9Sstevel@tonic-gate 	lp->prev_byaddr->next_byaddr = lp->next_byaddr;
12427c478bd9Sstevel@tonic-gate 	lp->next_byaddr->prev_byaddr = lp->prev_byaddr;
12437c478bd9Sstevel@tonic-gate 
12447c478bd9Sstevel@tonic-gate 	/* unlink from size list */
12457c478bd9Sstevel@tonic-gate 	lp->prev_bysize->next_bysize = lp->next_bysize;
12467c478bd9Sstevel@tonic-gate 	lp->next_bysize->prev_bysize = lp->prev_bysize;
12477c478bd9Sstevel@tonic-gate }
12487c478bd9Sstevel@tonic-gate 
12497c478bd9Sstevel@tonic-gate static void
12507c478bd9Sstevel@tonic-gate position_oversize_by_size(oversize_t *op)
12517c478bd9Sstevel@tonic-gate {
12527c478bd9Sstevel@tonic-gate 	oversize_t *sp;
12537c478bd9Sstevel@tonic-gate 
12547c478bd9Sstevel@tonic-gate 	if (op->size > op->next_bysize->size ||
12557c478bd9Sstevel@tonic-gate 	    op->size < op->prev_bysize->size) {
12567c478bd9Sstevel@tonic-gate 
12577c478bd9Sstevel@tonic-gate 		/* unlink from size list */
12587c478bd9Sstevel@tonic-gate 		op->prev_bysize->next_bysize = op->next_bysize;
12597c478bd9Sstevel@tonic-gate 		op->next_bysize->prev_bysize = op->prev_bysize;
12607c478bd9Sstevel@tonic-gate 
12617c478bd9Sstevel@tonic-gate 		/* locate correct insertion point in size-ordered list */
12627c478bd9Sstevel@tonic-gate 		for (sp = oversize_list.next_bysize;
12637c478bd9Sstevel@tonic-gate 		    sp != &oversize_list && (op->size > sp->size);
12647c478bd9Sstevel@tonic-gate 		    sp = sp->next_bysize)
12657c478bd9Sstevel@tonic-gate 			;
12667c478bd9Sstevel@tonic-gate 
12677c478bd9Sstevel@tonic-gate 		/* link into size-ordered list */
12687c478bd9Sstevel@tonic-gate 		op->next_bysize = sp;
12697c478bd9Sstevel@tonic-gate 		op->prev_bysize = sp->prev_bysize;
12707c478bd9Sstevel@tonic-gate 		op->prev_bysize->next_bysize = op;
12717c478bd9Sstevel@tonic-gate 		op->next_bysize->prev_bysize = op;
12727c478bd9Sstevel@tonic-gate 	}
12737c478bd9Sstevel@tonic-gate }
12747c478bd9Sstevel@tonic-gate 
12757c478bd9Sstevel@tonic-gate static void
12767c478bd9Sstevel@tonic-gate add_oversize(oversize_t *lp)
12777c478bd9Sstevel@tonic-gate {
12787c478bd9Sstevel@tonic-gate 	int merge_flags = INSERT_ONLY;
12797c478bd9Sstevel@tonic-gate 	oversize_t *nx;  	/* ptr to item right of insertion point */
12807c478bd9Sstevel@tonic-gate 	oversize_t *pv;  	/* ptr to item left of insertion point */
12817c478bd9Sstevel@tonic-gate 	uint_t size_lp, size_pv, size_nx;
12827c478bd9Sstevel@tonic-gate 	uintptr_t endp_lp, endp_pv, endp_nx;
12837c478bd9Sstevel@tonic-gate 
12847c478bd9Sstevel@tonic-gate 	/*
12857c478bd9Sstevel@tonic-gate 	 * Locate insertion point in address-ordered list
12867c478bd9Sstevel@tonic-gate 	 */
12877c478bd9Sstevel@tonic-gate 
12887c478bd9Sstevel@tonic-gate 	for (nx = oversize_list.next_byaddr;
12897c478bd9Sstevel@tonic-gate 	    nx != &oversize_list && (lp->addr > nx->addr);
12907c478bd9Sstevel@tonic-gate 	    nx = nx->next_byaddr)
12917c478bd9Sstevel@tonic-gate 		;
12927c478bd9Sstevel@tonic-gate 
12937c478bd9Sstevel@tonic-gate 	/*
12947c478bd9Sstevel@tonic-gate 	 * Determine how to add chunk to oversize freelist
12957c478bd9Sstevel@tonic-gate 	 */
12967c478bd9Sstevel@tonic-gate 
12977c478bd9Sstevel@tonic-gate 	size_lp = OVSZ_HEADER_SIZE + lp->size;
12987c478bd9Sstevel@tonic-gate 	endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN);
12997c478bd9Sstevel@tonic-gate 	size_lp = endp_lp - (uintptr_t)lp;
13007c478bd9Sstevel@tonic-gate 
13017c478bd9Sstevel@tonic-gate 	pv = nx->prev_byaddr;
13027c478bd9Sstevel@tonic-gate 
13037c478bd9Sstevel@tonic-gate 	if (pv->size) {
13047c478bd9Sstevel@tonic-gate 
13057c478bd9Sstevel@tonic-gate 		size_pv = OVSZ_HEADER_SIZE + pv->size;
13067c478bd9Sstevel@tonic-gate 		endp_pv = ALIGN((uintptr_t)pv + size_pv,
13077c478bd9Sstevel@tonic-gate 		    MTMALLOC_MIN_ALIGN);
13087c478bd9Sstevel@tonic-gate 		size_pv = endp_pv - (uintptr_t)pv;
13097c478bd9Sstevel@tonic-gate 
13107c478bd9Sstevel@tonic-gate 		/* Check for adjacency with left chunk */
13117c478bd9Sstevel@tonic-gate 		if ((uintptr_t)lp == endp_pv)
13127c478bd9Sstevel@tonic-gate 			merge_flags |= COALESCE_LEFT;
13137c478bd9Sstevel@tonic-gate 	}
13147c478bd9Sstevel@tonic-gate 
13157c478bd9Sstevel@tonic-gate 	if (nx->size) {
13167c478bd9Sstevel@tonic-gate 
13177c478bd9Sstevel@tonic-gate 	    /* Check for adjacency with right chunk */
13187c478bd9Sstevel@tonic-gate 	    if ((uintptr_t)nx == endp_lp) {
13197c478bd9Sstevel@tonic-gate 		size_nx = OVSZ_HEADER_SIZE + nx->size;
13207c478bd9Sstevel@tonic-gate 		endp_nx = ALIGN((uintptr_t)nx + size_nx,
13217c478bd9Sstevel@tonic-gate 		    MTMALLOC_MIN_ALIGN);
13227c478bd9Sstevel@tonic-gate 		size_nx = endp_nx - (uintptr_t)nx;
13237c478bd9Sstevel@tonic-gate 		merge_flags |= COALESCE_RIGHT;
13247c478bd9Sstevel@tonic-gate 	    }
13257c478bd9Sstevel@tonic-gate 	}
13267c478bd9Sstevel@tonic-gate 
13277c478bd9Sstevel@tonic-gate 	/*
13287c478bd9Sstevel@tonic-gate 	 * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with
13297c478bd9Sstevel@tonic-gate 	 * FREEPATTERN for lp->size bytes. If we can merge, the oversize
13307c478bd9Sstevel@tonic-gate 	 * header(s) that will also become part of the memory available for
13317c478bd9Sstevel@tonic-gate 	 * reallocation (ie lp and/or nx) must also be overwritten with
13327c478bd9Sstevel@tonic-gate 	 * FREEPATTERN or we will SIGABRT when this memory is next reallocated.
13337c478bd9Sstevel@tonic-gate 	 */
13347c478bd9Sstevel@tonic-gate 	switch (merge_flags) {
13357c478bd9Sstevel@tonic-gate 
13367c478bd9Sstevel@tonic-gate 	case INSERT_ONLY:		/* Coalescing not possible */
13377c478bd9Sstevel@tonic-gate 		insert_oversize(lp, nx);
13387c478bd9Sstevel@tonic-gate 		break;
13397c478bd9Sstevel@tonic-gate 	case COALESCE_LEFT:
13407c478bd9Sstevel@tonic-gate 		pv->size += size_lp;
13417c478bd9Sstevel@tonic-gate 		position_oversize_by_size(pv);
13427c478bd9Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN)
13437c478bd9Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
13447c478bd9Sstevel@tonic-gate 		break;
13457c478bd9Sstevel@tonic-gate 	case COALESCE_RIGHT:
13467c478bd9Sstevel@tonic-gate 		unlink_oversize(nx);
13477c478bd9Sstevel@tonic-gate 		lp->size += size_nx;
13487c478bd9Sstevel@tonic-gate 		insert_oversize(lp, pv->next_byaddr);
13497c478bd9Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN)
13507c478bd9Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
13517c478bd9Sstevel@tonic-gate 		break;
13527c478bd9Sstevel@tonic-gate 	case COALESCE_WITH_BOTH_SIDES:	/* Merge (with right) to the left */
13537c478bd9Sstevel@tonic-gate 		pv->size += size_lp + size_nx;
13547c478bd9Sstevel@tonic-gate 		unlink_oversize(nx);
13557c478bd9Sstevel@tonic-gate 		position_oversize_by_size(pv);
13567c478bd9Sstevel@tonic-gate 		if (debugopt & MTDEBUGPATTERN) {
13577c478bd9Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
13587c478bd9Sstevel@tonic-gate 			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
13597c478bd9Sstevel@tonic-gate 		}
13607c478bd9Sstevel@tonic-gate 		break;
13617c478bd9Sstevel@tonic-gate 	}
13627c478bd9Sstevel@tonic-gate }
13637c478bd9Sstevel@tonic-gate 
13647c478bd9Sstevel@tonic-gate /*
13657c478bd9Sstevel@tonic-gate  * Find memory on our list that is at least size big. If we find a block that is
13667c478bd9Sstevel@tonic-gate  * big enough, we break it up and return the associated oversize_t struct back
13677c478bd9Sstevel@tonic-gate  * to the calling client. Any leftover piece of that block is returned to the
13687c478bd9Sstevel@tonic-gate  * freelist.
13697c478bd9Sstevel@tonic-gate  */
13707c478bd9Sstevel@tonic-gate static oversize_t *
13717c478bd9Sstevel@tonic-gate find_oversize(size_t size)
13727c478bd9Sstevel@tonic-gate {
13737c478bd9Sstevel@tonic-gate 	oversize_t *wp = oversize_list.next_bysize;
13747c478bd9Sstevel@tonic-gate 	while (wp != &oversize_list && size > wp->size)
13757c478bd9Sstevel@tonic-gate 		wp = wp->next_bysize;
13767c478bd9Sstevel@tonic-gate 
13777c478bd9Sstevel@tonic-gate 	if (wp == &oversize_list) /* empty list or nothing big enough */
13787c478bd9Sstevel@tonic-gate 		return (NULL);
13797c478bd9Sstevel@tonic-gate 	/* breaking up a chunk of memory */
13807c478bd9Sstevel@tonic-gate 	if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN)))
13817c478bd9Sstevel@tonic-gate 	    > MAX_CACHED) {
13827c478bd9Sstevel@tonic-gate 		caddr_t off;
13837c478bd9Sstevel@tonic-gate 		oversize_t *np;
13847c478bd9Sstevel@tonic-gate 		size_t osize;
13857c478bd9Sstevel@tonic-gate 		off = (caddr_t)ALIGN(wp->addr + size,
13867c478bd9Sstevel@tonic-gate 		    MTMALLOC_MIN_ALIGN);
13877c478bd9Sstevel@tonic-gate 		osize = wp->size;
13887c478bd9Sstevel@tonic-gate 		wp->size = (size_t)(off - wp->addr);
13897c478bd9Sstevel@tonic-gate 		np = oversize_header_alloc((uintptr_t)off,
13907c478bd9Sstevel@tonic-gate 		    osize - (wp->size + OVSZ_HEADER_SIZE));
13917c478bd9Sstevel@tonic-gate 		if ((long)np->size < 0)
13927c478bd9Sstevel@tonic-gate 			abort();
13937c478bd9Sstevel@tonic-gate 		unlink_oversize(wp);
13947c478bd9Sstevel@tonic-gate 		add_oversize(np);
13957c478bd9Sstevel@tonic-gate 	} else {
13967c478bd9Sstevel@tonic-gate 		unlink_oversize(wp);
13977c478bd9Sstevel@tonic-gate 	}
13987c478bd9Sstevel@tonic-gate 	return (wp);
13997c478bd9Sstevel@tonic-gate }
14007c478bd9Sstevel@tonic-gate 
/*
 * Fill the `size' bytes starting at `buf_arg' with the 32-bit `pattern'.
 *
 * The buffer is written one 32-bit word at a time, so `buf_arg' is
 * treated as a uint32_t array; a final partial word, if any, is written
 * as a whole word.
 */
static void
copy_pattern(uint32_t pattern, void *buf_arg, size_t size)
{
	uint32_t *cur = buf_arg;
	uint32_t *limit = (uint32_t *)((char *)buf_arg + size);

	/* Bulk of the buffer: four words per pass. */
	for (; cur < limit - 3; cur += 4) {
		cur[0] = pattern;
		cur[1] = pattern;
		cur[2] = pattern;
		cur[3] = pattern;
	}

	/* Whatever is left, one word at a time. */
	for (; cur < limit; cur++)
		*cur = pattern;
}
14147c478bd9Sstevel@tonic-gate 
/*
 * Check that the `size' bytes starting at `buf_arg' consist entirely of
 * the 32-bit `pattern', comparing one 32-bit word at a time.
 *
 * Returns a pointer to the first word that differs, or NULL when every
 * word matches.
 */
static void *
verify_pattern(uint32_t pattern, void *buf_arg, size_t size)
{
	uint32_t *cur = buf_arg;
	uint32_t *limit = (uint32_t *)((char *)buf_arg + size);

	while (cur < limit) {
		if (*cur != pattern)
			return (cur);
		cur++;
	}
	return (NULL);
}
14267c478bd9Sstevel@tonic-gate 
14277c478bd9Sstevel@tonic-gate static void
14287c478bd9Sstevel@tonic-gate free_oversize(oversize_t *ovp)
14297c478bd9Sstevel@tonic-gate {
14307c478bd9Sstevel@tonic-gate 	assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */
14317c478bd9Sstevel@tonic-gate 	assert(ovp->size > MAX_CACHED);
14327c478bd9Sstevel@tonic-gate 
14337c478bd9Sstevel@tonic-gate 	ovp->next_bysize = ovp->prev_bysize = NULL;
14347c478bd9Sstevel@tonic-gate 	ovp->next_byaddr = ovp->prev_byaddr = NULL;
14357c478bd9Sstevel@tonic-gate 	(void) mutex_lock(&oversize_lock);
14367c478bd9Sstevel@tonic-gate 	add_oversize(ovp);
14377c478bd9Sstevel@tonic-gate 	(void) mutex_unlock(&oversize_lock);
14387c478bd9Sstevel@tonic-gate }
14397c478bd9Sstevel@tonic-gate 
14407c478bd9Sstevel@tonic-gate static oversize_t *
14417c478bd9Sstevel@tonic-gate oversize_header_alloc(uintptr_t mem, size_t size)
14427c478bd9Sstevel@tonic-gate {
14437c478bd9Sstevel@tonic-gate 	oversize_t *ovsz_hdr;
14447c478bd9Sstevel@tonic-gate 
14457c478bd9Sstevel@tonic-gate 	assert(size > MAX_CACHED);
14467c478bd9Sstevel@tonic-gate 
14477c478bd9Sstevel@tonic-gate 	ovsz_hdr = (oversize_t *)mem;
14487c478bd9Sstevel@tonic-gate 	ovsz_hdr->prev_bysize = NULL;
14497c478bd9Sstevel@tonic-gate 	ovsz_hdr->next_bysize = NULL;
14507c478bd9Sstevel@tonic-gate 	ovsz_hdr->prev_byaddr = NULL;
14517c478bd9Sstevel@tonic-gate 	ovsz_hdr->next_byaddr = NULL;
14527c478bd9Sstevel@tonic-gate 	ovsz_hdr->hash_next = NULL;
14537c478bd9Sstevel@tonic-gate 	ovsz_hdr->size = size;
14547c478bd9Sstevel@tonic-gate 	mem += OVSZ_SIZE;
14557c478bd9Sstevel@tonic-gate 	*(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC;
14567c478bd9Sstevel@tonic-gate 	mem += OVERHEAD;
14577c478bd9Sstevel@tonic-gate 	assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */
14587c478bd9Sstevel@tonic-gate 	ovsz_hdr->addr = (caddr_t)mem;
14597c478bd9Sstevel@tonic-gate 	return (ovsz_hdr);
14607c478bd9Sstevel@tonic-gate }
1461*1d530678Sraf 
1462*1d530678Sraf static void
1463*1d530678Sraf malloc_prepare()
1464*1d530678Sraf {
1465*1d530678Sraf 	percpu_t *cpuptr;
1466*1d530678Sraf 	cache_head_t *cachehead;
1467*1d530678Sraf 	cache_t *thiscache;
1468*1d530678Sraf 
1469*1d530678Sraf 	(void) mutex_lock(&oversize_lock);
1470*1d530678Sraf 	for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
1471*1d530678Sraf 		(void) mutex_lock(&cpuptr->mt_parent_lock);
1472*1d530678Sraf 		for (cachehead = &cpuptr->mt_caches[0];
1473*1d530678Sraf 		    cachehead < &cpuptr->mt_caches[NUM_CACHES];
1474*1d530678Sraf 		    cachehead++) {
1475*1d530678Sraf 			for (thiscache = cachehead->mt_cache;
1476*1d530678Sraf 			    thiscache != NULL;
1477*1d530678Sraf 			    thiscache = thiscache->mt_next) {
1478*1d530678Sraf 				(void) mutex_lock(
1479*1d530678Sraf 				    &thiscache->mt_cache_lock);
1480*1d530678Sraf 			}
1481*1d530678Sraf 		}
1482*1d530678Sraf 	}
1483*1d530678Sraf }
1484*1d530678Sraf 
1485*1d530678Sraf static void
1486*1d530678Sraf malloc_release()
1487*1d530678Sraf {
1488*1d530678Sraf 	percpu_t *cpuptr;
1489*1d530678Sraf 	cache_head_t *cachehead;
1490*1d530678Sraf 	cache_t *thiscache;
1491*1d530678Sraf 
1492*1d530678Sraf 	for (cpuptr = &cpu_list[ncpus - 1]; cpuptr >= &cpu_list[0]; cpuptr--) {
1493*1d530678Sraf 		for (cachehead = &cpuptr->mt_caches[NUM_CACHES - 1];
1494*1d530678Sraf 		    cachehead >= &cpuptr->mt_caches[0];
1495*1d530678Sraf 		    cachehead--) {
1496*1d530678Sraf 			for (thiscache = cachehead->mt_cache;
1497*1d530678Sraf 			    thiscache != NULL;
1498*1d530678Sraf 			    thiscache = thiscache->mt_next) {
1499*1d530678Sraf 				(void) mutex_unlock(
1500*1d530678Sraf 				    &thiscache->mt_cache_lock);
1501*1d530678Sraf 			}
1502*1d530678Sraf 		}
1503*1d530678Sraf 		(void) mutex_unlock(&cpuptr->mt_parent_lock);
1504*1d530678Sraf 	}
1505*1d530678Sraf 	(void) mutex_unlock(&oversize_lock);
1506*1d530678Sraf }
1507*1d530678Sraf 
1508*1d530678Sraf #pragma init(malloc_init)
1509*1d530678Sraf static void
1510*1d530678Sraf malloc_init(void)
1511*1d530678Sraf {
1512*1d530678Sraf 	/*
1513*1d530678Sraf 	 * This works in the init section for this library
1514*1d530678Sraf 	 * because setup_caches() doesn't call anything in libc
1515*1d530678Sraf 	 * that calls malloc().  If it did, disaster would ensue.
1516*1d530678Sraf 	 *
1517*1d530678Sraf 	 * For this to work properly, this library must be the first
1518*1d530678Sraf 	 * one to have its init section called (after libc) by the
1519*1d530678Sraf 	 * dynamic linker.  If some other library's init section
1520*1d530678Sraf 	 * ran first and called malloc(), disaster would ensue.
1521*1d530678Sraf 	 * Because this is an interposer library for malloc(), the
1522*1d530678Sraf 	 * dynamic linker arranges for its init section to run first.
1523*1d530678Sraf 	 */
1524*1d530678Sraf 	(void) setup_caches();
1525*1d530678Sraf 
1526*1d530678Sraf 	(void) pthread_atfork(malloc_prepare, malloc_release, malloc_release);
1527*1d530678Sraf }
1528