xref: /titanic_44/usr/src/lib/libumem/amd64/umem_genasm.c (revision 4f364e7c95ee7fd9d5bbeddc1940e92405bb0e72)
1*4f364e7cSRobert Mustacchi /*
2*4f364e7cSRobert Mustacchi  * CDDL HEADER START
3*4f364e7cSRobert Mustacchi  *
4*4f364e7cSRobert Mustacchi  * The contents of this file are subject to the terms of the
5*4f364e7cSRobert Mustacchi  * Common Development and Distribution License (the "License").
6*4f364e7cSRobert Mustacchi  * You may not use this file except in compliance with the License.
7*4f364e7cSRobert Mustacchi  *
8*4f364e7cSRobert Mustacchi  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*4f364e7cSRobert Mustacchi  * or http://www.opensolaris.org/os/licensing.
10*4f364e7cSRobert Mustacchi  * See the License for the specific language governing permissions
11*4f364e7cSRobert Mustacchi  * and limitations under the License.
12*4f364e7cSRobert Mustacchi  *
13*4f364e7cSRobert Mustacchi  * When distributing Covered Code, include this CDDL HEADER in each
14*4f364e7cSRobert Mustacchi  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*4f364e7cSRobert Mustacchi  * If applicable, add the following below this CDDL HEADER, with the
16*4f364e7cSRobert Mustacchi  * fields enclosed by brackets "[]" replaced with your own identifying
17*4f364e7cSRobert Mustacchi  * information: Portions Copyright [yyyy] [name of copyright owner]
18*4f364e7cSRobert Mustacchi  *
19*4f364e7cSRobert Mustacchi  * CDDL HEADER END
20*4f364e7cSRobert Mustacchi  */
21*4f364e7cSRobert Mustacchi /*
22*4f364e7cSRobert Mustacchi  * Copyright (c) 2013 Joyent, Inc.  All rights reserved.
23*4f364e7cSRobert Mustacchi  */
24*4f364e7cSRobert Mustacchi 
25*4f364e7cSRobert Mustacchi /*
26*4f364e7cSRobert Mustacchi  * Don't Panic! If you find the blocks of assembly that follow confusing and
27*4f364e7cSRobert Mustacchi  * you're questioning why they exist, please go read section 8 of the umem.c big
28*4f364e7cSRobert Mustacchi  * theory statement. Next familiarize yourself with the malloc and free
29*4f364e7cSRobert Mustacchi  * implementations in libumem's malloc.c.
30*4f364e7cSRobert Mustacchi  *
31*4f364e7cSRobert Mustacchi  * What follows is the amd64 implementation of the thread caching automatic
32*4f364e7cSRobert Mustacchi  * assembly generation. The amd64 calling conventions are documented in the
33*4f364e7cSRobert Mustacchi  * 64-bit System V ABI. For our purposes what matters is that our first argument
34*4f364e7cSRobert Mustacchi  * will come in rdi. Our functions have to preserve rbp, rbx, and r12->r15. We
35*4f364e7cSRobert Mustacchi  * are free to do whatever we want with rax, rcx, rdx, rsi, rdi, and r8->r11.
36*4f364e7cSRobert Mustacchi  *
37*4f364e7cSRobert Mustacchi  * For both our implementation of malloc and free we only use the registers we
38*4f364e7cSRobert Mustacchi  * don't have to preserve.
39*4f364e7cSRobert Mustacchi  *
40*4f364e7cSRobert Mustacchi  * Malloc register usage:
41*4f364e7cSRobert Mustacchi  * 	o. rdi: Original size to malloc. This never changes and is preserved.
42*4f364e7cSRobert Mustacchi  * 	o. rsi: Adjusted malloc size for malloc_data_tag(s).
43*4f364e7cSRobert Mustacchi  * 	o. rcx: Pointer to the tmem_t in the ulwp_t.
44*4f364e7cSRobert Mustacchi  * 	o. rdx: Pointer to the tmem_t array of roots
45*4f364e7cSRobert Mustacchi  * 	o. r8:  Size of the cache
46*4f364e7cSRobert Mustacchi  * 	o. r9:  Scratch register
47*4f364e7cSRobert Mustacchi  *
48*4f364e7cSRobert Mustacchi  * Free register usage:
49*4f364e7cSRobert Mustacchi  *	o. rdi: Original buffer to free. This never changes and is preserved.
50*4f364e7cSRobert Mustacchi  *	o. rax: The actual buffer, adjusted for the hidden malloc_data_t(s).
51*4f364e7cSRobert Mustacchi  * 	o. rcx: Pointer to the tmem_t in the ulwp_t.
52*4f364e7cSRobert Mustacchi  * 	o. rdx: Pointer to the tmem_t array of roots
53*4f364e7cSRobert Mustacchi  * 	o. r8:  Size of the cache
54*4f364e7cSRobert Mustacchi  * 	o. r9:  Scratch register
55*4f364e7cSRobert Mustacchi  *
56*4f364e7cSRobert Mustacchi  * Once we determine what cache we are using, we increment %rdx to the
57*4f364e7cSRobert Mustacchi  * appropriate offset and set %r8 with the size of the cache. This means that
58*4f364e7cSRobert Mustacchi  * when we break out to the normal buffer allocation point %rdx contains the
59*4f364e7cSRobert Mustacchi  * head of the linked list and %r8 is the amount that we have to adjust the
60*4f364e7cSRobert Mustacchi  * thread's cached amount by.
61*4f364e7cSRobert Mustacchi  *
 * Each block of assembly has pseudocode that describes its purpose.
63*4f364e7cSRobert Mustacchi  */
64*4f364e7cSRobert Mustacchi 
65*4f364e7cSRobert Mustacchi #include <atomic.h>
66*4f364e7cSRobert Mustacchi #include <inttypes.h>
67*4f364e7cSRobert Mustacchi #include <sys/types.h>
68*4f364e7cSRobert Mustacchi #include <strings.h>
69*4f364e7cSRobert Mustacchi #include <umem_impl.h>
70*4f364e7cSRobert Mustacchi #include "umem_base.h"
71*4f364e7cSRobert Mustacchi 
72*4f364e7cSRobert Mustacchi #include <stdio.h>
73*4f364e7cSRobert Mustacchi 
74*4f364e7cSRobert Mustacchi const int umem_genasm_supported = 1;
75*4f364e7cSRobert Mustacchi static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
76*4f364e7cSRobert Mustacchi static size_t umem_genasm_msize = 576;
77*4f364e7cSRobert Mustacchi static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
78*4f364e7cSRobert Mustacchi static size_t umem_genasm_fsize = 576;
79*4f364e7cSRobert Mustacchi static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
80*4f364e7cSRobert Mustacchi static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;
81*4f364e7cSRobert Mustacchi 
/*
 * UMEM_GENASM_MAX64: UINT32_MAX divided by the size of a pointer.
 *
 * PTC_JMPADDR(dest, src): the value to store in a 4-byte relative jump
 * displacement located at offset src so that the jump lands on offset dest.
 * The displacement is the last field of every jump patched in this file, so
 * the instruction ends (and the displacement is measured from) src + 4. The
 * arguments are parenthesized so that expressions may be passed safely.
 *
 * PTC_ROOT_SIZE: size in bytes of one pointer-sized root entry.
 *
 * MULTINOP: the byte sequence 0f 1f 44 00 00 (a five-byte nopl) expressed as
 * a little-endian integer.
 */
#define	UMEM_GENASM_MAX64	(UINT32_MAX / sizeof (uintptr_t))
#define	PTC_JMPADDR(dest, src)	((dest) - ((src) + 4))
#define	PTC_ROOT_SIZE	(sizeof (uintptr_t))
#define	MULTINOP	0x0000441f0f
86*4f364e7cSRobert Mustacchi 
/*
 * void *ptcmalloc(size_t orig_size);
 *
 * size_t size = orig_size + 8;
 * if (size > UMEM_SECOND_ALIGN)
 * 	size += 8;
 *
 * if (size < orig_size)
 * 	goto tomalloc;		! This is overflow
 *
 * if (size > cache_max)
 * 	goto tomalloc
 *
 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
 * void **roots = t->tm_roots;
 *
 * The PTC_MALINIT_* values are byte offsets into malinit[] of the 4-byte
 * fields that genasm_malinit() patches:
 *	JOUT - displacement of the overflow jump (jb) to the fallback label
 *	MCS  - immediate holding the largest cache size
 *	JOV  - displacement of the too-large jump (ja) to the fallback label
 *	SOFF - immediate added to the %fs:0 value to reach the tmem_t
 */
#define	PTC_MALINIT_JOUT	0x13
#define	PTC_MALINIT_MCS	0x1a
#define	PTC_MALINIT_JOV	0x20
#define	PTC_MALINIT_SOFF	0x30
static const uint8_t malinit[] =  {
	0x48, 0x8d, 0x77, 0x08,		/* leaq 0x8(%rdi),%rsi */
	0x48, 0x83, 0xfe, 0x10,		/* cmpq $0x10, %rsi */
	0x76, 0x04,			/* jbe +0x4 */
	0x48, 0x8d, 0x77, 0x10,		/* leaq 0x10(%rdi),%rsi */
	0x48, 0x39, 0xfe,		/* cmpq %rdi,%rsi */
	0x0f, 0x82, 0x00, 0x00, 0x00, 0x00,	/* jb +errout */
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +errout */
	0x64, 0x48, 0x8b, 0x0c, 0x25,
	0x00, 0x00, 0x00, 0x00,		/* movq %fs:0x0,%rcx */
	0x48, 0x81, 0xc1,
	0x00, 0x00, 0x00, 0x00,		/* addq $SOFF, %rcx */
	0x48, 0x8d, 0x51, 0x08,		/* leaq 0x8(%rcx),%rdx */
};
123*4f364e7cSRobert Mustacchi 
/*
 * void ptcfree(void *buf);
 *
 * if (buf == NULL)
 * 	return;
 *
 * malloc_data_t *tag = buf;
 * tag--;
 * int size = tag->malloc_size;
 * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
 * if (tagval == MALLOC_SECOND_MAGIC) {
 * 	tag--;
 * } else if (tagval != MALLOC_MAGIC) {
 * 	goto tofree;
 * }
 *
 * if (size > cache_max)
 * 	goto tofree;
 *
 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
 * void **roots = t->tm_roots;
 *
 * The PTC_FRINI_* values are byte offsets into freeinit[] of the 4-byte
 * fields that genasm_frinit() patches:
 *	JDONE - displacement of the NULL-buffer jump (je) to the ret label
 *	JFREE - displacement of the bad-tag jump (jne) to the fallback label
 *	MCS   - immediate holding the largest cache size
 *	JOV   - displacement of the too-large jump (ja) to the fallback label
 *	SOFF  - immediate added to the %fs:0 value to reach the tmem_t
 */
#define	PTC_FRINI_JDONE	0x05
#define	PTC_FRINI_JFREE	0x25
#define	PTC_FRINI_MCS	0x30
#define	PTC_FRINI_JOV	0x36
#define	PTC_FRINI_SOFF	0x46
static const uint8_t freeinit[] = {
	0x48, 0x85, 0xff,		/* testq %rdi,%rdi */
	0x0f, 0x84, 0x00, 0x00, 0x00, 0x00,	/* je $JDONE (done) */
	0x8b, 0x77, 0xf8,		/* movl -0x8(%rdi),%esi */
	0x8b, 0x47, 0xfc,		/* movl -0x4(%rdi),%eax */
	0x01, 0xf0,			/* addl %esi,%eax */
	0x3d, 0x00, 0x70, 0xba, 0x16,	/* cmpl $MALLOC_2_MAGIC, %eax */
	0x75, 0x06,			/* jne +0x6 (checkover) */
	0x48, 0x8d, 0x47, 0xf0,		/* leaq -0x10(%rdi),%rax */
	0xeb, 0x0f,			/* jmp +0xf (freebuf) */
	0x3d, 0x00, 0xc0, 0x10, 0x3a,	/* cmpl $MALLOC_MAGIC, %eax */
	0x0f, 0x85, 0x00, 0x00, 0x00, 0x00,	/* jne +JFREE (goto tofree) */
	0x48, 0x8d, 0x47, 0xf8,		/* leaq -0x8(%rdi),%rax */
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +errout */
	0x64, 0x48, 0x8b, 0x0c, 0x25,
	0x00, 0x00, 0x00, 0x00,		/* movq %fs:0x0,%rcx */
	0x48, 0x81, 0xc1,
	0x00, 0x00, 0x00, 0x00,		/* addq $SOFF, %rcx */
	0x48, 0x8d, 0x51, 0x08,		/* leaq 0x8(%rcx),%rdx */
};
173*4f364e7cSRobert Mustacchi 
/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 * } else ...				! goto next cache
 *
 * The PTC_INICACHE_* values are byte offsets into inicache[] of the 4-byte
 * fields that genasm_firstcache() patches: the compare immediate (CMP), the
 * cache size loaded into %r8 (SIZE) and the displacement of the jump to the
 * buffer-handling label (JMP). The short ja skips the remaining 0xc bytes of
 * this block and so lands on whatever block is emitted next.
 */
#define	PTC_INICACHE_CMP	0x03
#define	PTC_INICACHE_SIZE	0x0c
#define	PTC_INICACHE_JMP	0x11
static const uint8_t inicache[] = {
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x77, 0x0c,			/* ja +0xc (next cache) */
	0x49, 0xc7, 0xc0,
	0x00, 0x00, 0x00, 0x00,		/* movq sizeof ($CACHE), %r8 */
	0xe9, 0x00, 0x00, 0x00, 0x00,	/* jmp $JMP (allocbuf) */
};
190*4f364e7cSRobert Mustacchi 
/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 *	roots += $CACHE_NUM;
 * } else ...				! goto next cache
 *
 * The PTC_GENCACHE_* values are byte offsets into gencache[] of the 4-byte
 * fields that genasm_gencache() patches: the compare immediate (CMP), the
 * cache size loaded into %r8 (SIZE), the byte offset added to the roots
 * pointer in %rdx (NUM) and the displacement of the jump to the
 * buffer-handling label (JMP).
 *
 * The short ja sits at offsets 7-8, so its displacement is measured from
 * offset 9. The block is 28 bytes long, which makes the distance to the first
 * byte of the next block 28 - 9 = 0x13. A displacement of 0x14 would land one
 * byte into the following block, skipping its leading 0x48 (REX.W) prefix.
 */
#define	PTC_GENCACHE_CMP	0x03
#define	PTC_GENCACHE_SIZE	0x0c
#define	PTC_GENCACHE_NUM	0x13
#define	PTC_GENCACHE_JMP	0x18
static const uint8_t gencache[] = {
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x77, 0x13,			/* ja +0x13 (next cache) */
	0x49, 0xc7, 0xc0,
	0x00, 0x00, 0x00, 0x00,		/* movq sizeof ($CACHE), %r8 */
	0x48, 0x81, 0xc2,
	0x00, 0x00, 0x00, 0x00,		/* addq $8*ii, %rdx */
	0xe9, 0x00, 0x00, 0x00, 0x00	/* jmp +$JMP (allocbuf ) */
};
211*4f364e7cSRobert Mustacchi 
/*
 * else if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 *	roots += $CACHE_NUM;
 * } else {
 *	goto tofunc; 			! goto tomalloc if ptcmalloc.
 * }					! goto tofree if ptcfree.
 *
 * The PTC_FINCACHE_* values are byte offsets into fincache[] of the fields
 * that genasm_lastcache() patches. CMP, SIZE and NUM are 4-byte immediates;
 * JMP is the single-byte displacement of the short ja to the fallback label.
 * Execution falls off the end of this block into the epilogue that follows.
 */
#define	PTC_FINCACHE_CMP	0x03
#define	PTC_FINCACHE_JMP	0x08
#define	PTC_FINCACHE_SIZE	0x0c
#define	PTC_FINCACHE_NUM	0x13
static const uint8_t fincache[] = {
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x77, 0x00,			/* ja +JMP (to real malloc or free) */
	0x49, 0xc7, 0xc0,
	0x00, 0x00, 0x00, 0x00,		/* movq sizeof ($CACHE), %r8 */
	0x48, 0x81, 0xc2,
	0x00, 0x00, 0x00, 0x00,		/* addq $8*ii, %rdx */
};
234*4f364e7cSRobert Mustacchi 
/*
 * if (*root == NULL)
 * 	goto tomalloc;
 *
 * malloc_data_t *ret = *root;
 * *root = *(void **)ret;
 * t->tm_size -= csize;
 * ret->malloc_size = size;
 *
 * if (size > UMEM_SECOND_ALIGN) {
 *	ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size);
 *	ret += 2;
 * } else {
 *	ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_MAGIC, size);
 *	ret += 1;
 * }
 *
 * return ((void *)ret);
 * tomalloc:
 * 	return (malloc(orig_size));
 *
 * PTC_MALFINI_ALLABEL is the offset of the first instruction (the label the
 * cache-selection blocks jump to). PTC_MALFINI_JMLABEL is the offset of the
 * trailing jmp (the fallback label) and PTC_MALFINI_JMADDR is the offset of
 * that jmp's 4-byte displacement, which genasm_malfini() patches.
 */
#define	PTC_MALFINI_ALLABEL	0x00
#define	PTC_MALFINI_JMLABEL	0x40
#define	PTC_MALFINI_JMADDR	0x41
static const uint8_t malfini[] = {
	0x48, 0x8b, 0x02,		/* movq (%rdx),%rax */
	0x48, 0x85, 0xc0,		/* testq %rax,%rax */
	0x74, 0x38,			/* je +0x38 (errout) */
	0x4c, 0x8b, 0x08,		/* movq (%rax),%r9 */
	0x4c, 0x89, 0x0a,		/* movq %r9,(%rdx) */
	0x4c, 0x29, 0x01,		/* subq %r8,(%rcx) */
	0x48, 0x83, 0xfe, 0x10,		/* cmpq $0x10,%rsi */
	0x76, 0x15,			/* jbe +0x15 */
	0x41, 0xb9, 0x00, 0x70, 0xba, 0x16, /* movl $MALLOC_MAGIC_2, %r9d */
	0x89, 0x70, 0x08,		/* movl %esi,0x8(%rax) */
	0x41, 0x29, 0xf1,		/* subl %esi, %r9d */
	0x44, 0x89, 0x48, 0x0c,		/* movl %r9d, 0xc(%rax) */
	0x48, 0x83, 0xc0, 0x10,		/* addq $0x10, %rax */
	0xc3,				/* ret */
	0x41, 0xb9, 0x00, 0xc0, 0x10, 0x3a,	/* movl $MALLOC_MAGIC, %r9d */
	0x89, 0x30,			/* movl %esi,(%rax) */
	0x41, 0x29, 0xf1,		/* subl %esi,%r9d */
	0x44, 0x89, 0x48, 0x04,		/* movl %r9d,0x4(%rax) */
	0x48, 0x83, 0xc0, 0x08,		/* addq $0x8,%rax */
	0xc3,				/* ret */
	0xe9, 0x00, 0x00, 0x00, 0x00	/* jmp $MALLOC */
};
282*4f364e7cSRobert Mustacchi 
/*
 * if (t->tm_size + csize > umem_ptc_size)
 * 	goto tofree;
 *
 * t->tm_size += csize
 * *(void **)tag = *root;
 * *root = tag;
 * return;
 * tofree:
 * 	free(buf);
 * 	return;
 *
 * PTC_FRFINI_RBUFLABEL is the offset of the first instruction (the label the
 * cache-selection blocks jump to). PTC_FRFINI_CACHEMAX is the offset of the
 * 4-byte compare immediate and PTC_FRFINI_JFADDR the offset of the trailing
 * jmp's 4-byte displacement; genasm_frfini() patches both. DONELABEL is the
 * offset of the ret and JFLABEL the offset of the trailing jmp.
 */
#define	PTC_FRFINI_RBUFLABEL	0x00
#define	PTC_FRFINI_CACHEMAX	0x09
#define	PTC_FRFINI_DONELABEL	0x1b
#define	PTC_FRFINI_JFLABEL	0x1c
#define	PTC_FRFINI_JFADDR	0x1d
static const uint8_t freefini[] = {
	0x4c, 0x8b, 0x09,		/* movq (%rcx),%r9 */
	0x4d, 0x01, 0xc1,		/* addq %r8, %r9 */
	0x49, 0x81, 0xf9,
	0x00, 0x00, 0x00, 0x00,		/* cmpq $THR_CACHE_MAX, %r9 */
	0x77, 0x0d,			/* ja +0xd (tofree) */
	0x4c, 0x01, 0x01,		/* addq %r8,(%rcx) */
	0x4c, 0x8b, 0x0a,		/* movq (%rdx),%r9 */
	0x4c, 0x89, 0x08,		/* movq %r9,(%rax) */
	0x48, 0x89, 0x02,		/* movq %rax,(%rdx) */
	0xc3,				/* ret */
	0xe9, 0x00, 0x00, 0x00, 0x00	/* jmp free */
};
313*4f364e7cSRobert Mustacchi 
314*4f364e7cSRobert Mustacchi /*
315*4f364e7cSRobert Mustacchi  * Construct the initial part of malloc. off contains the offset from curthread
316*4f364e7cSRobert Mustacchi  * to the root of the tmem structure. ep is the address of the label to error
317*4f364e7cSRobert Mustacchi  * and jump to free. csize is the size of the largest umem_cache in ptcumem.
318*4f364e7cSRobert Mustacchi  */
319*4f364e7cSRobert Mustacchi static int
genasm_malinit(uint8_t * bp,uint32_t off,uint32_t ep,uint32_t csize)320*4f364e7cSRobert Mustacchi genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize)
321*4f364e7cSRobert Mustacchi {
322*4f364e7cSRobert Mustacchi 	uint32_t addr;
323*4f364e7cSRobert Mustacchi 
324*4f364e7cSRobert Mustacchi 	bcopy(malinit, bp, sizeof (malinit));
325*4f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT);
326*4f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr));
327*4f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize));
328*4f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV);
329*4f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr));
330*4f364e7cSRobert Mustacchi 	bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off));
331*4f364e7cSRobert Mustacchi 
332*4f364e7cSRobert Mustacchi 	return (sizeof (malinit));
333*4f364e7cSRobert Mustacchi }
334*4f364e7cSRobert Mustacchi 
335*4f364e7cSRobert Mustacchi static int
genasm_frinit(uint8_t * bp,uint32_t off,uint32_t dp,uint32_t ep,uint32_t mcs)336*4f364e7cSRobert Mustacchi genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mcs)
337*4f364e7cSRobert Mustacchi {
338*4f364e7cSRobert Mustacchi 	uint32_t addr;
339*4f364e7cSRobert Mustacchi 
340*4f364e7cSRobert Mustacchi 	bcopy(freeinit, bp, sizeof (freeinit));
341*4f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE);
342*4f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr));
343*4f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE);
344*4f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr));
345*4f364e7cSRobert Mustacchi 	bcopy(&mcs, bp + PTC_FRINI_MCS, sizeof (mcs));
346*4f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ep, PTC_FRINI_JOV);
347*4f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr));
348*4f364e7cSRobert Mustacchi 	bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off));
349*4f364e7cSRobert Mustacchi 	return (sizeof (freeinit));
350*4f364e7cSRobert Mustacchi }
351*4f364e7cSRobert Mustacchi 
352*4f364e7cSRobert Mustacchi 
353*4f364e7cSRobert Mustacchi /*
354*4f364e7cSRobert Mustacchi  * Create the initial cache entry of the specified size. The value of ap tells
355*4f364e7cSRobert Mustacchi  * us what the address of the label to try and allocate a buffer. This value is
356*4f364e7cSRobert Mustacchi  * an offset from the current base to that value.
357*4f364e7cSRobert Mustacchi  */
358*4f364e7cSRobert Mustacchi static int
genasm_firstcache(uint8_t * bp,uint32_t csize,uint32_t ap)359*4f364e7cSRobert Mustacchi genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap)
360*4f364e7cSRobert Mustacchi {
361*4f364e7cSRobert Mustacchi 	uint32_t addr;
362*4f364e7cSRobert Mustacchi 
363*4f364e7cSRobert Mustacchi 	bcopy(inicache, bp, sizeof (inicache));
364*4f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize));
365*4f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize));
366*4f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP);
367*4f364e7cSRobert Mustacchi 	ASSERT(addr != 0);
368*4f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr));
369*4f364e7cSRobert Mustacchi 
370*4f364e7cSRobert Mustacchi 	return (sizeof (inicache));
371*4f364e7cSRobert Mustacchi }
372*4f364e7cSRobert Mustacchi 
373*4f364e7cSRobert Mustacchi static int
genasm_gencache(uint8_t * bp,int num,uint32_t csize,uint32_t ap)374*4f364e7cSRobert Mustacchi genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap)
375*4f364e7cSRobert Mustacchi {
376*4f364e7cSRobert Mustacchi 	uint32_t addr;
377*4f364e7cSRobert Mustacchi 	uint32_t coff;
378*4f364e7cSRobert Mustacchi 
379*4f364e7cSRobert Mustacchi 	ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num);
380*4f364e7cSRobert Mustacchi 	ASSERT(num != 0);
381*4f364e7cSRobert Mustacchi 	bcopy(gencache, bp, sizeof (gencache));
382*4f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize));
383*4f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize));
384*4f364e7cSRobert Mustacchi 	coff = num * PTC_ROOT_SIZE;
385*4f364e7cSRobert Mustacchi 	bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff));
386*4f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP);
387*4f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr));
388*4f364e7cSRobert Mustacchi 
389*4f364e7cSRobert Mustacchi 	return (sizeof (gencache));
390*4f364e7cSRobert Mustacchi }
391*4f364e7cSRobert Mustacchi 
392*4f364e7cSRobert Mustacchi static int
genasm_lastcache(uint8_t * bp,int num,uint32_t csize,uint32_t ep)393*4f364e7cSRobert Mustacchi genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep)
394*4f364e7cSRobert Mustacchi {
395*4f364e7cSRobert Mustacchi 	uint8_t eap;
396*4f364e7cSRobert Mustacchi 	uint32_t coff;
397*4f364e7cSRobert Mustacchi 
398*4f364e7cSRobert Mustacchi 	ASSERT(ep <= 0xff && ep > 7);
399*4f364e7cSRobert Mustacchi 	ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num);
400*4f364e7cSRobert Mustacchi 	bcopy(fincache, bp, sizeof (fincache));
401*4f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize));
402*4f364e7cSRobert Mustacchi 	bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize));
403*4f364e7cSRobert Mustacchi 	coff = num * PTC_ROOT_SIZE;
404*4f364e7cSRobert Mustacchi 	bcopy(&coff, bp + PTC_FINCACHE_NUM, sizeof (coff));
405*4f364e7cSRobert Mustacchi 	eap = ep - PTC_FINCACHE_JMP - 1;
406*4f364e7cSRobert Mustacchi 	bcopy(&eap, bp + PTC_FINCACHE_JMP, sizeof (eap));
407*4f364e7cSRobert Mustacchi 
408*4f364e7cSRobert Mustacchi 	return (sizeof (fincache));
409*4f364e7cSRobert Mustacchi }
410*4f364e7cSRobert Mustacchi 
411*4f364e7cSRobert Mustacchi static int
genasm_malfini(uint8_t * bp,uintptr_t mptr)412*4f364e7cSRobert Mustacchi genasm_malfini(uint8_t *bp, uintptr_t mptr)
413*4f364e7cSRobert Mustacchi {
414*4f364e7cSRobert Mustacchi 	uint32_t addr;
415*4f364e7cSRobert Mustacchi 
416*4f364e7cSRobert Mustacchi 	bcopy(malfini, bp, sizeof (malfini));
417*4f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR));
418*4f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr));
419*4f364e7cSRobert Mustacchi 
420*4f364e7cSRobert Mustacchi 	return (sizeof (malfini));
421*4f364e7cSRobert Mustacchi }
422*4f364e7cSRobert Mustacchi 
423*4f364e7cSRobert Mustacchi static int
genasm_frfini(uint8_t * bp,uint32_t maxthr,uintptr_t fptr)424*4f364e7cSRobert Mustacchi genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr)
425*4f364e7cSRobert Mustacchi {
426*4f364e7cSRobert Mustacchi 	uint32_t addr;
427*4f364e7cSRobert Mustacchi 
428*4f364e7cSRobert Mustacchi 	bcopy(freefini, bp, sizeof (freefini));
429*4f364e7cSRobert Mustacchi 	bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr));
430*4f364e7cSRobert Mustacchi 	addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR));
431*4f364e7cSRobert Mustacchi 	bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr));
432*4f364e7cSRobert Mustacchi 
433*4f364e7cSRobert Mustacchi 	return (sizeof (freefini));
434*4f364e7cSRobert Mustacchi }
435*4f364e7cSRobert Mustacchi 
436*4f364e7cSRobert Mustacchi /*
437*4f364e7cSRobert Mustacchi  * The malloc inline assembly is constructed as follows:
438*4f364e7cSRobert Mustacchi  *
439*4f364e7cSRobert Mustacchi  * o Malloc prologue assembly
440*4f364e7cSRobert Mustacchi  * o Generic first-cache check
441*4f364e7cSRobert Mustacchi  * o n Generic cache checks (where n = _tmem_get_entries() - 2)
442*4f364e7cSRobert Mustacchi  * o Generic last-cache check
443*4f364e7cSRobert Mustacchi  * o Malloc epilogue assembly
444*4f364e7cSRobert Mustacchi  *
445*4f364e7cSRobert Mustacchi  * Generally there are at least three caches. When there is only one cache we
446*4f364e7cSRobert Mustacchi  * only use the generic last-cache. In the case where there are two caches, we
447*4f364e7cSRobert Mustacchi  * just leave out the middle ones.
448*4f364e7cSRobert Mustacchi  */
449*4f364e7cSRobert Mustacchi static int
genasm_malloc(void * base,size_t len,int nents,int * umem_alloc_sizes)450*4f364e7cSRobert Mustacchi genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes)
451*4f364e7cSRobert Mustacchi {
452*4f364e7cSRobert Mustacchi 	int ii, off;
453*4f364e7cSRobert Mustacchi 	uint8_t *bp;
454*4f364e7cSRobert Mustacchi 	size_t total;
455*4f364e7cSRobert Mustacchi 	uint32_t allocoff, erroff;
456*4f364e7cSRobert Mustacchi 
457*4f364e7cSRobert Mustacchi 	total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache);
458*4f364e7cSRobert Mustacchi 
459*4f364e7cSRobert Mustacchi 	if (nents >= 2)
460*4f364e7cSRobert Mustacchi 		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);
461*4f364e7cSRobert Mustacchi 
462*4f364e7cSRobert Mustacchi 	if (total > len)
463*4f364e7cSRobert Mustacchi 		return (1);
464*4f364e7cSRobert Mustacchi 
465*4f364e7cSRobert Mustacchi 	erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL;
466*4f364e7cSRobert Mustacchi 	allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL;
467*4f364e7cSRobert Mustacchi 
468*4f364e7cSRobert Mustacchi 	bp = base;
469*4f364e7cSRobert Mustacchi 
470*4f364e7cSRobert Mustacchi 	off = genasm_malinit(bp, umem_tmem_off, erroff,
471*4f364e7cSRobert Mustacchi 	    umem_alloc_sizes[nents-1]);
472*4f364e7cSRobert Mustacchi 	bp += off;
473*4f364e7cSRobert Mustacchi 	allocoff -= off;
474*4f364e7cSRobert Mustacchi 	erroff -= off;
475*4f364e7cSRobert Mustacchi 
476*4f364e7cSRobert Mustacchi 	if (nents > 1) {
477*4f364e7cSRobert Mustacchi 		off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff);
478*4f364e7cSRobert Mustacchi 		bp += off;
479*4f364e7cSRobert Mustacchi 		allocoff -= off;
480*4f364e7cSRobert Mustacchi 		erroff -= off;
481*4f364e7cSRobert Mustacchi 	}
482*4f364e7cSRobert Mustacchi 
483*4f364e7cSRobert Mustacchi 	for (ii = 1; ii < nents - 1; ii++) {
484*4f364e7cSRobert Mustacchi 		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff);
485*4f364e7cSRobert Mustacchi 		bp += off;
486*4f364e7cSRobert Mustacchi 		allocoff -= off;
487*4f364e7cSRobert Mustacchi 		erroff -= off;
488*4f364e7cSRobert Mustacchi 	}
489*4f364e7cSRobert Mustacchi 
490*4f364e7cSRobert Mustacchi 	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
491*4f364e7cSRobert Mustacchi 	    erroff);
492*4f364e7cSRobert Mustacchi 	bp += genasm_malfini(bp, umem_genasm_omptr);
493*4f364e7cSRobert Mustacchi 	ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);
494*4f364e7cSRobert Mustacchi 
495*4f364e7cSRobert Mustacchi 	return (0);
496*4f364e7cSRobert Mustacchi }
497*4f364e7cSRobert Mustacchi 
498*4f364e7cSRobert Mustacchi static int
genasm_free(void * base,size_t len,int nents,int * umem_alloc_sizes)499*4f364e7cSRobert Mustacchi genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes)
500*4f364e7cSRobert Mustacchi {
501*4f364e7cSRobert Mustacchi 	uint8_t *bp;
502*4f364e7cSRobert Mustacchi 	int ii, off;
503*4f364e7cSRobert Mustacchi 	size_t total;
504*4f364e7cSRobert Mustacchi 	uint32_t rbufoff, retoff, erroff;
505*4f364e7cSRobert Mustacchi 
506*4f364e7cSRobert Mustacchi 	/* Assume that nents has already been audited for us */
507*4f364e7cSRobert Mustacchi 	total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache);
508*4f364e7cSRobert Mustacchi 	if (nents >= 2)
509*4f364e7cSRobert Mustacchi 		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);
510*4f364e7cSRobert Mustacchi 
511*4f364e7cSRobert Mustacchi 	if (total > len)
512*4f364e7cSRobert Mustacchi 		return (1);
513*4f364e7cSRobert Mustacchi 
514*4f364e7cSRobert Mustacchi 	erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL);
515*4f364e7cSRobert Mustacchi 	rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL);
516*4f364e7cSRobert Mustacchi 	retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL);
517*4f364e7cSRobert Mustacchi 
518*4f364e7cSRobert Mustacchi 	bp = base;
519*4f364e7cSRobert Mustacchi 
520*4f364e7cSRobert Mustacchi 	off = genasm_frinit(bp, umem_tmem_off, retoff, erroff,
521*4f364e7cSRobert Mustacchi 	    umem_alloc_sizes[nents - 1]);
522*4f364e7cSRobert Mustacchi 	bp += off;
523*4f364e7cSRobert Mustacchi 	erroff -= off;
524*4f364e7cSRobert Mustacchi 	rbufoff -= off;
525*4f364e7cSRobert Mustacchi 
526*4f364e7cSRobert Mustacchi 	if (nents > 1) {
527*4f364e7cSRobert Mustacchi 		off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff);
528*4f364e7cSRobert Mustacchi 		bp += off;
529*4f364e7cSRobert Mustacchi 		erroff -= off;
530*4f364e7cSRobert Mustacchi 		rbufoff -= off;
531*4f364e7cSRobert Mustacchi 	}
532*4f364e7cSRobert Mustacchi 
533*4f364e7cSRobert Mustacchi 	for (ii = 1; ii < nents - 1; ii++) {
534*4f364e7cSRobert Mustacchi 		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff);
535*4f364e7cSRobert Mustacchi 		bp += off;
536*4f364e7cSRobert Mustacchi 		rbufoff -= off;
537*4f364e7cSRobert Mustacchi 		erroff -= off;
538*4f364e7cSRobert Mustacchi 	}
539*4f364e7cSRobert Mustacchi 
540*4f364e7cSRobert Mustacchi 	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
541*4f364e7cSRobert Mustacchi 	    erroff);
542*4f364e7cSRobert Mustacchi 	bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr);
543*4f364e7cSRobert Mustacchi 	ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);
544*4f364e7cSRobert Mustacchi 
545*4f364e7cSRobert Mustacchi 	return (0);
546*4f364e7cSRobert Mustacchi }
547*4f364e7cSRobert Mustacchi 
548*4f364e7cSRobert Mustacchi /*ARGSUSED*/
549*4f364e7cSRobert Mustacchi int
umem_genasm(int * cp,umem_cache_t ** caches,int nc)550*4f364e7cSRobert Mustacchi umem_genasm(int *cp, umem_cache_t **caches, int nc)
551*4f364e7cSRobert Mustacchi {
552*4f364e7cSRobert Mustacchi 	int nents, i;
553*4f364e7cSRobert Mustacchi 	uint8_t *mptr;
554*4f364e7cSRobert Mustacchi 	uint8_t *fptr;
555*4f364e7cSRobert Mustacchi 	uint64_t v, *vptr;
556*4f364e7cSRobert Mustacchi 
557*4f364e7cSRobert Mustacchi 	mptr = (void *)((uintptr_t)umem_genasm_mptr + 5);
558*4f364e7cSRobert Mustacchi 	fptr = (void *)((uintptr_t)umem_genasm_fptr + 5);
559*4f364e7cSRobert Mustacchi 	if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 ||
560*4f364e7cSRobert Mustacchi 	    umem_genasm_fptr == 0 || umem_genasm_fsize == 0)
561*4f364e7cSRobert Mustacchi 		return (1);
562*4f364e7cSRobert Mustacchi 
563*4f364e7cSRobert Mustacchi 	/*
564*4f364e7cSRobert Mustacchi 	 * The total number of caches that we can service is the minimum of:
565*4f364e7cSRobert Mustacchi 	 *  o the amount supported by libc
566*4f364e7cSRobert Mustacchi 	 *  o the total number of umem caches
567*4f364e7cSRobert Mustacchi 	 *  o we use a single byte addl, so it's MAX_UINT32 / sizeof (uintptr_t)
568*4f364e7cSRobert Mustacchi 	 *    For 64-bit, this is MAX_UINT32 >> 3, a lot.
569*4f364e7cSRobert Mustacchi 	 */
570*4f364e7cSRobert Mustacchi 	nents = _tmem_get_nentries();
571*4f364e7cSRobert Mustacchi 
572*4f364e7cSRobert Mustacchi 	if (UMEM_GENASM_MAX64 < nents)
573*4f364e7cSRobert Mustacchi 		nents = UMEM_GENASM_MAX64;
574*4f364e7cSRobert Mustacchi 
575*4f364e7cSRobert Mustacchi 	if (nc < nents)
576*4f364e7cSRobert Mustacchi 		nents = nc;
577*4f364e7cSRobert Mustacchi 
578*4f364e7cSRobert Mustacchi 	/* Based on our constraints, this is not an error */
579*4f364e7cSRobert Mustacchi 	if (nents == 0 || umem_ptc_size == 0)
580*4f364e7cSRobert Mustacchi 		return (0);
581*4f364e7cSRobert Mustacchi 
582*4f364e7cSRobert Mustacchi 	/* Take into account the jump */
583*4f364e7cSRobert Mustacchi 	if (genasm_malloc(mptr, umem_genasm_msize, nents, cp) != 0)
584*4f364e7cSRobert Mustacchi 		return (1);
585*4f364e7cSRobert Mustacchi 
586*4f364e7cSRobert Mustacchi 	if (genasm_free(fptr, umem_genasm_fsize, nents, cp) != 0)
587*4f364e7cSRobert Mustacchi 		return (1);
588*4f364e7cSRobert Mustacchi 
589*4f364e7cSRobert Mustacchi 
590*4f364e7cSRobert Mustacchi 	/* nop out the jump with a multibyte jump */
591*4f364e7cSRobert Mustacchi 	vptr = (void *)umem_genasm_mptr;
592*4f364e7cSRobert Mustacchi 	v = MULTINOP;
593*4f364e7cSRobert Mustacchi 	v |= *vptr & (0xffffffULL << 40);
594*4f364e7cSRobert Mustacchi 	(void) atomic_swap_64(vptr, v);
595*4f364e7cSRobert Mustacchi 	vptr = (void *)umem_genasm_fptr;
596*4f364e7cSRobert Mustacchi 	v = MULTINOP;
597*4f364e7cSRobert Mustacchi 	v |= *vptr & (0xffffffULL << 40);
598*4f364e7cSRobert Mustacchi 	(void) atomic_swap_64(vptr, v);
599*4f364e7cSRobert Mustacchi 
600*4f364e7cSRobert Mustacchi 	for (i = 0; i < nents; i++)
601*4f364e7cSRobert Mustacchi 		caches[i]->cache_flags |= UMF_PTC;
602*4f364e7cSRobert Mustacchi 
603*4f364e7cSRobert Mustacchi 	return (0);
604*4f364e7cSRobert Mustacchi }
605