1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2013 Joyent, Inc. All rights reserved.
23 */
24
25 /*
26 * Don't Panic! If you find the blocks of assembly that follow confusing and
27 * you're questioning why they exist, please go read section 8 of the umem.c big
28 * theory statement. Next familiarize yourself with the malloc and free
29 * implementations in libumem's malloc.c.
30 *
31 * What follows is the amd64 implementation of the thread caching automatic
32 * assembly generation. The amd64 calling conventions are documented in the
33 * 64-bit System V ABI. For our purposes what matters is that our first argument
34 * will come in rdi. Our functions have to preserve rbp, rbx, and r12->r15. We
35 * are free to do whatever we want with rax, rcx, rdx, rsi, rdi, and r8->r11.
36 *
37 * For both our implementation of malloc and free we only use the registers we
38 * don't have to preserve.
39 *
40 * Malloc register usage:
41 * o. rdi: Original size to malloc. This never changes and is preserved.
42 * o. rsi: Adjusted malloc size for malloc_data_tag(s).
43 * o. rcx: Pointer to the tmem_t in the ulwp_t.
44 * o. rdx: Pointer to the tmem_t array of roots
45 * o. r8: Size of the cache
46 * o. r9: Scratch register
47 *
48 * Free register usage:
49 * o. rdi: Original buffer to free. This never changes and is preserved.
50 * o. rax: The actual buffer, adjusted for the hidden malloc_data_t(s).
51 * o. rcx: Pointer to the tmem_t in the ulwp_t.
52 * o. rdx: Pointer to the tmem_t array of roots
53 * o. r8: Size of the cache
54 * o. r9: Scratch register
55 *
56 * Once we determine what cache we are using, we increment %rdx to the
57 * appropriate offset and set %r8 with the size of the cache. This means that
58 * when we break out to the normal buffer allocation point %rdx contains the
59 * head of the linked list and %r8 is the amount that we have to adjust the
60 * thread's cached amount by.
61 *
 * Each block of assembly has pseudocode that describes its purpose.
63 */
64
65 #include <atomic.h>
66 #include <inttypes.h>
67 #include <sys/types.h>
68 #include <strings.h>
69 #include <umem_impl.h>
70 #include "umem_base.h"
71
72 #include <stdio.h>
73
/* Set to 1 on this platform; NOTE(review): consumers live outside this file. */
const int umem_genasm_supported = 1;
/*
 * Address of the _malloc entry point. umem_genasm() emits the generated malloc
 * code 5 bytes past this address and later overwrites the first 8 bytes here.
 */
static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
/* Size bound used when checking that the generated malloc code fits. */
static size_t umem_genasm_msize = 576;
/* Address of the _free entry point; treated the same way as _malloc above. */
static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
/* Size bound used when checking that the generated free code fits. */
static size_t umem_genasm_fsize = 576;
/* Fallback target of the final jmp in the generated malloc epilogue. */
static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
/* Fallback target of the final jmp in the generated free epilogue. */
static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;
81
/*
 * Largest cache count whose root offset (index scaled by the pointer size)
 * still fits in a 32-bit immediate.
 */
#define	UMEM_GENASM_MAX64	(UINT32_MAX / sizeof (uintptr_t))

/*
 * Displacement for a relative jump whose 4-byte displacement field is located
 * at src and whose target is dest. The CPU measures the displacement from the
 * end of that field, hence the "+ 4". Both arguments are parenthesized so that
 * arbitrary expressions can be passed without precedence surprises.
 */
#define	PTC_JMPADDR(dest, src)	((dest) - ((src) + 4))

/* Size in bytes of one root slot; a cache's root offset is index * this. */
#define	PTC_ROOT_SIZE	(sizeof (uintptr_t))

/*
 * Little-endian bytes 0f 1f 44 00 00: a single five-byte NOP. It occupies the
 * low five bytes of a 64-bit word, leaving the upper three bytes clear.
 */
#define	MULTINOP	0x0000441f0fULL
86
87 /*
88 * void *ptcmalloc(size_t orig_size);
89 *
90 * size_t size = orig_size + 8;
91 * if (size > UMEM_SECOND_ALIGN)
92 * size += 8;
93 *
94 * if (size < orig_size)
95 * goto tomalloc; ! This is overflow
96 *
97 * if (size > cache_max)
98 * goto tomalloc
99 *
100 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
101 * void **roots = t->tm_roots;
102 */
/*
 * Patch points in malinit[], as byte offsets from the start of the template:
 *
 *	JOUT	rel32 of the jb taken when the adjusted size wrapped
 *	MCS	imm32 of the compare against the largest cached size
 *	JOV	rel32 of the ja taken when the size exceeds that maximum
 *	SOFF	imm32 added to the value loaded from %fs:0x0
 *
 * The number opening each comment is the instruction's offset.
 */
#define	PTC_MALINIT_JOUT	0x13
#define	PTC_MALINIT_MCS		0x1a
#define	PTC_MALINIT_JOV		0x20
#define	PTC_MALINIT_SOFF	0x30
static const uint8_t malinit[] = {
	0x48, 0x8d, 0x77, 0x08,			/* 00: leaq 0x8(%rdi),%rsi */
	0x48, 0x83, 0xfe, 0x10,			/* 04: cmpq $0x10,%rsi */
	0x76, 0x04,				/* 08: jbe +0x4 */
	0x48, 0x8d, 0x77, 0x10,			/* 0a: leaq 0x10(%rdi),%rsi */
	0x48, 0x39, 0xfe,			/* 0e: cmpq %rdi,%rsi */
	0x0f, 0x82, 0x00, 0x00, 0x00, 0x00,	/* 11: jb +errout */
	0x48, 0x81, 0xfe,			/* 17: cmpq $MAXCACHE,%rsi */
	0x00, 0x00, 0x00, 0x00,
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* 1e: ja +errout */
	0x64, 0x48, 0x8b, 0x0c, 0x25,		/* 24: movq %fs:0x0,%rcx */
	0x00, 0x00, 0x00, 0x00,
	0x48, 0x81, 0xc1,			/* 2d: addq $SOFF,%rcx */
	0x00, 0x00, 0x00, 0x00,
	0x48, 0x8d, 0x51, 0x08			/* 34: leaq 0x8(%rcx),%rdx */
};
123
124 /*
125 * void ptcfree(void *buf);
126 *
127 * if (buf == NULL)
128 * return;
129 *
130 * malloc_data_t *tag = buf;
131 * tag--;
132 * int size = tag->malloc_size;
133 * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
134 * if (tagval == MALLOC_SECOND_MAGIC) {
135 * tag--;
136 * } else if (tagval != MALLOC_MAGIC) {
137 * goto tofree;
138 * }
139 *
140 * if (size > cache_max)
141 * goto tofree;
142 *
143 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
144 * void **roots = t->tm_roots;
145 */
/*
 * Patch points in freeinit[], as byte offsets from the start of the template:
 *
 *	JDONE	rel32 of the je taken when the buffer is NULL
 *	JFREE	rel32 of the jne taken when neither magic value matches
 *	MCS	imm32 of the compare against the largest cached size
 *	JOV	rel32 of the ja taken when the size exceeds that maximum
 *	SOFF	imm32 added to the value loaded from %fs:0x0
 *
 * The number opening each comment is the instruction's offset.
 */
#define	PTC_FRINI_JDONE		0x05
#define	PTC_FRINI_JFREE		0x25
#define	PTC_FRINI_MCS		0x30
#define	PTC_FRINI_JOV		0x36
#define	PTC_FRINI_SOFF		0x46
static const uint8_t freeinit[] = {
	0x48, 0x85, 0xff,			/* 00: testq %rdi,%rdi */
	0x0f, 0x84, 0x00, 0x00, 0x00, 0x00,	/* 03: je $JDONE (done) */
	0x8b, 0x77, 0xf8,			/* 09: movl -0x8(%rdi),%esi */
	0x8b, 0x47, 0xfc,			/* 0c: movl -0x4(%rdi),%eax */
	0x01, 0xf0,				/* 0f: addl %esi,%eax */
	0x3d, 0x00, 0x70, 0xba, 0x16,		/* 11: cmpl $MALLOC_2_MAGIC,%eax */
	0x75, 0x06,				/* 16: jne +0x6 (checkover) */
	0x48, 0x8d, 0x47, 0xf0,			/* 18: leaq -0x10(%rdi),%rax */
	0xeb, 0x0f,				/* 1c: jmp +0xf (freebuf) */
	0x3d, 0x00, 0xc0, 0x10, 0x3a,		/* 1e: cmpl $MALLOC_MAGIC,%eax */
	0x0f, 0x85, 0x00, 0x00, 0x00, 0x00,	/* 23: jne +JFREE (tofree) */
	0x48, 0x8d, 0x47, 0xf8,			/* 29: leaq -0x8(%rdi),%rax */
	0x48, 0x81, 0xfe,			/* 2d: cmpq $MAXCACHE,%rsi */
	0x00, 0x00, 0x00, 0x00,
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* 34: ja +errout */
	0x64, 0x48, 0x8b, 0x0c, 0x25,		/* 3a: movq %fs:0x0,%rcx */
	0x00, 0x00, 0x00, 0x00,
	0x48, 0x81, 0xc1,			/* 43: addq $SOFF,%rcx */
	0x00, 0x00, 0x00, 0x00,
	0x48, 0x8d, 0x51, 0x08			/* 4a: leaq 0x8(%rcx),%rdx */
};
173
174 /*
175 * if (size <= $CACHE_SIZE) {
176 * csize = $CACHE_SIZE;
177 * } else ... ! goto next cache
178 */
/*
 * Patch points in inicache[], as byte offsets from the start of the template:
 *
 *	CMP	imm32 of the compare against this cache's size
 *	SIZE	imm32 loaded into %r8 (this cache's size)
 *	JMP	rel32 of the jmp to the buffer handling label
 *
 * The ja skips the 0xc bytes that follow it, i.e. the rest of this template.
 */
#define	PTC_INICACHE_CMP	0x03
#define	PTC_INICACHE_SIZE	0x0c
#define	PTC_INICACHE_JMP	0x11
static const uint8_t inicache[] = {
	0x48, 0x81, 0xfe,			/* 00: cmpq $CACHESIZE,%rsi */
	0x00, 0x00, 0x00, 0x00,
	0x77, 0x0c,				/* 07: ja +0xc (next cache) */
	0x49, 0xc7, 0xc0,			/* 09: movq $CACHESIZE,%r8 */
	0x00, 0x00, 0x00, 0x00,
	0xe9, 0x00, 0x00, 0x00, 0x00		/* 10: jmp $JMP (allocbuf) */
};
190
191 /*
192 * if (size <= $CACHE_SIZE) {
193 * csize = $CACHE_SIZE;
194 * roots += $CACHE_NUM;
195 * } else ... ! goto next cache
196 */
/*
 * Patch points in gencache[], as byte offsets from the start of the template:
 *
 *	CMP	imm32 of the compare against this cache's size
 *	SIZE	imm32 loaded into %r8 (this cache's size)
 *	NUM	imm32 added to %rdx (byte offset of this cache's root)
 *	JMP	rel32 of the jmp to the buffer handling label
 *
 * The ja must skip exactly the rest of this template so that it lands on the
 * first byte (the REX prefix) of whatever check follows. A rel8 displacement
 * is measured from the end of the jump, so it is the 7-byte movq, the 7-byte
 * addq and the 5-byte jmp: 0x13.
 */
#define	PTC_GENCACHE_CMP	0x03
#define	PTC_GENCACHE_SIZE	0x0c
#define	PTC_GENCACHE_NUM	0x13
#define	PTC_GENCACHE_JMP	0x18
static const uint8_t gencache[] = {
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x77, 0x13,			/* ja +0x13 (next cache) */
	0x49, 0xc7, 0xc0,
	0x00, 0x00, 0x00, 0x00,		/* movq sizeof ($CACHE), %r8 */
	0x48, 0x81, 0xc2,
	0x00, 0x00, 0x00, 0x00,		/* addq $8*ii, %rdx */
	0xe9, 0x00, 0x00, 0x00, 0x00	/* jmp +$JMP (allocbuf) */
};
211
212 /*
213 * else if (size <= $CACHE_SIZE) {
214 * csize = $CACHE_SIZE;
215 * roots += $CACHE_NUM;
216 * } else {
217 * goto tofunc; ! goto tomalloc if ptcmalloc.
218 * } ! goto tofree if ptcfree.
219 */
/*
 * Patch points in fincache[], as byte offsets from the start of the template:
 *
 *	CMP	imm32 of the compare against this cache's size
 *	JMP	rel8 of the ja taken when the size fits no cache
 *	SIZE	imm32 loaded into %r8 (this cache's size)
 *	NUM	imm32 added to %rdx (byte offset of this cache's root)
 *
 * There is no trailing jmp: execution falls through into the code emitted
 * directly after this template.
 */
#define	PTC_FINCACHE_CMP	0x03
#define	PTC_FINCACHE_JMP	0x08
#define	PTC_FINCACHE_SIZE	0x0c
#define	PTC_FINCACHE_NUM	0x13
static const uint8_t fincache[] = {
	0x48, 0x81, 0xfe,			/* 00: cmpq $CACHESIZE,%rsi */
	0x00, 0x00, 0x00, 0x00,
	0x77, 0x00,				/* 07: ja +JMP (fallback) */
	0x49, 0xc7, 0xc0,			/* 09: movq $CACHESIZE,%r8 */
	0x00, 0x00, 0x00, 0x00,
	0x48, 0x81, 0xc2,			/* 10: addq $8*ii,%rdx */
	0x00, 0x00, 0x00, 0x00
};
234
235 /*
236 * if (*root == NULL)
237 * goto tomalloc;
238 *
239 * malloc_data_t *ret = *root;
240 * *root = *(void **)ret;
 * t->tm_size -= csize;
242 * ret->malloc_size = size;
243 *
244 * if (size > UMEM_SECOND_ALIGN) {
245 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size);
246 * ret += 2;
247 * } else {
 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_MAGIC, size);
249 * ret += 1;
250 * }
251 *
252 * return ((void *)ret);
253 * tomalloc:
254 * return (malloc(orig_size));
255 */
/*
 * Labels and patch points in malfini[], as byte offsets from its start:
 *
 *	ALLABEL	entry point the cache checks jump (or fall through) to
 *	JMLABEL	the trailing jmp; bail-out branches target this
 *	JMADDR	rel32 of that trailing jmp
 *
 * The number opening each comment is the instruction's offset.
 */
#define	PTC_MALFINI_ALLABEL	0x00
#define	PTC_MALFINI_JMLABEL	0x40
#define	PTC_MALFINI_JMADDR	0x41
static const uint8_t malfini[] = {
	0x48, 0x8b, 0x02,			/* 00: movq (%rdx),%rax */
	0x48, 0x85, 0xc0,			/* 03: testq %rax,%rax */
	0x74, 0x38,				/* 06: je +0x38 (errout) */
	0x4c, 0x8b, 0x08,			/* 08: movq (%rax),%r9 */
	0x4c, 0x89, 0x0a,			/* 0b: movq %r9,(%rdx) */
	0x4c, 0x29, 0x01,			/* 0e: subq %r8,(%rcx) */
	0x48, 0x83, 0xfe, 0x10,			/* 11: cmpq $0x10,%rsi */
	0x76, 0x15,				/* 15: jbe +0x15 */
	0x41, 0xb9, 0x00, 0x70, 0xba, 0x16,	/* 17: movl $MALLOC_2_MAGIC,%r9d */
	0x89, 0x70, 0x08,			/* 1d: movl %esi,0x8(%rax) */
	0x41, 0x29, 0xf1,			/* 20: subl %esi,%r9d */
	0x44, 0x89, 0x48, 0x0c,			/* 23: movl %r9d,0xc(%rax) */
	0x48, 0x83, 0xc0, 0x10,			/* 27: addq $0x10,%rax */
	0xc3,					/* 2b: ret */
	0x41, 0xb9, 0x00, 0xc0, 0x10, 0x3a,	/* 2c: movl $MALLOC_MAGIC,%r9d */
	0x89, 0x30,				/* 32: movl %esi,(%rax) */
	0x41, 0x29, 0xf1,			/* 34: subl %esi,%r9d */
	0x44, 0x89, 0x48, 0x04,			/* 37: movl %r9d,0x4(%rax) */
	0x48, 0x83, 0xc0, 0x08,			/* 3b: addq $0x8,%rax */
	0xc3,					/* 3f: ret */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* 40: jmp $MALLOC */
};
282
283 /*
284 * if (t->tm_size + csize > umem_ptc_size)
285 * goto tofree;
286 *
287 * t->tm_size += csize
288 * *(void **)tag = *root;
289 * *root = tag;
290 * return;
291 * tofree:
292 * free(buf);
293 * return;
294 */
/*
 * Labels and patch points in freefini[], as byte offsets from its start:
 *
 *	RBUFLABEL	entry point the cache checks jump (or fall through) to
 *	CACHEMAX	imm32 of the compare against the per-thread limit
 *	DONELABEL	the ret; the NULL-buffer branch targets this
 *	JFLABEL		the trailing jmp; bail-out branches target this
 *	JFADDR		rel32 of that trailing jmp
 *
 * The number opening each comment is the instruction's offset.
 */
#define	PTC_FRFINI_RBUFLABEL	0x00
#define	PTC_FRFINI_CACHEMAX	0x09
#define	PTC_FRFINI_DONELABEL	0x1b
#define	PTC_FRFINI_JFLABEL	0x1c
#define	PTC_FRFINI_JFADDR	0x1d
static const uint8_t freefini[] = {
	0x4c, 0x8b, 0x09,			/* 00: movq (%rcx),%r9 */
	0x4d, 0x01, 0xc1,			/* 03: addq %r8,%r9 */
	0x49, 0x81, 0xf9,			/* 06: cmpq $THR_CACHE_MAX,%r9 */
	0x00, 0x00, 0x00, 0x00,
	0x77, 0x0d,				/* 0d: ja +0xd (tofree) */
	0x4c, 0x01, 0x01,			/* 0f: addq %r8,(%rcx) */
	0x4c, 0x8b, 0x0a,			/* 12: movq (%rdx),%r9 */
	0x4c, 0x89, 0x08,			/* 15: movq %r9,(%rax) */
	0x48, 0x89, 0x02,			/* 18: movq %rax,(%rdx) */
	0xc3,					/* 1b: ret */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* 1c: jmp free */
};
313
314 /*
315 * Construct the initial part of malloc. off contains the offset from curthread
 * to the root of the tmem structure. ep is the offset, relative to bp, of the
 * error label, whose jump hands the request to the real malloc. csize is the
 * size of the largest umem_cache in ptcumem.
318 */
319 static int
genasm_malinit(uint8_t * bp,uint32_t off,uint32_t ep,uint32_t csize)320 genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize)
321 {
322 uint32_t addr;
323
324 bcopy(malinit, bp, sizeof (malinit));
325 addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT);
326 bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr));
327 bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize));
328 addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV);
329 bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr));
330 bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off));
331
332 return (sizeof (malinit));
333 }
334
335 static int
genasm_frinit(uint8_t * bp,uint32_t off,uint32_t dp,uint32_t ep,uint32_t mcs)336 genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mcs)
337 {
338 uint32_t addr;
339
340 bcopy(freeinit, bp, sizeof (freeinit));
341 addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE);
342 bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr));
343 addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE);
344 bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr));
345 bcopy(&mcs, bp + PTC_FRINI_MCS, sizeof (mcs));
346 addr = PTC_JMPADDR(ep, PTC_FRINI_JOV);
347 bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr));
348 bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off));
349 return (sizeof (freeinit));
350 }
351
352
353 /*
354 * Create the initial cache entry of the specified size. The value of ap tells
355 * us what the address of the label to try and allocate a buffer. This value is
356 * an offset from the current base to that value.
357 */
358 static int
genasm_firstcache(uint8_t * bp,uint32_t csize,uint32_t ap)359 genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap)
360 {
361 uint32_t addr;
362
363 bcopy(inicache, bp, sizeof (inicache));
364 bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize));
365 bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize));
366 addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP);
367 ASSERT(addr != 0);
368 bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr));
369
370 return (sizeof (inicache));
371 }
372
373 static int
genasm_gencache(uint8_t * bp,int num,uint32_t csize,uint32_t ap)374 genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap)
375 {
376 uint32_t addr;
377 uint32_t coff;
378
379 ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num);
380 ASSERT(num != 0);
381 bcopy(gencache, bp, sizeof (gencache));
382 bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize));
383 bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize));
384 coff = num * PTC_ROOT_SIZE;
385 bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff));
386 addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP);
387 bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr));
388
389 return (sizeof (gencache));
390 }
391
392 static int
genasm_lastcache(uint8_t * bp,int num,uint32_t csize,uint32_t ep)393 genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep)
394 {
395 uint8_t eap;
396 uint32_t coff;
397
398 ASSERT(ep <= 0xff && ep > 7);
399 ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num);
400 bcopy(fincache, bp, sizeof (fincache));
401 bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize));
402 bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize));
403 coff = num * PTC_ROOT_SIZE;
404 bcopy(&coff, bp + PTC_FINCACHE_NUM, sizeof (coff));
405 eap = ep - PTC_FINCACHE_JMP - 1;
406 bcopy(&eap, bp + PTC_FINCACHE_JMP, sizeof (eap));
407
408 return (sizeof (fincache));
409 }
410
411 static int
genasm_malfini(uint8_t * bp,uintptr_t mptr)412 genasm_malfini(uint8_t *bp, uintptr_t mptr)
413 {
414 uint32_t addr;
415
416 bcopy(malfini, bp, sizeof (malfini));
417 addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR));
418 bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr));
419
420 return (sizeof (malfini));
421 }
422
423 static int
genasm_frfini(uint8_t * bp,uint32_t maxthr,uintptr_t fptr)424 genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr)
425 {
426 uint32_t addr;
427
428 bcopy(freefini, bp, sizeof (freefini));
429 bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr));
430 addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR));
431 bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr));
432
433 return (sizeof (freefini));
434 }
435
436 /*
437 * The malloc inline assembly is constructed as follows:
438 *
439 * o Malloc prologue assembly
440 * o Generic first-cache check
 * o n Generic cache checks (where n = _tmem_get_nentries() - 2)
442 * o Generic last-cache check
443 * o Malloc epilogue assembly
444 *
445 * Generally there are at least three caches. When there is only one cache we
446 * only use the generic last-cache. In the case where there are two caches, we
447 * just leave out the middle ones.
448 */
449 static int
genasm_malloc(void * base,size_t len,int nents,int * umem_alloc_sizes)450 genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes)
451 {
452 int ii, off;
453 uint8_t *bp;
454 size_t total;
455 uint32_t allocoff, erroff;
456
457 total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache);
458
459 if (nents >= 2)
460 total += sizeof (inicache) + sizeof (gencache) * (nents - 2);
461
462 if (total > len)
463 return (1);
464
465 erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL;
466 allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL;
467
468 bp = base;
469
470 off = genasm_malinit(bp, umem_tmem_off, erroff,
471 umem_alloc_sizes[nents-1]);
472 bp += off;
473 allocoff -= off;
474 erroff -= off;
475
476 if (nents > 1) {
477 off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff);
478 bp += off;
479 allocoff -= off;
480 erroff -= off;
481 }
482
483 for (ii = 1; ii < nents - 1; ii++) {
484 off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff);
485 bp += off;
486 allocoff -= off;
487 erroff -= off;
488 }
489
490 bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
491 erroff);
492 bp += genasm_malfini(bp, umem_genasm_omptr);
493 ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);
494
495 return (0);
496 }
497
498 static int
genasm_free(void * base,size_t len,int nents,int * umem_alloc_sizes)499 genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes)
500 {
501 uint8_t *bp;
502 int ii, off;
503 size_t total;
504 uint32_t rbufoff, retoff, erroff;
505
506 /* Assume that nents has already been audited for us */
507 total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache);
508 if (nents >= 2)
509 total += sizeof (inicache) + sizeof (gencache) * (nents - 2);
510
511 if (total > len)
512 return (1);
513
514 erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL);
515 rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL);
516 retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL);
517
518 bp = base;
519
520 off = genasm_frinit(bp, umem_tmem_off, retoff, erroff,
521 umem_alloc_sizes[nents - 1]);
522 bp += off;
523 erroff -= off;
524 rbufoff -= off;
525
526 if (nents > 1) {
527 off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff);
528 bp += off;
529 erroff -= off;
530 rbufoff -= off;
531 }
532
533 for (ii = 1; ii < nents - 1; ii++) {
534 off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff);
535 bp += off;
536 rbufoff -= off;
537 erroff -= off;
538 }
539
540 bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
541 erroff);
542 bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr);
543 ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);
544
545 return (0);
546 }
547
548 /*ARGSUSED*/
549 int
umem_genasm(int * cp,umem_cache_t ** caches,int nc)550 umem_genasm(int *cp, umem_cache_t **caches, int nc)
551 {
552 int nents, i;
553 uint8_t *mptr;
554 uint8_t *fptr;
555 uint64_t v, *vptr;
556
557 mptr = (void *)((uintptr_t)umem_genasm_mptr + 5);
558 fptr = (void *)((uintptr_t)umem_genasm_fptr + 5);
559 if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 ||
560 umem_genasm_fptr == 0 || umem_genasm_fsize == 0)
561 return (1);
562
563 /*
564 * The total number of caches that we can service is the minimum of:
565 * o the amount supported by libc
566 * o the total number of umem caches
567 * o we use a single byte addl, so it's MAX_UINT32 / sizeof (uintptr_t)
568 * For 64-bit, this is MAX_UINT32 >> 3, a lot.
569 */
570 nents = _tmem_get_nentries();
571
572 if (UMEM_GENASM_MAX64 < nents)
573 nents = UMEM_GENASM_MAX64;
574
575 if (nc < nents)
576 nents = nc;
577
578 /* Based on our constraints, this is not an error */
579 if (nents == 0 || umem_ptc_size == 0)
580 return (0);
581
582 /* Take into account the jump */
583 if (genasm_malloc(mptr, umem_genasm_msize, nents, cp) != 0)
584 return (1);
585
586 if (genasm_free(fptr, umem_genasm_fsize, nents, cp) != 0)
587 return (1);
588
589
590 /* nop out the jump with a multibyte jump */
591 vptr = (void *)umem_genasm_mptr;
592 v = MULTINOP;
593 v |= *vptr & (0xffffffULL << 40);
594 (void) atomic_swap_64(vptr, v);
595 vptr = (void *)umem_genasm_fptr;
596 v = MULTINOP;
597 v |= *vptr & (0xffffffULL << 40);
598 (void) atomic_swap_64(vptr, v);
599
600 for (i = 0; i < nents; i++)
601 caches[i]->cache_flags |= UMF_PTC;
602
603 return (0);
604 }
605