/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2014 Joyent, Inc. All rights reserved.
 */

/*
 * Don't Panic! If you find the blocks of assembly that follow confusing and
 * you're questioning why they exist, please go read section 8 of the umem.c big
 * theory statement. Next familiarize yourself with the malloc and free
 * implementations in libumem's malloc.c.
 *
 * What follows is the i386 implementation of the thread caching automatic
 * assembly generation. With i386 a function only has three registers it's
 * allowed to change without restoring them: eax, ecx, and edx. All others have
 * to be preserved. Since the set of registers we have available is so small, we
 * have to make use of esi, ebx, and edi and save their original values to the
 * stack.
 *
 * Malloc register usage:
 *	o. esi: Size of the malloc (passed into us and modified)
 *	o. edi: Size of the cache
 *	o. eax: Buffer to return
 *	o. ebx: Scratch space and temporary values
 *	o. ecx: Pointer to the tmem_t in the ulwp_t.
 *	o. edx: Pointer to the tmem_t array of roots
 *
 * Free register usage:
 *	o. esi: Size of the original allocation (read from the buffer's tag)
 *	o. edi: Size of the cache
 *	o. eax: Buffer to free
 *	o. ebx: Scratch space and temporary values
 *	o. ecx: Pointer to the tmem_t in the ulwp_t.
 *	o. edx: Pointer to the tmem_t array of roots
 *
 * Once we determine what cache we are using, we increment %edx to the
 * appropriate offset and set %edi with the size of the cache. This means that
 * when we break out to the normal buffer allocation point %edx contains the
 * head of the linked list and %edi is the amount that we have to adjust the
 * total amount cached by the thread.
 *
 * Each block of assembly has pseudocode that describes its purpose.
 */

#include <inttypes.h>
#include <strings.h>
#include <umem_impl.h>
#include "umem_base.h"

#include <atomic.h>

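/*
 * _malloc and _free are the entry points whose reserved bodies (see section 8
 * of the umem.c big theory statement) we overwrite with the generated code;
 * umem_malloc and umem_malloc_free are the functions the generated code falls
 * back to when a request cannot be satisfied from the per-thread cache.
 */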
const int umem_genasm_supported = 1;
static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
static size_t umem_genasm_msize = 512;
static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
static size_t umem_genasm_fsize = 512;
static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;
/*
 * The maximum number of caches we can support. We use a single-byte addl, so
 * this is 255 (UINT8_MAX) / sizeof (uintptr_t). In this case, 63.
 */
#define	UMEM_GENASM_MAX32	63

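/*
 * PTC_JMPADDR computes the 32-bit relative displacement for a jmp/jcc whose
 * 4-byte operand begins at offset src and whose target is at offset dest; the
 * displacement is relative to the end of the instruction, hence the + 4.
 */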
#define	PTC_JMPADDR(dest, src)	(dest - (src + 4))
#define	PTC_ROOT_SIZE	sizeof (uintptr_t)
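/*
 * MULTINOP is the five-byte nop 0f 1f 44 00 00 (nopl 0x0(%eax,%eax,1)),
 * arranged so that writing the value to memory little-endian places the nop
 * in the first five bytes.
 */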
#define	MULTINOP	0x0000441f0f

/*
 * void *ptcmalloc(size_t orig_size);
 *
 * size_t size = orig_size + 8;
 *
 * if (size < orig_size)
 *	goto tomalloc;		! This is overflow
 *
 * if (size > cache_size)
 *	goto tomalloc;
 *
 * tmem_t *t = (uintptr_t)curthread() + umem_tmem_off;
 * void **roots = t->tm_roots;
 */
#define	PTC_MALINIT_JOUT	0x0e
#define	PTC_MALINIT_MCS	0x14
#define	PTC_MALINIT_JOV	0x1a
#define	PTC_MALINIT_SOFF	0x27
static const uint8_t malinit[] = {
	0x55,					/* pushl %ebp */
	0x89, 0xe5,				/* movl %esp, %ebp */
	0x57,					/* pushl %edi */
	0x56,					/* pushl %esi */
	0x53,					/* pushl %ebx */
	0x8b, 0x75, 0x08,			/* movl 0x8(%ebp), %esi */
	0x83, 0xc6, 0x08,			/* addl $0x8,%esi */
	0x0f, 0x82, 0x00, 0x00, 0x00, 0x00,	/* jc +$JMP (errout) */
	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +$JMP (errout) */
	0x65, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, /* movl %gs:0x0,%ecx */
	0x81, 0xc1, 0x00, 0x00, 0x00, 0x00,	/* addl $OFF, %ecx */
	0x8d, 0x51, 0x04			/* leal 0x4(%ecx), %edx */
};

/*
 * void ptcfree(void *buf);
 *
 * if (buf == NULL)
 *	return;
 *
 * malloc_data_t *tag = buf;
 * tag--;
 * int size = tag->malloc_size;
 * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
 *
 * if (tagval != MALLOC_MAGIC)
 *	goto tofree;
 *
 * if (size > cache_max)
 *	goto tofree;
 *
 * tmem_t *t = (uintptr_t)curthread() + umem_tmem_off;
 * void **roots = t->tm_roots;
 */
#define	PTC_FRINI_JDONE	0x0d
#define	PTC_FRINI_JFREE	0x23
#define	PTC_FRINI_MCS	0x29
#define	PTC_FRINI_JOV	0x2f
#define	PTC_FRINI_SOFF	0x3c
static const uint8_t freeinit[] = {
	0x55,					/* pushl %ebp */
	0x89, 0xe5,				/* movl %esp, %ebp */
	0x57,					/* pushl %edi */
	0x56,					/* pushl %esi */
	0x53,					/* pushl %ebx */
	0x8b, 0x45, 0x08,			/* movl 0x8(%ebp), %eax */
	0x85, 0xc0,				/* testl %eax, %eax */
	0x0f, 0x84, 0x00, 0x00, 0x00, 0x00,	/* je $JDONE (done) */
	0x83, 0xe8, 0x08,			/* subl $0x8,%eax */
	0x8b, 0x30,				/* movl (%eax),%esi */
	0x8b, 0x50, 0x04,			/* movl 0x4(%eax),%edx */
	0x01, 0xf2,				/* addl %esi,%edx */
	0x81, 0xfa, 0x00, 0xc0, 0x10, 0x3a,	/* cmpl MAGIC32, %edx */
	0x0f, 0x85, 0x00, 0x00, 0x00, 0x00,	/* jne +JFREE (goto realfree) */

	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +$JMP (goto realfree) */
	0x65, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, /* movl %gs:0x0,%ecx */
	0x81, 0xc1, 0x00, 0x00, 0x00, 0x00,	/* addl $OFF, %ecx */
	0x8d, 0x51, 0x04			/* leal 0x4(%ecx),%edx */
};

/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 * } else ...			! goto next cache
 */
#define	PTC_INICACHE_CMP	0x02
#define	PTC_INICACHE_SIZE	0x09
#define	PTC_INICACHE_JMP	0x0e
static const uint8_t inicache[] = {
	0x81, 0xfe, 0xff, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x77, 0x0a,				/* ja +0xa */
	0xbf, 0xff, 0x00, 0x00, 0x00,		/* movl sizeof ($C0), %edi */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp +$JMP (allocbuf) */
};

/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 *	roots += $CACHE_NUM;
 * } else ...			! goto next cache
 */
#define	PTC_GENCACHE_CMP	0x02
#define	PTC_GENCACHE_NUM	0x0a
#define	PTC_GENCACHE_SIZE	0x0c
#define	PTC_GENCACHE_JMP	0x11
static const uint8_t gencache[] = {
	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($CACHE), %esi */
	0x77, 0x0d,				/* ja +0xd (next cache) */
	0x83, 0xc2, 0x00,			/* addl $4*$ii, %edx */
	0xbf, 0x00, 0x00, 0x00, 0x00,		/* movl sizeof ($CACHE), %edi */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp +$JMP (allocbuf) */
};

/*
 * else if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 *	roots += $CACHE_NUM;
 * } else {
 *	goto tofunc;		! goto tomalloc if ptcmalloc.
 * }				! goto tofree if ptcfree.
 */
#define	PTC_FINCACHE_CMP	0x02
#define	PTC_FINCACHE_JMP	0x07
#define	PTC_FINCACHE_NUM	0x0a
#define	PTC_FINCACHE_SIZE	0x0c
static const uint8_t fincache[] = {
	0x81, 0xfe, 0xff, 0x00, 0x00, 0x00,	/* cmpl sizeof ($CLAST), %esi */
	0x77, 0x00,				/* ja +$JMP (to errout) */
	0x83, 0xc2, 0x00,			/* addl $4*($NCACHES-1), %edx */
	0xbf, 0x00, 0x00, 0x00, 0x00,		/* movl sizeof ($CLAST), %edi */
};

/*
 * if (*root == NULL)
 *	goto tomalloc;
 *
 * malloc_data_t *ret = *root;
 * *root = *(void **)ret;
 * t->tm_size -= csize;
 * ret->malloc_size = size;
 *
 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_MAGIC, size);
 * ret++;
 *
 * return ((void *)ret);
 * tomalloc:
 *	return (malloc(orig_size));
 */
#define	PTC_MALFINI_ALLABEL	0x00
#define	PTC_MALFINI_JMLABEL	0x20
#define	PTC_MALFINI_JMADDR	0x25
static const uint8_t malfini[] = {
	/* allocbuf: */
	0x8b, 0x02,				/* movl (%edx), %eax */
	0x85, 0xc0,				/* testl %eax, %eax */
	0x74, 0x1a,				/* je +0x1a (errout) */
	0x8b, 0x18,				/* movl (%eax), %ebx */
	0x89, 0x1a,				/* movl %ebx, (%edx) */
	0x29, 0x39,				/* subl %edi, (%ecx) */
	0x89, 0x30,				/* movl %esi, (%eax) */
	0xba, 0x00, 0xc0, 0x10, 0x3a,		/* movl $0x3a10c000,%edx */
	0x29, 0xf2,				/* subl %esi, %edx */
	0x89, 0x50, 0x04,			/* movl %edx, 0x4(%eax) */
	0x83, 0xc0, 0x08,			/* addl $0x8, %eax */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xc3,					/* ret */
	/* errout: */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp $malloc */
};

/*
 * if (t->tm_size + csize > umem_ptc_size)
 *	goto tofree;
 *
 * t->tm_size += csize;
 * *(void **)tag = *root;
 * *root = tag;
 * return;
 * tofree:
 *	free(buf);
 *	return;
 */
#define	PTC_FRFINI_RBUFLABEL	0x00
#define	PTC_FRFINI_CACHEMAX	0x06
#define	PTC_FRFINI_DONELABEL	0x14
#define	PTC_FRFINI_JFLABEL	0x19
#define	PTC_FRFINI_JFADDR	0x1e
static const uint8_t freefini[] = {
	/* freebuf: */
	0x8b, 0x19,				/* movl (%ecx),%ebx */
	0x01, 0xfb,				/* addl %edi,%ebx */
	0x81, 0xfb, 0x00, 0x00, 0x00, 0x00,	/* cmpl maxsize, %ebx */
	0x73, 0x0d,				/* jae +0xd <tofree> */
	0x01, 0x39,				/* addl %edi,(%ecx) */
	0x8b, 0x3a,				/* movl (%edx),%edi */
	0x89, 0x38,				/* movl %edi,(%eax) */
	0x89, 0x02,				/* movl %eax,(%edx) */
	/* done: */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xc3,					/* ret */
	/* realfree: */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp free */
};

/*
 * Construct the initial part of malloc. off contains the offset from curthread
 * to the root of the tmem structure. ep is the offset of the label where we
 * bail out and jump to the real malloc. csize is the size of the largest
 * umem_cache in ptcumem.
 */
static int
genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize)
{
	uint32_t addr;

	bcopy(malinit, bp, sizeof (malinit));
	addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT);
	bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr));
	bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize));
	addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV);
	bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr));
	bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off));

	return (sizeof (malinit));
}

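/*
 * Construct the initial part of free. off contains the offset from curthread
 * to the root of the tmem structure. dp is the offset of the label we jump to
 * when handed a NULL buffer, ep is the offset of the label that falls back to
 * the real free, and mc is the size of the largest cache that we service.
 */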
static int
genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mc)
{
	uint32_t addr;

	bcopy(freeinit, bp, sizeof (freeinit));
	addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE);
	bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr));
	addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE);
	bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr));
	bcopy(&mc, bp + PTC_FRINI_MCS, sizeof (mc));
	addr = PTC_JMPADDR(ep, PTC_FRINI_JOV);
	bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr));
	bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off));
	return (sizeof (freeinit));
}

/*
 * Create the initial cache entry of the specified size. ap is the offset,
 * relative to the current base, of the label where we attempt to allocate a
 * buffer.
 */
static int
genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap)
{
	uint32_t addr;

	bcopy(inicache, bp, sizeof (inicache));
	bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize));
	addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP);
	ASSERT(addr != 0);
	bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr));

	return (sizeof (inicache));
}

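/*
 * Create the cache check for cache number num of size csize. ap is the offset,
 * relative to the current base, of the label where we attempt to allocate a
 * buffer.
 */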
static int
genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap)
{
	uint32_t addr;
	uint8_t coff;

	ASSERT(256 / PTC_ROOT_SIZE > num);
	ASSERT(num != 0);
	bcopy(gencache, bp, sizeof (gencache));
	bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize));
	coff = num * PTC_ROOT_SIZE;
	bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff));
	addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP);
	bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr));

	return (sizeof (gencache));
}

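/*
 * Create the check for the final cache. Unlike the earlier checks, the ja here
 * jumps directly to the error label, so ep is a small forward offset that must
 * fit in the instruction's single-byte displacement.
 */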
static int
genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep)
{
	uint8_t addr;

	ASSERT(ep <= 0xff && ep > 7);
	ASSERT(256 / PTC_ROOT_SIZE > num);
	bcopy(fincache, bp, sizeof (fincache));
	bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize));
	addr = num * PTC_ROOT_SIZE;
	bcopy(&addr, bp + PTC_FINCACHE_NUM, sizeof (addr));
	addr = ep - PTC_FINCACHE_JMP - 1;
	bcopy(&addr, bp + PTC_FINCACHE_JMP, sizeof (addr));

	return (sizeof (fincache));
}

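/*
 * Emit the tail of malloc: pop a buffer off the selected root, decrement the
 * thread's cached total, write the malloc tag, and return the buffer. The
 * error path restores the saved registers and tail-jumps to the original
 * malloc at mptr.
 */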
static int
genasm_malfini(uint8_t *bp, uintptr_t mptr)
{
	uint32_t addr;

	bcopy(malfini, bp, sizeof (malfini));
	addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR));
	bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr));

	return (sizeof (malfini));
}

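/*
 * Emit the tail of free: if caching the buffer would push the thread's total
 * past maxthr, restore the saved registers and tail-jump to the original free
 * at fptr; otherwise push the buffer onto the selected root and return.
 */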
static int
genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr)
{
	uint32_t addr;

	bcopy(freefini, bp, sizeof (freefini));
	bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr));
	addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR));
	bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr));

	return (sizeof (freefini));
}

/*
 * The malloc inline assembly is constructed as follows:
 *
 * o Malloc prologue assembly
 * o Generic first-cache check
 * o n Generic cache checks (where n = _tmem_get_nentries() - 2)
 * o Generic last-cache check
 * o Malloc epilogue assembly
 *
 * Generally there are at least three caches. When there is only one cache we
 * only use the generic last-cache. In the case where there are two caches, we
 * just leave out the middle checks.
 */
static int
genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes)
{
	int ii, off;
	uint8_t *bp;
	size_t total;
	uint32_t allocoff, erroff;

	total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache);

	if (nents >= 2)
		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);

	if (total > len)
		return (1);

	erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL;
	allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL;

	bp = base;

	off = genasm_malinit(bp, umem_tmem_off, erroff,
	    umem_alloc_sizes[nents-1]);
	bp += off;
	allocoff -= off;
	erroff -= off;

	if (nents > 1) {
		off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff);
		bp += off;
		allocoff -= off;
		erroff -= off;
	}

	for (ii = 1; ii < nents - 1; ii++) {
		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff);
		bp += off;
		allocoff -= off;
		erroff -= off;
	}

	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
	    erroff);
	bp += genasm_malfini(bp, umem_genasm_omptr);
	ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);

	return (0);
}

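/*
 * The free inline assembly is constructed the same way as malloc: a free
 * prologue, a first-cache check, generic cache checks, a last-cache check, and
 * the free epilogue, with the same special cases for one or two caches.
 */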
static int
genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes)
{
	uint8_t *bp;
	int ii, off;
	size_t total;
	uint32_t rbufoff, retoff, erroff;

	/* Assume that nents has already been audited for us */
	total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache);
	if (nents >= 2)
		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);

	if (total > len)
		return (1);

	erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL);
	rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL);
	retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL);

	bp = base;

	off = genasm_frinit(bp, umem_tmem_off, retoff, erroff,
	    umem_alloc_sizes[nents - 1]);
	bp += off;
	erroff -= off;
	rbufoff -= off;

	if (nents > 1) {
		off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff);
		bp += off;
		erroff -= off;
		rbufoff -= off;
	}

	for (ii = 1; ii < nents - 1; ii++) {
		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff);
		bp += off;
		rbufoff -= off;
		erroff -= off;
	}

	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
	    erroff);
	bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr);
	ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);

	return (0);
}

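/*
 * Generate the per-thread caching malloc and free into the space reserved at
 * _malloc and _free, then activate them by atomically overwriting the initial
 * five-byte jump of each entry point with MULTINOP so execution falls through
 * into the generated code. Returns 0 on success (including the case where we
 * legitimately generate nothing) and 1 on failure.
 */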
int
umem_genasm(int *alloc_sizes, umem_cache_t **caches, int ncaches)
{
	int nents, i;
	uint8_t *mptr;
	uint8_t *fptr;
	uint64_t v, *vptr;

	mptr = (void *)((uintptr_t)umem_genasm_mptr + 5);
	fptr = (void *)((uintptr_t)umem_genasm_fptr + 5);
	if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 ||
	    umem_genasm_fptr == 0 || umem_genasm_fsize == 0)
		return (1);

	/*
	 * The total number of caches that we can service is the minimum of:
	 *	o the amount supported by libc
	 *	o the total number of umem caches
	 *	o we use a single byte addl, so it's 255 / sizeof (uintptr_t).
	 *	  For 32-bit, this is 63.
	 */
	nents = _tmem_get_nentries();

	if (UMEM_GENASM_MAX32 < nents)
		nents = UMEM_GENASM_MAX32;

	if (ncaches < nents)
		nents = ncaches;

	/* Based on our constraints, this is not an error */
	if (nents == 0 || umem_ptc_size == 0)
		return (0);

	/* Take into account the jump */
	if (genasm_malloc(mptr, umem_genasm_msize, nents,
	    alloc_sizes) != 0)
		return (1);

	if (genasm_free(fptr, umem_genasm_fsize, nents,
	    alloc_sizes) != 0)
		return (1);

	/* nop out the initial jump with a multibyte nop */
	vptr = (void *)umem_genasm_mptr;
	v = MULTINOP;
	v |= *vptr & (0xffffffULL << 40);
	(void) atomic_swap_64(vptr, v);
	vptr = (void *)umem_genasm_fptr;
	v = MULTINOP;
	v |= *vptr & (0xffffffULL << 40);
	(void) atomic_swap_64(vptr, v);

	for (i = 0; i < nents; i++)
		caches[i]->cache_flags |= UMF_PTC;

	return (0);
}