1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2019 Joyent, Inc. All rights reserved.
23 */
24
25 /*
26 * Don't Panic! If you find the blocks of assembly that follow confusing and
27 * you're questioning why they exist, please go read section 8 of the umem.c big
28 * theory statement. Next familiarize yourself with the malloc and free
29 * implementations in libumem's malloc.c.
30 *
31 * What follows is the i386 implementation of the thread caching automatic
32 * assembly generation. With i386 a function only has three registers it's
33 * allowed to change without restoring them: eax, ecx, and edx. All others have
34 * to be preserved. Since the set of registers we have available is so small, we
35 * have to make use of esi, ebx, and edi and save their original values to the
36 * stack.
37 *
38 * Malloc register usage:
39 * o. esi: Size of the malloc (passed into us and modified)
40 * o. edi: Size of the cache
41 * o. eax: Buffer to return
42 * o. ebx: Scratch space and temporary values
43 * o. ecx: Pointer to the tmem_t in the ulwp_t.
44 * o. edx: Pointer to the tmem_t array of roots
45 *
46 * Free register usage:
47 * o. esi: Size of the malloc (passed into us and modified)
48 * o. edi: Size of the cache
49 * o. eax: Buffer to free
50 * o. ebx: Scratch space and temporary values
51 * o. ecx: Pointer to the tmem_t in the ulwp_t.
52 * o. edx: Pointer to the tmem_t array of roots
53 *
54 * Once we determine what cache we are using, we increment %edx to the
55 * appropriate offset and set %edi with the size of the cache. This means that
56 * when we break out to the normal buffer allocation point %edx contains the
57 * head of the linked list and %edi is the amount that we have to adjust the
58 * total amount cached by the thread.
59 *
 * Each block of assembly has pseudocode that describes its purpose.
61 */
62
63 /*
64 * umem_base must be first.
65 */
66 #include "umem_base.h"
67
68 #include <inttypes.h>
69 #include <strings.h>
70 #include <umem_impl.h>
71 #include <atomic.h>
72 #include <sys/mman.h>
73 #include <errno.h>
74
const int umem_genasm_supported = 1;

/*
 * Patch targets: the addresses and patchable sizes of the stub _malloc and
 * _free routines that we overwrite with generated code, and the original
 * umem entry points that the generated code falls back to when the
 * per-thread cache cannot satisfy a request.
 */
static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
static size_t umem_genasm_msize = 512;
static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
static size_t umem_genasm_fsize = 512;
static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;

/*
 * The maximum number of caches we can support. We use a single byte addl so
 * this is 255 (UINT8_MAX) / sizeof (uintptr_t). In this case 63
 */
#define	UMEM_GENASM_MAX32	63

/*
 * PTC_JMPADDR computes the rel32 displacement for a jump: the displacement
 * is relative to the first byte after the 4-byte immediate field at src.
 */
#define	PTC_JMPADDR(dest, src)	(dest - (src + 4))
#define	PTC_ROOT_SIZE	sizeof (uintptr_t)
/*
 * Five-byte multi-byte NOP (0f 1f 44 00 00, stored little-endian) used to
 * overwrite the initial jump in the stubs once generation succeeds.
 */
#define	MULTINOP	0x0000441f0f
91
92 /*
93 * void *ptcmalloc(size_t orig_size);
94 *
95 * size_t size = orig_size + 8;
96 *
97 * if (size < orig_size)
98 * goto tomalloc; ! This is overflow
99 *
100 * if (size > cache_size)
101 * goto tomalloc;
102 *
103 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
104 * void **roots = t->tm_roots;
105 */
/*
 * Byte offsets of the fields that genasm_malinit() patches into malinit:
 * the two error-path jump displacements, the maximum cache size compared
 * against, and the curthread offset of the tmem_t.
 */
#define	PTC_MALINIT_JOUT	0x0e
#define	PTC_MALINIT_MCS	0x14
#define	PTC_MALINIT_JOV	0x1a
#define	PTC_MALINIT_SOFF	0x27
static const uint8_t malinit[] = {
	0x55,					/* pushl %ebp */
	0x89, 0xe5,				/* movl %esp, %ebp */
	0x57,					/* pushl %edi */
	0x56,					/* pushl %esi */
	0x53,					/* pushl %ebx */
	0x8b, 0x75, 0x08,			/* movl 0x8(%ebp), %esi */
	0x83, 0xc6, 0x08,			/* addl $0x8,%esi */
	0x0f, 0x82, 0x00, 0x00, 0x00, 0x00,	/* jc +$JMP (errout) */
	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +$JMP (errout) */
	0x65, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, /* movl %gs:0x0,%ecx */
	0x81, 0xc1, 0x00, 0x00, 0x00, 0x00,	/* addl $OFF, %ecx */
	0x8d, 0x51, 0x04			/* leal 0x4(%ecx), %edx */
};
125
126 /*
127 * void ptcfree(void *buf);
128 *
129 * if (buf == NULL)
130 * return;
131 *
132 * malloc_data_t *tag = buf;
133 * tag--;
134 * int size = tag->malloc_size;
 * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
136 *
137 * if (tagval != MALLOC_MAGIC)
138 * goto tofree;
139 *
140 * if (size > cache_max)
141 * goto tofree;
142 *
143 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
144 * void **roots = t->tm_roots;
145 */
/*
 * Byte offsets of the fields that genasm_frinit() patches into freeinit:
 * the NULL-buffer "done" jump, the bad-tag jump to the real free, the
 * maximum cache size, the oversize jump, and the curthread offset.
 */
#define	PTC_FRINI_JDONE	0x0d
#define	PTC_FRINI_JFREE	0x23
#define	PTC_FRINI_MCS	0x29
#define	PTC_FRINI_JOV	0x2f
#define	PTC_FRINI_SOFF	0x3c
static const uint8_t freeinit[] = {
	0x55,					/* pushl %ebp */
	0x89, 0xe5,				/* movl %esp, %ebp */
	0x57,					/* pushl %edi */
	0x56,					/* pushl %esi */
	0x53,					/* pushl %ebx */
	0x8b, 0x45, 0x08,			/* movl 0x8(%ebp), %eax */
	0x85, 0xc0,				/* testl %eax, %eax */
	0x0f, 0x84, 0x00, 0x00, 0x00, 0x00,	/* je $JDONE (done) */
	0x83, 0xe8, 0x08,			/* subl $0x8,%eax */
	0x8b, 0x30,				/* movl (%eax),%esi */
	0x8b, 0x50, 0x04,			/* movl 0x4(%eax),%edx */
	0x01, 0xf2,				/* addl %esi,%edx */
	0x81, 0xfa, 0x00, 0xc0, 0x10, 0x3a,	/* cmpl MAGIC32, %edx */
	0x0f, 0x85, 0x00, 0x00, 0x00, 0x00,	/* jne +JFREE (goto freebuf) */

	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +$JMP (errout) */
	0x65, 0x8b, 0x0d, 0x00, 0x0, 0x00, 0x00, /* movl %gs:0x0,%ecx */
	0x81, 0xc1, 0x00, 0x00, 0x00, 0x00,	/* addl $OFF, %ecx */
	0x8d, 0x51, 0x04			/* leal 0x4(%ecx),%edx */
};
173
174 /*
175 * if (size <= $CACHE_SIZE) {
176 * csize = $CACHE_SIZE;
177 * } else ... ! goto next cache
178 */
/*
 * Byte offsets of the fields that genasm_firstcache() patches into
 * inicache: the compare size, the cache size loaded into %edi, and the
 * jump to the allocation label. No root-array adjustment is needed for
 * the first cache.
 */
#define	PTC_INICACHE_CMP	0x02
#define	PTC_INICACHE_SIZE	0x09
#define	PTC_INICACHE_JMP	0x0e
static const uint8_t inicache[] = {
	0x81, 0xfe, 0xff, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x77, 0x0a,				/* ja +0xa */
	0xbf, 0xff, 0x00, 0x00, 0x00,		/* movl sizeof ($C0), %edi */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp +$JMP (allocbuf) */
};
188
189 /*
190 * if (size <= $CACHE_SIZE) {
191 * csize = $CACHE_SIZE;
192 * roots += $CACHE_NUM;
193 * } else ... ! goto next cache
194 */
/*
 * Byte offsets of the fields that genasm_gencache() patches into gencache:
 * the compare size, the single-byte root-array offset (hence the
 * UMEM_GENASM_MAX32 limit), the cache size, and the jump to allocbuf.
 */
#define	PTC_GENCACHE_CMP	0x02
#define	PTC_GENCACHE_NUM	0x0a
#define	PTC_GENCACHE_SIZE	0x0c
#define	PTC_GENCACHE_JMP	0x11
static const uint8_t gencache[] = {
	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($CACHE), %esi */
	0x77, 0x0d,				/* ja +0xd (next cache) */
	0x83, 0xc2, 0x00,			/* addl $4*$ii, %edx */
	0xbf, 0x00, 0x00, 0x00, 0x00,		/* movl sizeof ($CACHE), %edi */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp +$JMP (allocbuf) */
};
206
207 /*
208 * else if (size <= $CACHE_SIZE) {
209 * csize = $CACHE_SIZE;
210 * roots += $CACHE_NUM;
211 * } else {
212 * goto tofunc; ! goto tomalloc if ptcmalloc.
213 * } ! goto tofree if ptcfree.
214 */
/*
 * Byte offsets of the fields that genasm_lastcache() patches into
 * fincache. Unlike gencache, a failed compare here exits via a short
 * (single-byte) jump to the error label, and there is no trailing jump:
 * control falls through into the allocation/free epilogue.
 */
#define	PTC_FINCACHE_CMP	0x02
#define	PTC_FINCACHE_JMP	0x07
#define	PTC_FINCACHE_NUM	0x0a
#define	PTC_FINCACHE_SIZE	0x0c
static const uint8_t fincache[] = {
	0x81, 0xfe, 0xff, 0x00, 0x00, 0x00,	/* cmpl sizeof ($CLAST), %esi */
	0x77, 0x00,				/* ja +$JMP (to errout) */
	0x83, 0xc2, 0x00,			/* addl $4*($NCACHES-1), %edx */
	0xbf, 0x00, 0x00, 0x00, 0x00,		/* movl sizeof ($CLAST), %edi */
};
225
226 /*
227 * if (*root == NULL)
228 * goto tomalloc;
229 *
230 * malloc_data_t *ret = *root;
231 * *root = *(void **)ret;
232 * t->tm_size += csize;
233 * ret->malloc_size = size;
234 *
235 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size);
236 * ret++;
237 *
238 * return ((void *)ret);
239 * tomalloc:
240 * return (malloc(orig_size));
241 */
/*
 * Byte offsets of the labels in malfini: the allocbuf entry label, the
 * errout label, and the rel32 field of the tail jump to the original
 * malloc, patched by genasm_malfini().
 */
#define	PTC_MALFINI_ALLABEL	0x00
#define	PTC_MALFINI_JMLABEL	0x20
#define	PTC_MALFINI_JMADDR	0x25
static const uint8_t malfini[] = {
	/* allocbuf: */
	0x8b, 0x02,				/* movl (%edx), %eax */
	0x85, 0xc0,				/* testl %eax, %eax */
	0x74, 0x1a,				/* je +0x1a (errout) */
	0x8b, 0x18,				/* movl (%eax), %esi */
	0x89, 0x1a,				/* movl %esi, (%edx) */
	0x29, 0x39,				/* subl %edi, (%ecx) */
	0x89, 0x30,				/* movl %esi, (%eax) */
	0xba, 0x00, 0xc0, 0x10, 0x3a,		/* movl $0x3a10c000,%edx */
	0x29, 0xf2,				/* subl %esi, %edx */
	0x89, 0x50, 0x04,			/* movl %edx, 0x4(%eax) */
	0x83, 0xc0, 0x08,			/* addl $0x8, %eax */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xc3,					/* ret */
	/* errout: */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp $malloc */
};
270
271 /*
272 * if (t->tm_size + csize > umem_ptc_size)
273 * goto tofree;
274 *
275 * t->tm_size += csize
276 * *(void **)tag = *root;
277 * *root = tag;
278 * return;
279 * tofree:
280 * free(buf);
281 * return;
282 */
/*
 * Byte offsets of the labels in freefini: the freebuf entry label, the
 * patched maximum total cache size, the done (plain return) label, the
 * tofree label, and the rel32 field of the tail jump to the original
 * free, patched by genasm_frfini().
 */
#define	PTC_FRFINI_RBUFLABEL	0x00
#define	PTC_FRFINI_CACHEMAX	0x06
#define	PTC_FRFINI_DONELABEL	0x14
#define	PTC_FRFINI_JFLABEL	0x19
#define	PTC_FRFINI_JFADDR	0x1e
static const uint8_t freefini[] = {
	/* freebuf: */
	0x8b, 0x19,				/* movl (%ecx),%ebx */
	0x01, 0xfb,				/* addl %edi,%ebx */
	0x81, 0xfb, 0x00, 0x00, 0x00, 0x00,	/* cmpl maxsize, %ebx */
	0x73, 0x0d,				/* jae +0xd <tofree> */
	0x01, 0x39,				/* addl %edi,(%ecx) */
	0x8b, 0x3a,				/* movl (%edx),%edi */
	0x89, 0x38,				/* movl %edi,(%eax) */
	0x89, 0x02,				/* movl %eax,(%edx) */
	/* done: */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xc3,					/* ret */
	/* realfree: */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp free */
};
311
312 /*
313 * Construct the initial part of malloc. off contains the offset from curthread
 * to the root of the tmem structure. ep is the offset of the error label that
 * bails out to the original malloc. csize is the size of the largest
 * umem_cache in ptcumem.
316 */
317 static int
genasm_malinit(uint8_t * bp,uint32_t off,uint32_t ep,uint32_t csize)318 genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize)
319 {
320 uint32_t addr;
321
322 bcopy(malinit, bp, sizeof (malinit));
323 addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT);
324 bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr));
325 bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize));
326 addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV);
327 bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr));
328 bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off));
329
330 return (sizeof (malinit));
331 }
332
333 static int
genasm_frinit(uint8_t * bp,uint32_t off,uint32_t dp,uint32_t ep,uint32_t mc)334 genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mc)
335 {
336 uint32_t addr;
337
338 bcopy(freeinit, bp, sizeof (freeinit));
339 addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE);
340 bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr));
341 addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE);
342 bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr));
343 bcopy(&mc, bp + PTC_FRINI_MCS, sizeof (mc));
344 addr = PTC_JMPADDR(ep, PTC_FRINI_JOV);
345 bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr));
346 bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off));
347 return (sizeof (freeinit));
348 }
349
350 /*
 * Create the initial cache entry of the specified size. ap is the offset,
 * relative to the current base, of the label at which buffer allocation is
 * attempted.
354 */
355 static int
genasm_firstcache(uint8_t * bp,uint32_t csize,uint32_t ap)356 genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap)
357 {
358 uint32_t addr;
359
360 bcopy(inicache, bp, sizeof (inicache));
361 bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize));
362 bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize));
363 addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP);
364 ASSERT(addr != 0);
365 bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr));
366
367 return (sizeof (inicache));
368 }
369
370 static int
genasm_gencache(uint8_t * bp,int num,uint32_t csize,uint32_t ap)371 genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap)
372 {
373 uint32_t addr;
374 uint8_t coff;
375
376 ASSERT(256 / PTC_ROOT_SIZE > num);
377 ASSERT(num != 0);
378 bcopy(gencache, bp, sizeof (gencache));
379 bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize));
380 bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize));
381 coff = num * PTC_ROOT_SIZE;
382 bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff));
383 addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP);
384 bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr));
385
386 return (sizeof (gencache));
387 }
388
389 static int
genasm_lastcache(uint8_t * bp,int num,uint32_t csize,uint32_t ep)390 genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep)
391 {
392 uint8_t addr;
393
394 ASSERT(ep <= 0xff && ep > 7);
395 ASSERT(256 / PTC_ROOT_SIZE > num);
396 bcopy(fincache, bp, sizeof (fincache));
397 bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize));
398 bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize));
399 addr = num * PTC_ROOT_SIZE;
400 bcopy(&addr, bp + PTC_FINCACHE_NUM, sizeof (addr));
401 addr = ep - PTC_FINCACHE_JMP - 1;
402 bcopy(&addr, bp + PTC_FINCACHE_JMP, sizeof (addr));
403
404 return (sizeof (fincache));
405 }
406
407 static int
genasm_malfini(uint8_t * bp,uintptr_t mptr)408 genasm_malfini(uint8_t *bp, uintptr_t mptr)
409 {
410 uint32_t addr;
411
412 bcopy(malfini, bp, sizeof (malfini));
413 addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR));
414 bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr));
415
416 return (sizeof (malfini));
417 }
418
419 static int
genasm_frfini(uint8_t * bp,uint32_t maxthr,uintptr_t fptr)420 genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr)
421 {
422 uint32_t addr;
423
424 bcopy(freefini, bp, sizeof (freefini));
425 bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr));
426 addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR));
427 bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr));
428
429 return (sizeof (freefini));
430 }
431
432 /*
433 * The malloc inline assembly is constructed as follows:
434 *
435 * o Malloc prologue assembly
436 * o Generic first-cache check
437 * o n Generic cache checks (where n = _tmem_get_entries() - 2)
438 * o Generic last-cache check
439 * o Malloc epilogue assembly
440 *
441 * Generally there are at least three caches. When there is only one cache we
442 * only use the generic last-cache. In the case where there are two caches, we
443 * just leave out the middle ones.
444 */
445 static int
genasm_malloc(void * base,size_t len,int nents,int * umem_alloc_sizes)446 genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes)
447 {
448 int ii, off;
449 uint8_t *bp;
450 size_t total;
451 uint32_t allocoff, erroff;
452
453 total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache);
454
455 if (nents >= 2)
456 total += sizeof (inicache) + sizeof (gencache) * (nents - 2);
457
458 if (total > len)
459 return (1);
460
461 erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL;
462 allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL;
463
464 bp = base;
465
466 off = genasm_malinit(bp, umem_tmem_off, erroff,
467 umem_alloc_sizes[nents-1]);
468 bp += off;
469 allocoff -= off;
470 erroff -= off;
471
472 if (nents > 1) {
473 off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff);
474 bp += off;
475 allocoff -= off;
476 erroff -= off;
477 }
478
479 for (ii = 1; ii < nents - 1; ii++) {
480 off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff);
481 bp += off;
482 allocoff -= off;
483 erroff -= off;
484 }
485
486 bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
487 erroff);
488 bp += genasm_malfini(bp, umem_genasm_omptr);
489 ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);
490
491 return (0);
492 }
493
494 static int
genasm_free(void * base,size_t len,int nents,int * umem_alloc_sizes)495 genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes)
496 {
497 uint8_t *bp;
498 int ii, off;
499 size_t total;
500 uint32_t rbufoff, retoff, erroff;
501
502 /* Assume that nents has already been audited for us */
503 total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache);
504 if (nents >= 2)
505 total += sizeof (inicache) + sizeof (gencache) * (nents - 2);
506
507 if (total > len)
508 return (1);
509
510 erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL);
511 rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL);
512 retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL);
513
514 bp = base;
515
516 off = genasm_frinit(bp, umem_tmem_off, retoff, erroff,
517 umem_alloc_sizes[nents - 1]);
518 bp += off;
519 erroff -= off;
520 rbufoff -= off;
521
522 if (nents > 1) {
523 off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff);
524 bp += off;
525 erroff -= off;
526 rbufoff -= off;
527 }
528
529 for (ii = 1; ii < nents - 1; ii++) {
530 off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff);
531 bp += off;
532 rbufoff -= off;
533 erroff -= off;
534 }
535
536 bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
537 erroff);
538 bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr);
539 ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);
540
541 return (0);
542 }
543
544 boolean_t
umem_genasm(int * alloc_sizes,umem_cache_t ** caches,int ncaches)545 umem_genasm(int *alloc_sizes, umem_cache_t **caches, int ncaches)
546 {
547 int nents, i;
548 uint8_t *mptr;
549 uint8_t *fptr;
550 uint64_t v, *vptr;
551 size_t mplen, fplen;
552 uintptr_t mpbase, fpbase;
553 boolean_t ret = B_FALSE;
554
555 mptr = (void *)((uintptr_t)umem_genasm_mptr + 5);
556 fptr = (void *)((uintptr_t)umem_genasm_fptr + 5);
557 if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 ||
558 umem_genasm_fptr == 0 || umem_genasm_fsize == 0) {
559 return (B_FALSE);
560 }
561
562 mplen = P2ROUNDUP(umem_genasm_msize, pagesize);
563 mpbase = P2ALIGN((uintptr_t)umem_genasm_mptr, pagesize);
564 fplen = P2ROUNDUP(umem_genasm_fsize, pagesize);
565 fpbase = P2ALIGN((uintptr_t)umem_genasm_mptr, pagesize);
566
567 /*
568 * If the values straddle a page boundary, then we might need to
569 * actually remap two pages.
570 */
571 if (P2ALIGN(umem_genasm_msize + (uintptr_t)umem_genasm_mptr,
572 pagesize) != mpbase) {
573 mplen += pagesize;
574 }
575
576 if (P2ALIGN(umem_genasm_fsize + (uintptr_t)umem_genasm_fptr,
577 pagesize) != fpbase) {
578 fplen += pagesize;
579 }
580
581 if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_WRITE |
582 PROT_EXEC) != 0) {
583 return (B_FALSE);
584 }
585
586 if (mprotect((void *)fpbase, fplen, PROT_READ | PROT_WRITE |
587 PROT_EXEC) != 0) {
588 if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_EXEC) !=
589 0) {
590 umem_panic("genasm failed to restore memory "
591 "protection: %d", errno);
592 }
593 return (B_FALSE);
594 }
595
596 /*
597 * The total number of caches that we can service is the minimum of:
598 * o the amount supported by libc
599 * o the total number of umem caches
600 * o we use a single byte addl, so it's 255 / sizeof (uintptr_t). For
601 * 32-bit, this is 63.
602 */
603 nents = _tmem_get_nentries();
604
605 if (UMEM_GENASM_MAX32 < nents)
606 nents = UMEM_GENASM_MAX32;
607
608 if (ncaches < nents)
609 nents = ncaches;
610
611 /*
612 * If the number of per-thread caches has been set to zero or the
613 * per-thread cache size has been set to zero, don't bother trying to
614 * write any assembly and just use the default malloc and free. When we
615 * return, indicate that there is no PTC support.
616 */
617 if (nents == 0 || umem_ptc_size == 0) {
618 goto out;
619 }
620
621 /* Take into account the jump */
622 if (genasm_malloc(mptr, umem_genasm_msize, nents,
623 alloc_sizes) != 0) {
624 goto out;
625 }
626
627 if (genasm_free(fptr, umem_genasm_fsize, nents,
628 alloc_sizes) != 0) {
629 goto out;
630 }
631
632 /* nop out the jump with a multibyte jump */
633 vptr = (void *)umem_genasm_mptr;
634 v = MULTINOP;
635 v |= *vptr & (0xffffffULL << 40);
636 (void) atomic_swap_64(vptr, v);
637 vptr = (void *)umem_genasm_fptr;
638 v = MULTINOP;
639 v |= *vptr & (0xffffffULL << 40);
640 (void) atomic_swap_64(vptr, v);
641
642 for (i = 0; i < nents; i++)
643 caches[i]->cache_flags |= UMF_PTC;
644
645 ret = B_TRUE;
646 out:
647 if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_EXEC) != 0) {
648 umem_panic("genasm failed to restore memory protection: %d",
649 errno);
650 }
651
652 if (mprotect((void *)fpbase, fplen, PROT_READ | PROT_EXEC) != 0) {
653 umem_panic("genasm failed to restore memory protection: %d",
654 errno);
655 }
656
657 return (ret);
658 }
659