1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2019 Joyent, Inc. All rights reserved. 23 */ 24 25 /* 26 * Don't Panic! If you find the blocks of assembly that follow confusing and 27 * you're questioning why they exist, please go read section 8 of the umem.c big 28 * theory statement. Next familiarize yourself with the malloc and free 29 * implementations in libumem's malloc.c. 30 * 31 * What follows is the i386 implementation of the thread caching automatic 32 * assembly generation. With i386 a function only has three registers it's 33 * allowed to change without restoring them: eax, ecx, and edx. All others have 34 * to be preserved. Since the set of registers we have available is so small, we 35 * have to make use of esi, ebx, and edi and save their original values to the 36 * stack. 37 * 38 * Malloc register usage: 39 * o. esi: Size of the malloc (passed into us and modified) 40 * o. edi: Size of the cache 41 * o. eax: Buffer to return 42 * o. ebx: Scratch space and temporary values 43 * o. ecx: Pointer to the tmem_t in the ulwp_t. 44 * o. edx: Pointer to the tmem_t array of roots 45 * 46 * Free register usage: 47 * o. 
esi: Size of the malloc (passed into us and modified)
 *  o. edi: Size of the cache
 *  o. eax: Buffer to free
 *  o. ebx: Scratch space and temporary values
 *  o. ecx: Pointer to the tmem_t in the ulwp_t.
 *  o. edx: Pointer to the tmem_t array of roots
 *
 * Once we determine what cache we are using, we increment %edx to the
 * appropriate offset and set %edi with the size of the cache. This means that
 * when we break out to the normal buffer allocation point %edx contains the
 * head of the linked list and %edi is the amount that we have to adjust the
 * total amount cached by the thread.
 *
 * Each block of assembly has pseudocode that describes its purpose.
 */

/*
 * umem_base must be first.
 */
#include "umem_base.h"

#include <inttypes.h>
#include <strings.h>
#include <umem_impl.h>
#include <atomic.h>
#include <sys/mman.h>
#include <errno.h>

/* Non-zero: this platform can generate the thread-cache trampolines. */
const int umem_genasm_supported = 1;

/*
 * Start addresses and reserved sizes of the regions inside libc's _malloc
 * and _free that we overwrite with generated code, and the addresses of the
 * real umem entry points we jump to when a request cannot be satisfied from
 * the per-thread cache.
 */
static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
static size_t umem_genasm_msize = 512;
static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
static size_t umem_genasm_fsize = 512;
static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;

/*
 * The maximum number of caches we can support. We use a single byte addl so
 * this is 255 (UINT8_MAX) / sizeof (uintptr_t). In this case 63
 */
#define	UMEM_GENASM_MAX32	63

/*
 * rel32 displacement for a jmp/jcc whose displacement field starts at byte
 * offset src: the CPU adds the displacement to the address of the *next*
 * instruction, i.e. src + 4.
 */
#define	PTC_JMPADDR(dest, src)	(dest - (src + 4))
#define	PTC_ROOT_SIZE	sizeof (uintptr_t)
/* Little-endian bytes 0f 1f 44 00 00: the canonical 5-byte multibyte nop. */
#define	MULTINOP	0x0000441f0f

/*
 * The templates below are pre-assembled i386 machine code with 0x00
 * placeholders where 32-bit immediates and displacements get patched in by
 * the genasm_* routines; the PTC_* defines preceding each template name the
 * byte offsets of those patch points.
 *
 * void *ptcmalloc(size_t orig_size);
 *
 * size_t size = orig_size + 8;
 *
 * if (size < orig_size)
 *	goto tomalloc;		! This is overflow
 *
 * if (size > cache_size)
 *	goto tomalloc;
 *
 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
 * void **roots = t->tm_roots;
 */
#define	PTC_MALINIT_JOUT	0x0e	/* jc rel32 -> errout */
#define	PTC_MALINIT_MCS		0x14	/* size of the largest cache */
#define	PTC_MALINIT_JOV		0x1a	/* ja rel32 -> errout */
#define	PTC_MALINIT_SOFF	0x27	/* tmem_t offset from curthread */
static const uint8_t malinit[] = {
	0x55,					/* pushl %ebp */
	0x89, 0xe5,				/* movl %esp, %ebp */
	0x57,					/* pushl %edi */
	0x56,					/* pushl %esi */
	0x53,					/* pushl %ebx */
	0x8b, 0x75, 0x08,			/* movl 0x8(%ebp), %esi */
	0x83, 0xc6, 0x08,			/* addl $0x8,%esi */
	0x0f, 0x82, 0x00, 0x00, 0x00, 0x00,	/* jc +$JMP (errout) */
	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +$JMP (errout) */
	0x65, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, /* movl %gs:0x0,%ecx */
	0x81, 0xc1, 0x00, 0x00, 0x00, 0x00,	/* addl $OFF, %ecx */
	0x8d, 0x51, 0x04			/* leal 0x4(%ecx), %edx */
};

/*
 * void ptcfree(void *buf);
 *
 * if (buf == NULL)
 *	return;
 *
 * malloc_data_t *tag = buf;
 * tag--;
 * int size = tag->malloc_size;
 * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
 *
 * if (tagval != MALLOC_MAGIC)
 *	goto tofree;
 *
 * if (size > cache_max)
 *	goto tofree;
 *
 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
 * void **roots = t->tm_roots;
 */
#define	PTC_FRINI_JDONE	0x0d	/* je rel32 -> done (NULL buffer) */
#define	PTC_FRINI_JFREE	0x23	/* jne rel32 -> freebuf (bad tag) */
#define	PTC_FRINI_MCS	0x29	/* size of the largest cache */
#define	PTC_FRINI_JOV	0x2f	/* ja rel32 -> freebuf (too big) */
#define	PTC_FRINI_SOFF	0x3c	/* tmem_t offset from curthread */
static const uint8_t freeinit[] = {
	0x55,					/* pushl %ebp */
	0x89, 0xe5,				/* movl %esp, %ebp */
	0x57,					/* pushl %edi */
	0x56,					/* pushl %esi */
	0x53,					/* pushl %ebx */
	0x8b, 0x45, 0x08,			/* movl 0x8(%ebp), %eax */
	0x85, 0xc0,				/* testl %eax, %eax */
	0x0f, 0x84, 0x00, 0x00, 0x00, 0x00,	/* je $JDONE (done) */
	0x83, 0xe8, 0x08,			/* subl $0x8,%eax */
	0x8b, 0x30,				/* movl (%eax),%esi */
	0x8b, 0x50, 0x04,			/* movl 0x4(%eax),%edx */
	0x01, 0xf2,				/* addl %esi,%edx */
	0x81, 0xfa, 0x00, 0xc0, 0x10, 0x3a,	/* cmpl MAGIC32, %edx */
	0x0f, 0x85, 0x00, 0x00, 0x00, 0x00,	/* jne +JFREE (goto freebuf) */

	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +$JMP (errout) */
	0x65, 0x8b, 0x0d, 0x00, 0x0, 0x00, 0x00, /* movl %gs:0x0,%ecx */
	0x81, 0xc1, 0x00, 0x00, 0x00, 0x00,	/* addl $OFF, %ecx */
	0x8d, 0x51, 0x04			/* leal 0x4(%ecx),%edx */
};

/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 * } else ...			! goto next cache
 */
#define	PTC_INICACHE_CMP	0x02	/* cache size compared against */
#define	PTC_INICACHE_SIZE	0x09	/* cache size loaded into %edi */
#define	PTC_INICACHE_JMP	0x0e	/* jmp rel32 -> allocbuf/freebuf */
static const uint8_t inicache[] = {
	0x81, 0xfe, 0xff, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x77, 0x0a,				/* ja +0xa */
	0xbf, 0xff, 0x00, 0x00, 0x00,		/* movl sizeof ($C0), %edi */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp +$JMP (allocbuf) */
};

/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 *	roots += $CACHE_NUM;
 * } else ...			! goto next cache
 */
#define	PTC_GENCACHE_CMP	0x02	/* cache size compared against */
#define	PTC_GENCACHE_NUM	0x0a	/* byte offset of this cache's root */
#define	PTC_GENCACHE_SIZE	0x0c	/* cache size loaded into %edi */
#define	PTC_GENCACHE_JMP	0x11	/* jmp rel32 -> allocbuf/freebuf */
static const uint8_t gencache[] = {
	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($CACHE), %esi */
	0x77, 0x0d,				/* ja +0xd (next cache) */
	0x83, 0xc2, 0x00,			/* addl $4*$ii, %edx */
	0xbf, 0x00, 0x00, 0x00, 0x00,		/* movl sizeof ($CACHE), %edi */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp +$JMP (allocbuf) */
};

/*
 * else if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 *	roots += $CACHE_NUM;
 * } else {
 *	goto tofunc;		! goto tomalloc if ptcmalloc.
 * }				! goto tofree if ptcfree.
 */
#define	PTC_FINCACHE_CMP	0x02	/* cache size compared against */
#define	PTC_FINCACHE_JMP	0x07	/* ja rel8 -> errout/freebuf */
#define	PTC_FINCACHE_NUM	0x0a	/* byte offset of last cache's root */
#define	PTC_FINCACHE_SIZE	0x0c	/* cache size loaded into %edi */
static const uint8_t fincache[] = {
	0x81, 0xfe, 0xff, 0x00, 0x00, 0x00,	/* cmpl sizeof ($CLAST), %esi */
	0x77, 0x00,				/* ja +$JMP (to errout) */
	0x83, 0xc2, 0x00,			/* addl $4*($NCACHES-1), %edx */
	0xbf, 0x00, 0x00, 0x00, 0x00,		/* movl sizeof ($CLAST), %edi */
};

/*
 * if (*root == NULL)
 *	goto tomalloc;
 *
 * malloc_data_t *ret = *root;
 * *root = *(void **)ret;
 * t->tm_size += csize;
 * ret->malloc_size = size;
 *
 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size);
 * ret++;
 *
 * return ((void *)ret);
 * tomalloc:
 *	return (malloc(orig_size));
 */
#define	PTC_MALFINI_ALLABEL	0x00	/* offset of the allocbuf label */
#define	PTC_MALFINI_JMLABEL	0x20	/* offset of the errout label */
#define	PTC_MALFINI_JMADDR	0x25	/* jmp rel32 -> real malloc */
static const uint8_t malfini[] = {
	/* allocbuf: */
	0x8b, 0x02,			/* movl (%edx), %eax */
	0x85, 0xc0,			/* testl %eax, %eax */
	0x74, 0x1a,			/* je +0x1a (errout) */
	0x8b, 0x18,			/* movl (%eax), %ebx */
	0x89, 0x1a,			/* movl %ebx, (%edx) */
	0x29, 0x39,			/* subl %edi, (%ecx) */
	0x89, 0x30,			/* movl %esi, (%eax) */
	0xba, 0x00, 0xc0, 0x10, 0x3a,	/* movl $0x3a10c000,%edx */
	0x29, 0xf2,			/* subl %esi, %edx */
	0x89, 0x50, 0x04,		/* movl %edx, 0x4(%eax) */
	0x83, 0xc0, 0x08,		/* addl $0x8, %eax */
	0x5b,				/* popl %ebx */
	0x5e,				/* popl %esi */
	0x5f,				/* popl %edi */
	0xc9,				/* leave */
	0xc3,				/* ret */
	/* errout: */
	0x5b,				/* popl %ebx */
	0x5e,				/* popl %esi */
	0x5f,				/* popl %edi */
	0xc9,				/* leave */
	0xe9, 0x00, 0x00, 0x00, 0x00	/* jmp $malloc */
};

/*
 * if (t->tm_size + csize > umem_ptc_size)
 *	goto tofree;
 *
 * t->tm_size += csize
 * *(void **)tag = *root;
 * *root = tag;
 * return;
 * tofree:
 *	free(buf);
 *	return;
 */
#define	PTC_FRFINI_RBUFLABEL	0x00	/* offset of the freebuf label */
#define	PTC_FRFINI_CACHEMAX	0x06	/* per-thread cache byte cap */
#define	PTC_FRFINI_DONELABEL	0x14	/* offset of the done label */
#define	PTC_FRFINI_JFLABEL	0x19	/* offset of the realfree label */
#define	PTC_FRFINI_JFADDR	0x1e	/* jmp rel32 -> real free */
static const uint8_t freefini[] = {
	/* freebuf: */
	0x8b, 0x19,			/* movl (%ecx),%ebx */
	0x01, 0xfb,			/* addl %edi,%ebx */
	0x81, 0xfb, 0x00, 0x00, 0x00, 0x00,	/* cmpl maxsize, %ebx */
	0x73, 0x0d,			/* jae +0xd <tofree> */
	0x01, 0x39,			/* addl %edi,(%ecx) */
	0x8b, 0x3a,			/* movl (%edx),%edi */
	0x89, 0x38,			/* movl %edi,(%eax) */
	0x89, 0x02,			/* movl %eax,(%edx) */
	/* done: */
	0x5b,				/* popl %ebx */
	0x5e,				/* popl %esi */
	0x5f,				/* popl %edi */
	0xc9,				/* leave */
	0xc3,				/* ret */
	/* realfree: */
	0x5b,				/* popl %ebx */
	0x5e,				/* popl %esi */
	0x5f,				/* popl %edi */
	0xc9,				/* leave */
	0xe9, 0x00, 0x00, 0x00, 0x00	/* jmp free */
};

/*
 * Construct the initial part of malloc. off contains the offset from curthread
 * to the root of the tmem structure. ep is the address of the label to error
 * and jump to free. csize is the size of the largest umem_cache in ptcumem.
316 */ 317 static int 318 genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize) 319 { 320 uint32_t addr; 321 322 bcopy(malinit, bp, sizeof (malinit)); 323 addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT); 324 bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr)); 325 bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize)); 326 addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV); 327 bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr)); 328 bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off)); 329 330 return (sizeof (malinit)); 331 } 332 333 static int 334 genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mc) 335 { 336 uint32_t addr; 337 338 bcopy(freeinit, bp, sizeof (freeinit)); 339 addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE); 340 bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr)); 341 addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE); 342 bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr)); 343 bcopy(&mc, bp + PTC_FRINI_MCS, sizeof (mc)); 344 addr = PTC_JMPADDR(ep, PTC_FRINI_JOV); 345 bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr)); 346 bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off)); 347 return (sizeof (freeinit)); 348 } 349 350 /* 351 * Create the initial cache entry of the specified size. The value of ap tells 352 * us what the address of the label to try and allocate a buffer. This value is 353 * an offset from the current base to that value. 
354 */ 355 static int 356 genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap) 357 { 358 uint32_t addr; 359 360 bcopy(inicache, bp, sizeof (inicache)); 361 bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize)); 362 bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize)); 363 addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP); 364 ASSERT(addr != 0); 365 bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr)); 366 367 return (sizeof (inicache)); 368 } 369 370 static int 371 genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap) 372 { 373 uint32_t addr; 374 uint8_t coff; 375 376 ASSERT(256 / PTC_ROOT_SIZE > num); 377 ASSERT(num != 0); 378 bcopy(gencache, bp, sizeof (gencache)); 379 bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize)); 380 bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize)); 381 coff = num * PTC_ROOT_SIZE; 382 bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff)); 383 addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP); 384 bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr)); 385 386 return (sizeof (gencache)); 387 } 388 389 static int 390 genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep) 391 { 392 uint8_t addr; 393 394 ASSERT(ep <= 0xff && ep > 7); 395 ASSERT(256 / PTC_ROOT_SIZE > num); 396 bcopy(fincache, bp, sizeof (fincache)); 397 bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize)); 398 bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize)); 399 addr = num * PTC_ROOT_SIZE; 400 bcopy(&addr, bp + PTC_FINCACHE_NUM, sizeof (addr)); 401 addr = ep - PTC_FINCACHE_JMP - 1; 402 bcopy(&addr, bp + PTC_FINCACHE_JMP, sizeof (addr)); 403 404 return (sizeof (fincache)); 405 } 406 407 static int 408 genasm_malfini(uint8_t *bp, uintptr_t mptr) 409 { 410 uint32_t addr; 411 412 bcopy(malfini, bp, sizeof (malfini)); 413 addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR)); 414 bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr)); 415 416 return (sizeof (malfini)); 417 } 418 419 static int 420 genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr) 
421 { 422 uint32_t addr; 423 424 bcopy(freefini, bp, sizeof (freefini)); 425 bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr)); 426 addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR)); 427 bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr)); 428 429 return (sizeof (freefini)); 430 } 431 432 /* 433 * The malloc inline assembly is constructed as follows: 434 * 435 * o Malloc prologue assembly 436 * o Generic first-cache check 437 * o n Generic cache checks (where n = _tmem_get_entries() - 2) 438 * o Generic last-cache check 439 * o Malloc epilogue assembly 440 * 441 * Generally there are at least three caches. When there is only one cache we 442 * only use the generic last-cache. In the case where there are two caches, we 443 * just leave out the middle ones. 444 */ 445 static int 446 genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes) 447 { 448 int ii, off; 449 uint8_t *bp; 450 size_t total; 451 uint32_t allocoff, erroff; 452 453 total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache); 454 455 if (nents >= 2) 456 total += sizeof (inicache) + sizeof (gencache) * (nents - 2); 457 458 if (total > len) 459 return (1); 460 461 erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL; 462 allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL; 463 464 bp = base; 465 466 off = genasm_malinit(bp, umem_tmem_off, erroff, 467 umem_alloc_sizes[nents-1]); 468 bp += off; 469 allocoff -= off; 470 erroff -= off; 471 472 if (nents > 1) { 473 off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff); 474 bp += off; 475 allocoff -= off; 476 erroff -= off; 477 } 478 479 for (ii = 1; ii < nents - 1; ii++) { 480 off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff); 481 bp += off; 482 allocoff -= off; 483 erroff -= off; 484 } 485 486 bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], 487 erroff); 488 bp += genasm_malfini(bp, umem_genasm_omptr); 489 ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); 490 491 return 
(0); 492 } 493 494 static int 495 genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes) 496 { 497 uint8_t *bp; 498 int ii, off; 499 size_t total; 500 uint32_t rbufoff, retoff, erroff; 501 502 /* Assume that nents has already been audited for us */ 503 total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache); 504 if (nents >= 2) 505 total += sizeof (inicache) + sizeof (gencache) * (nents - 2); 506 507 if (total > len) 508 return (1); 509 510 erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL); 511 rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL); 512 retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL); 513 514 bp = base; 515 516 off = genasm_frinit(bp, umem_tmem_off, retoff, erroff, 517 umem_alloc_sizes[nents - 1]); 518 bp += off; 519 erroff -= off; 520 rbufoff -= off; 521 522 if (nents > 1) { 523 off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff); 524 bp += off; 525 erroff -= off; 526 rbufoff -= off; 527 } 528 529 for (ii = 1; ii < nents - 1; ii++) { 530 off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff); 531 bp += off; 532 rbufoff -= off; 533 erroff -= off; 534 } 535 536 bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], 537 erroff); 538 bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr); 539 ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); 540 541 return (0); 542 } 543 544 boolean_t 545 umem_genasm(int *alloc_sizes, umem_cache_t **caches, int ncaches) 546 { 547 int nents, i; 548 uint8_t *mptr; 549 uint8_t *fptr; 550 uint64_t v, *vptr; 551 size_t mplen, fplen; 552 uintptr_t mpbase, fpbase; 553 boolean_t ret = B_FALSE; 554 555 mptr = (void *)((uintptr_t)umem_genasm_mptr + 5); 556 fptr = (void *)((uintptr_t)umem_genasm_fptr + 5); 557 if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 || 558 umem_genasm_fptr == 0 || umem_genasm_fsize == 0) { 559 return (B_FALSE); 560 } 561 562 mplen = P2ROUNDUP(umem_genasm_msize, pagesize); 563 mpbase = P2ALIGN((uintptr_t)umem_genasm_mptr, 
pagesize); 564 fplen = P2ROUNDUP(umem_genasm_fsize, pagesize); 565 fpbase = P2ALIGN((uintptr_t)umem_genasm_mptr, pagesize); 566 567 /* 568 * If the values straddle a page boundary, then we might need to 569 * actually remap two pages. 570 */ 571 if (P2ALIGN(umem_genasm_msize + (uintptr_t)umem_genasm_mptr, 572 pagesize) != mpbase) { 573 mplen += pagesize; 574 } 575 576 if (P2ALIGN(umem_genasm_fsize + (uintptr_t)umem_genasm_fptr, 577 pagesize) != fpbase) { 578 fplen += pagesize; 579 } 580 581 if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_WRITE | 582 PROT_EXEC) != 0) { 583 return (B_FALSE); 584 } 585 586 if (mprotect((void *)fpbase, fplen, PROT_READ | PROT_WRITE | 587 PROT_EXEC) != 0) { 588 if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_EXEC) != 589 0) { 590 umem_panic("genasm failed to restore memory " 591 "protection: %d", errno); 592 } 593 return (B_FALSE); 594 } 595 596 /* 597 * The total number of caches that we can service is the minimum of: 598 * o the amount supported by libc 599 * o the total number of umem caches 600 * o we use a single byte addl, so it's 255 / sizeof (uintptr_t). For 601 * 32-bit, this is 63. 602 */ 603 nents = _tmem_get_nentries(); 604 605 if (UMEM_GENASM_MAX32 < nents) 606 nents = UMEM_GENASM_MAX32; 607 608 if (ncaches < nents) 609 nents = ncaches; 610 611 /* 612 * If the number of per-thread caches has been set to zero or the 613 * per-thread cache size has been set to zero, don't bother trying to 614 * write any assembly and just use the default malloc and free. When we 615 * return, indicate that there is no PTC support. 
616 */ 617 if (nents == 0 || umem_ptc_size == 0) { 618 goto out; 619 } 620 621 /* Take into account the jump */ 622 if (genasm_malloc(mptr, umem_genasm_msize, nents, 623 alloc_sizes) != 0) { 624 goto out; 625 } 626 627 if (genasm_free(fptr, umem_genasm_fsize, nents, 628 alloc_sizes) != 0) { 629 goto out; 630 } 631 632 /* nop out the jump with a multibyte jump */ 633 vptr = (void *)umem_genasm_mptr; 634 v = MULTINOP; 635 v |= *vptr & (0xffffffULL << 40); 636 (void) atomic_swap_64(vptr, v); 637 vptr = (void *)umem_genasm_fptr; 638 v = MULTINOP; 639 v |= *vptr & (0xffffffULL << 40); 640 (void) atomic_swap_64(vptr, v); 641 642 for (i = 0; i < nents; i++) 643 caches[i]->cache_flags |= UMF_PTC; 644 645 ret = B_TRUE; 646 out: 647 if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_EXEC) != 0) { 648 umem_panic("genasm failed to restore memory protection: %d", 649 errno); 650 } 651 652 if (mprotect((void *)fpbase, fplen, PROT_READ | PROT_EXEC) != 0) { 653 umem_panic("genasm failed to restore memory protection: %d", 654 errno); 655 } 656 657 return (ret); 658 } 659