1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2019 Joyent, Inc. All rights reserved. 23 */ 24 25 /* 26 * Don't Panic! If you find the blocks of assembly that follow confusing and 27 * you're questioning why they exist, please go read section 8 of the umem.c big 28 * theory statement. Next familiarize yourself with the malloc and free 29 * implementations in libumem's malloc.c. 30 * 31 * What follows is the amd64 implementation of the thread caching automatic 32 * assembly generation. The amd64 calling conventions are documented in the 33 * 64-bit System V ABI. For our purposes what matters is that our first argument 34 * will come in rdi. Our functions have to preserve rbp, rbx, and r12->r15. We 35 * are free to do whatever we want with rax, rcx, rdx, rsi, rdi, and r8->r11. 36 * 37 * For both our implementation of malloc and free we only use the registers we 38 * don't have to preserve. 39 * 40 * Malloc register usage: 41 * o. rdi: Original size to malloc. This never changes and is preserved. 42 * o. rsi: Adjusted malloc size for malloc_data_tag(s). 43 * o. rcx: Pointer to the tmem_t in the ulwp_t. 44 * o. 
rdx: Pointer to the tmem_t array of roots
 * o. r8: Size of the cache
 * o. r9: Scratch register
 *
 * Free register usage:
 * o. rdi: Original buffer to free. This never changes and is preserved.
 * o. rax: The actual buffer, adjusted for the hidden malloc_data_t(s).
 * o. rcx: Pointer to the tmem_t in the ulwp_t.
 * o. rdx: Pointer to the tmem_t array of roots
 * o. r8: Size of the cache
 * o. r9: Scratch register
 *
 * Once we determine what cache we are using, we increment %rdx to the
 * appropriate offset and set %r8 with the size of the cache. This means that
 * when we break out to the normal buffer allocation point %rdx contains the
 * head of the linked list and %r8 is the amount that we have to adjust the
 * thread's cached amount by.
 *
 * Each block of assembly has pseudocode that describes its purpose.
 */

/*
 * umem_base must be first.
 */
#include "umem_base.h"

#include <inttypes.h>
#include <strings.h>
#include <umem_impl.h>
#include <atomic.h>
#include <sys/mman.h>
#include <errno.h>


#include <stdio.h>

const int umem_genasm_supported = 1;

/*
 * Addresses and reserved sizes of the libc _malloc/_free stubs that we
 * overwrite with generated code, and the umem routines we fall back to when
 * the per-thread cache cannot satisfy a request.
 */
static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
static size_t umem_genasm_msize = 576;
static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
static size_t umem_genasm_fsize = 576;
static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;

/*
 * The root-array displacement is patched in as a 32-bit immediate, which
 * bounds the number of caches we can service.
 */
#define	UMEM_GENASM_MAX64	(UINT32_MAX / sizeof (uintptr_t))
/*
 * rel32 jump displacement: x86 measures it from the end of the 4-byte
 * immediate, hence the (src + 4).
 */
#define	PTC_JMPADDR(dest, src)	(dest - (src + 4))
#define	PTC_ROOT_SIZE	sizeof (uintptr_t)
/* Five-byte multi-byte nop (0f 1f 44 00 00), used to disable the stub jmp. */
#define	MULTINOP	0x0000441f0f

/*
 * void *ptcmalloc(size_t orig_size);
 *
 * size_t size = orig_size + 8;
 * if (size > UMEM_SECOND_ALIGN)
 *	size += 8;
 *
 * if (size < orig_size)
 *	goto tomalloc;		!
This is overflow 102 * 103 * if (size > cache_max) 104 * goto tomalloc 105 * 106 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset; 107 * void **roots = t->tm_roots; 108 */ 109 #define PTC_MALINIT_JOUT 0x13 110 #define PTC_MALINIT_MCS 0x1a 111 #define PTC_MALINIT_JOV 0x20 112 #define PTC_MALINIT_SOFF 0x30 113 static const uint8_t malinit[] = { 114 0x48, 0x8d, 0x77, 0x08, /* leaq 0x8(%rdi),%rsi */ 115 0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10, %rsi */ 116 0x76, 0x04, /* jbe +0x4 */ 117 0x48, 0x8d, 0x77, 0x10, /* leaq 0x10(%rdi),%rsi */ 118 0x48, 0x39, 0xfe, /* cmpq %rdi,%rsi */ 119 0x0f, 0x82, 0x00, 0x00, 0x00, 0x00, /* jb +errout */ 120 0x48, 0x81, 0xfe, 121 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ 122 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +errout */ 123 0x64, 0x48, 0x8b, 0x0c, 0x25, 124 0x00, 0x00, 0x00, 0x00, /* movq %fs:0x0,%rcx */ 125 0x48, 0x81, 0xc1, 126 0x00, 0x00, 0x00, 0x00, /* addq $SOFF, %rcx */ 127 0x48, 0x8d, 0x51, 0x08, /* leaq 0x8(%rcx),%rdx */ 128 }; 129 130 /* 131 * void ptcfree(void *buf); 132 * 133 * if (buf == NULL) 134 * return; 135 * 136 * malloc_data_t *tag = buf; 137 * tag--; 138 * int size = tag->malloc_size; 139 * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size); 140 * if (tagval == MALLOC_SECOND_MAGIC) { 141 * tag--; 142 * } else if (tagval != MALLOC_MAGIC) { 143 * goto tofree; 144 * } 145 * 146 * if (size > cache_max) 147 * goto tofree; 148 * 149 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset; 150 * void **roots = t->tm_roots; 151 */ 152 #define PTC_FRINI_JDONE 0x05 153 #define PTC_FRINI_JFREE 0x25 154 #define PTC_FRINI_MCS 0x30 155 #define PTC_FRINI_JOV 0x36 156 #define PTC_FRINI_SOFF 0x46 157 static const uint8_t freeinit[] = { 158 0x48, 0x85, 0xff, /* testq %rdi,%rdi */ 159 0x0f, 0x84, 0x00, 0x00, 0x00, 0x00, /* jmp $JDONE (done) */ 160 0x8b, 0x77, 0xf8, /* movl -0x8(%rdi),%esi */ 161 0x8b, 0x47, 0xfc, /* movl -0x4(%rdi),%eax */ 162 0x01, 0xf0, /* addl %esi,%eax */ 163 0x3d, 0x00, 0x70, 0xba, 0x16, 
/* cmpl $MALLOC_2_MAGIC, %eax */ 164 0x75, 0x06, /* jne +0x6 (checkover) */ 165 0x48, 0x8d, 0x47, 0xf0, /* leaq -0x10(%rdi),%eax */ 166 0xeb, 0x0f, /* jmp +0xf (freebuf) */ 167 0x3d, 0x00, 0xc0, 0x10, 0x3a, /* cmpl $MALLOC_MAGIC, %eax */ 168 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, /* jmp +JFREE (goto torfree) */ 169 0x48, 0x8d, 0x47, 0xf8, /* leaq -0x8(%rdi),%rax */ 170 0x48, 0x81, 0xfe, 171 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ 172 0x0f, 0x87, 0x00, 0x00, 0x00, 0x00, /* ja +errout */ 173 0x64, 0x48, 0x8b, 0x0c, 0x25, 174 0x00, 0x00, 0x00, 0x00, /* movq %fs:0x0,%rcx */ 175 0x48, 0x81, 0xc1, 176 0x00, 0x00, 0x00, 0x00, /* addq $SOFF, %rcx */ 177 0x48, 0x8d, 0x51, 0x08, /* leaq 0x8(%rcx),%rdx */ 178 }; 179 180 /* 181 * if (size <= $CACHE_SIZE) { 182 * csize = $CACHE_SIZE; 183 * } else ... ! goto next cache 184 */ 185 #define PTC_INICACHE_CMP 0x03 186 #define PTC_INICACHE_SIZE 0x0c 187 #define PTC_INICACHE_JMP 0x11 188 static const uint8_t inicache[] = { 189 0x48, 0x81, 0xfe, 190 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ 191 0x77, 0x0c, /* ja +0xc (next cache) */ 192 0x49, 0xc7, 0xc0, 193 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ 194 0xe9, 0x00, 0x00, 0x00, 0x00, /* jmp $JMP (allocbuf) */ 195 }; 196 197 /* 198 * if (size <= $CACHE_SIZE) { 199 * csize = $CACHE_SIZE; 200 * roots += $CACHE_NUM; 201 * } else ... ! 
goto next cache 202 */ 203 #define PTC_GENCACHE_CMP 0x03 204 #define PTC_GENCACHE_SIZE 0x0c 205 #define PTC_GENCACHE_NUM 0x13 206 #define PTC_GENCACHE_JMP 0x18 207 static const uint8_t gencache[] = { 208 0x48, 0x81, 0xfe, 209 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ 210 0x77, 0x14, /* ja +0xc (next cache) */ 211 0x49, 0xc7, 0xc0, 212 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ 213 0x48, 0x81, 0xc2, 214 0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */ 215 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf ) */ 216 }; 217 218 /* 219 * else if (size <= $CACHE_SIZE) { 220 * csize = $CACHE_SIZE; 221 * roots += $CACHE_NUM; 222 * } else { 223 * goto tofunc; ! goto tomalloc if ptcmalloc. 224 * } ! goto tofree if ptcfree. 225 */ 226 #define PTC_FINCACHE_CMP 0x03 227 #define PTC_FINCACHE_JMP 0x08 228 #define PTC_FINCACHE_SIZE 0x0c 229 #define PTC_FINCACHE_NUM 0x13 230 static const uint8_t fincache[] = { 231 0x48, 0x81, 0xfe, 232 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ 233 0x77, 0x00, /* ja +JMP (to real malloc) */ 234 0x49, 0xc7, 0xc0, 235 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ 236 0x48, 0x81, 0xc2, 237 0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */ 238 239 }; 240 241 /* 242 * if (*root == NULL) 243 * goto tomalloc; 244 * 245 * malloc_data_t *ret = *root; 246 * *root = *(void **)ret; 247 * t->tm_size += csize; 248 * ret->malloc_size = size; 249 * 250 * if (size > UMEM_SECOND_ALIGN) { 251 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); 252 * ret += 2; 253 * } else { 254 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); 255 * ret += 1; 256 * } 257 * 258 * return ((void *)ret); 259 * tomalloc: 260 * return (malloc(orig_size)); 261 */ 262 #define PTC_MALFINI_ALLABEL 0x00 263 #define PTC_MALFINI_JMLABEL 0x40 264 #define PTC_MALFINI_JMADDR 0x41 265 static const uint8_t malfini[] = { 266 0x48, 0x8b, 0x02, /* movl (%rdx),%rax */ 267 0x48, 0x85, 0xc0, /* testq %rax,%rax */ 268 0x74, 
0x38, /* je +0x38 (errout) */ 269 0x4c, 0x8b, 0x08, /* movq (%rax),%r9 */ 270 0x4c, 0x89, 0x0a, /* movq %r9,(%rdx) */ 271 0x4c, 0x29, 0x01, /* subq %rsi,(%rcx) */ 272 0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10,%rsi */ 273 0x76, 0x15, /* jbe +0x15 */ 274 0x41, 0xb9, 0x00, 0x70, 0xba, 0x16, /* movl $MALLOC_MAGIC_2, %r9d */ 275 0x89, 0x70, 0x08, /* movl %r9d,0x8(%rax) */ 276 0x41, 0x29, 0xf1, /* subl %esi, %r9d */ 277 0x44, 0x89, 0x48, 0x0c, /* movl %r9d, 0xc(%rax) */ 278 0x48, 0x83, 0xc0, 0x10, /* addq $0x10, %rax */ 279 0xc3, /* ret */ 280 0x41, 0xb9, 0x00, 0xc0, 0x10, 0x3a, /* movl %MALLOC_MAGIC, %r9d */ 281 0x89, 0x30, /* movl %esi,(%rax) */ 282 0x41, 0x29, 0xf1, /* subl %esi,%r9d */ 283 0x44, 0x89, 0x48, 0x04, /* movl %r9d,0x4(%rax) */ 284 0x48, 0x83, 0xc0, 0x08, /* addq $0x8,%rax */ 285 0xc3, /* ret */ 286 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp $MALLOC */ 287 }; 288 289 /* 290 * if (t->tm_size + csize > umem_ptc_size) 291 * goto tofree; 292 * 293 * t->tm_size += csize 294 * *(void **)tag = *root; 295 * *root = tag; 296 * return; 297 * tofree: 298 * free(buf); 299 * return; 300 */ 301 #define PTC_FRFINI_RBUFLABEL 0x00 302 #define PTC_FRFINI_CACHEMAX 0x09 303 #define PTC_FRFINI_DONELABEL 0x1b 304 #define PTC_FRFINI_JFLABEL 0x1c 305 #define PTC_FRFINI_JFADDR 0x1d 306 static const uint8_t freefini[] = { 307 0x4c, 0x8b, 0x09, /* movq (%rcx),%r9 */ 308 0x4d, 0x01, 0xc1, /* addq %r8, %r9 */ 309 0x49, 0x81, 0xf9, 310 0x00, 0x00, 0x00, 0x00, /* cmpl $THR_CACHE_MAX, %r9 */ 311 0x77, 0x0d, /* jae +0xd (torfree) */ 312 0x4c, 0x01, 0x01, /* addq %r8,(%rcx) */ 313 0x4c, 0x8b, 0x0a, /* movq (%rdx),%r9 */ 314 0x4c, 0x89, 0x08, /* movq %r9,(%rax) */ 315 0x48, 0x89, 0x02, /* movq %rax,(%rdx) */ 316 0xc3, /* ret */ 317 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp free */ 318 }; 319 320 /* 321 * Construct the initial part of malloc. off contains the offset from curthread 322 * to the root of the tmem structure. ep is the address of the label to error 323 * and jump to free. 
csize is the size of the largest umem_cache in ptcumem. 324 */ 325 static int 326 genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize) 327 { 328 uint32_t addr; 329 330 bcopy(malinit, bp, sizeof (malinit)); 331 addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT); 332 bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr)); 333 bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize)); 334 addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV); 335 bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr)); 336 bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off)); 337 338 return (sizeof (malinit)); 339 } 340 341 static int 342 genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mcs) 343 { 344 uint32_t addr; 345 346 bcopy(freeinit, bp, sizeof (freeinit)); 347 addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE); 348 bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr)); 349 addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE); 350 bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr)); 351 bcopy(&mcs, bp + PTC_FRINI_MCS, sizeof (mcs)); 352 addr = PTC_JMPADDR(ep, PTC_FRINI_JOV); 353 bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr)); 354 bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off)); 355 return (sizeof (freeinit)); 356 } 357 358 359 /* 360 * Create the initial cache entry of the specified size. The value of ap tells 361 * us what the address of the label to try and allocate a buffer. This value is 362 * an offset from the current base to that value. 
 */
static int
genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap)
{
	uint32_t addr;

	bcopy(inicache, bp, sizeof (inicache));
	/* The cache size feeds both the compare and the %r8 load. */
	bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize));
	addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP);
	ASSERT(addr != 0);
	bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr));

	return (sizeof (inicache));
}

/*
 * Create a generic (middle) cache check for cache number num of size csize.
 * ap is the offset from the current base to the allocation label.
 */
static int
genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap)
{
	uint32_t addr;
	uint32_t coff;

	ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num);
	ASSERT(num != 0);
	bcopy(gencache, bp, sizeof (gencache));
	bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize));
	/* Displacement of this cache's root within the tm_roots array. */
	coff = num * PTC_ROOT_SIZE;
	bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff));
	addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP);
	bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr));

	return (sizeof (gencache));
}

/*
 * Create the final cache check. Its failure jump is a one-byte rel8 to the
 * error label at offset ep, so ep must fit in that encoding.
 */
static int
genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep)
{
	uint8_t eap;
	uint32_t coff;

	ASSERT(ep <= 0xff && ep > 7);
	ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num);
	bcopy(fincache, bp, sizeof (fincache));
	bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize));
	coff = num * PTC_ROOT_SIZE;
	bcopy(&coff, bp + PTC_FINCACHE_NUM, sizeof (coff));
	/* rel8 is measured from the end of the 2-byte jcc, hence the -1. */
	eap = ep - PTC_FINCACHE_JMP - 1;
	bcopy(&eap, bp + PTC_FINCACHE_JMP, sizeof (eap));

	return (sizeof (fincache));
}

/*
 * Emit the malloc epilogue; mptr is the absolute address of the real malloc
 * routine that the error path tail-jumps to.
 */
static int
genasm_malfini(uint8_t *bp, uintptr_t mptr)
{
	uint32_t addr;

	bcopy(malfini, bp, sizeof (malfini));
	/* rel32 from the patch point (an absolute address here) to mptr. */
	addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR));
	bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr));

	return (sizeof (malfini));
}

/*
 * Emit the free epilogue; maxthr is the per-thread cache cap and fptr the
 * absolute address of the real free routine.
 */
static int
genasm_frfini(uint8_t *bp,
    uint32_t maxthr, uintptr_t fptr)
{
	uint32_t addr;

	bcopy(freefini, bp, sizeof (freefini));
	/* Patch in the cache cap and the rel32 tail-jump to the real free. */
	bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr));
	addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR));
	bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr));

	return (sizeof (freefini));
}

/*
 * The malloc inline assembly is constructed as follows:
 *
 * o Malloc prologue assembly
 * o Generic first-cache check
 * o n Generic cache checks (where n = _tmem_get_entries() - 2)
 * o Generic last-cache check
 * o Malloc epilogue assembly
 *
 * Generally there are at least three caches. When there is only one cache we
 * only use the generic last-cache. In the case where there are two caches, we
 * just leave out the middle ones.
 */
static int
genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes)
{
	int ii, off;
	uint8_t *bp;
	size_t total;
	uint32_t allocoff, erroff;

	/* Total generated size: prologue + epilogue + last-cache check ... */
	total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache);

	/* ... plus a first-cache check and nents - 2 middle checks. */
	if (nents >= 2)
		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);

	/* Refuse to generate code that would overflow the stub. */
	if (total > len)
		return (1);

	/*
	 * Offsets (from the current emission point) of the error label and
	 * the allocation label in the epilogue; both shrink as we emit.
	 */
	erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL;
	allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL;

	bp = base;

	off = genasm_malinit(bp, umem_tmem_off, erroff,
	    umem_alloc_sizes[nents-1]);
	bp += off;
	allocoff -= off;
	erroff -= off;

	if (nents > 1) {
		off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff);
		bp += off;
		allocoff -= off;
		erroff -= off;
	}

	for (ii = 1; ii < nents - 1; ii++) {
		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff);
		bp += off;
		allocoff -= off;
		erroff -= off;
	}

	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
	    erroff);
	bp += genasm_malfini(bp, umem_genasm_omptr);
	/* We must have emitted exactly the number of bytes we budgeted. */
	ASSERT(((uintptr_t)bp - total) ==
(uintptr_t)base); 500 501 return (0); 502 } 503 504 static int 505 genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes) 506 { 507 uint8_t *bp; 508 int ii, off; 509 size_t total; 510 uint32_t rbufoff, retoff, erroff; 511 512 /* Assume that nents has already been audited for us */ 513 total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache); 514 if (nents >= 2) 515 total += sizeof (inicache) + sizeof (gencache) * (nents - 2); 516 517 if (total > len) 518 return (1); 519 520 erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL); 521 rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL); 522 retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL); 523 524 bp = base; 525 526 off = genasm_frinit(bp, umem_tmem_off, retoff, erroff, 527 umem_alloc_sizes[nents - 1]); 528 bp += off; 529 erroff -= off; 530 rbufoff -= off; 531 532 if (nents > 1) { 533 off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff); 534 bp += off; 535 erroff -= off; 536 rbufoff -= off; 537 } 538 539 for (ii = 1; ii < nents - 1; ii++) { 540 off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff); 541 bp += off; 542 rbufoff -= off; 543 erroff -= off; 544 } 545 546 bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], 547 erroff); 548 bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr); 549 ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); 550 551 return (0); 552 } 553 554 boolean_t 555 umem_genasm(int *cp, umem_cache_t **caches, int nc) 556 { 557 int nents, i; 558 uint8_t *mptr; 559 uint8_t *fptr; 560 uint64_t v, *vptr; 561 size_t mplen, fplen; 562 uintptr_t mpbase, fpbase; 563 boolean_t ret = B_FALSE; 564 565 mptr = (void *)((uintptr_t)umem_genasm_mptr + 5); 566 fptr = (void *)((uintptr_t)umem_genasm_fptr + 5); 567 if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 || 568 umem_genasm_fptr == 0 || umem_genasm_fsize == 0) { 569 return (B_FALSE); 570 } 571 572 mplen = P2ROUNDUP(umem_genasm_msize, pagesize); 573 mpbase = 
P2ALIGN((uintptr_t)umem_genasm_mptr, pagesize); 574 fplen = P2ROUNDUP(umem_genasm_fsize, pagesize); 575 fpbase = P2ALIGN((uintptr_t)umem_genasm_mptr, pagesize); 576 577 /* 578 * If the values straddle a page boundary, then we might need to 579 * actually remap two pages. 580 */ 581 if (P2ALIGN(umem_genasm_msize + (uintptr_t)umem_genasm_mptr, 582 pagesize) != mpbase) { 583 mplen += pagesize; 584 } 585 586 if (P2ALIGN(umem_genasm_fsize + (uintptr_t)umem_genasm_fptr, 587 pagesize) != fpbase) { 588 fplen += pagesize; 589 } 590 591 if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_WRITE | 592 PROT_EXEC) != 0) { 593 return (B_FALSE); 594 } 595 596 if (mprotect((void *)fpbase, fplen, PROT_READ | PROT_WRITE | 597 PROT_EXEC) != 0) { 598 if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_EXEC) != 599 0) { 600 umem_panic("genasm failed to restore memory " 601 "protection: %d", errno); 602 } 603 return (B_FALSE); 604 } 605 606 /* 607 * The total number of caches that we can service is the minimum of: 608 * o the amount supported by libc 609 * o the total number of umem caches 610 * o we use a single byte addl, so it's MAX_UINT32 / sizeof (uintptr_t) 611 * For 64-bit, this is MAX_UINT32 >> 3, a lot. 612 */ 613 nents = _tmem_get_nentries(); 614 615 if (UMEM_GENASM_MAX64 < nents) 616 nents = UMEM_GENASM_MAX64; 617 618 if (nc < nents) 619 nents = nc; 620 621 /* 622 * If the number of per-thread caches has been set to zero or the 623 * per-thread cache size has been set to zero, don't bother trying to 624 * write any assembly and just use the default malloc and free. When we 625 * return, indicate that there is no PTC support. 
626 */ 627 if (nents == 0 || umem_ptc_size == 0) { 628 goto out; 629 } 630 631 /* Take into account the jump */ 632 if (genasm_malloc(mptr, umem_genasm_msize, nents, cp) != 0) { 633 goto out; 634 } 635 636 if (genasm_free(fptr, umem_genasm_fsize, nents, cp) != 0) { 637 goto out; 638 } 639 640 /* nop out the jump with a multibyte jump */ 641 vptr = (void *)umem_genasm_mptr; 642 v = MULTINOP; 643 v |= *vptr & (0xffffffULL << 40); 644 (void) atomic_swap_64(vptr, v); 645 vptr = (void *)umem_genasm_fptr; 646 v = MULTINOP; 647 v |= *vptr & (0xffffffULL << 40); 648 (void) atomic_swap_64(vptr, v); 649 650 for (i = 0; i < nents; i++) 651 caches[i]->cache_flags |= UMF_PTC; 652 653 ret = B_TRUE; 654 out: 655 if (mprotect((void *)mpbase, mplen, PROT_READ | PROT_EXEC) != 0) { 656 umem_panic("genasm failed to restore memory protection: %d", 657 errno); 658 } 659 660 if (mprotect((void *)fpbase, fplen, PROT_READ | PROT_EXEC) != 0) { 661 umem_panic("genasm failed to restore memory protection: %d", 662 errno); 663 } 664 665 return (ret); 666 } 667