1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2013 Joyent, Inc. All rights reserved. 23 */ 24 25 /* 26 * Don't Panic! If you find the blocks of assembly that follow confusing and 27 * you're questioning why they exist, please go read section 8 of the umem.c big 28 * theory statement. Next familiarize yourself with the malloc and free 29 * implementations in libumem's malloc.c. 30 * 31 * What follows is the amd64 implementation of the thread caching automatic 32 * assembly generation. The amd64 calling conventions are documented in the 33 * 64-bit System V ABI. For our purposes what matters is that our first argument 34 * will come in rdi. Our functions have to preserve rbp, rbx, and r12->r15. We 35 * are free to do whatever we want with rax, rcx, rdx, rsi, rdi, and r8->r11. 36 * 37 * For both our implementation of malloc and free we only use the registers we 38 * don't have to preserve. 39 * 40 * Malloc register usage: 41 * o. rdi: Original size to malloc. This never changes and is preserved. 42 * o. rsi: Adjusted malloc size for malloc_data_tag(s). 43 * o. rcx: Pointer to the tmem_t in the ulwp_t. 44 * o. 
rdx: Pointer to the tmem_t array of roots 45 * o. r8: Size of the cache 46 * o. r9: Scratch register 47 * 48 * Free register usage: 49 * o. rdi: Original buffer to free. This never changes and is preserved. 50 * o. rax: The actual buffer, adjusted for the hidden malloc_data_t(s). 51 * o. rcx: Pointer to the tmem_t in the ulwp_t. 52 * o. rdx: Pointer to the tmem_t array of roots 53 * o. r8: Size of the cache 54 * o. r9: Scratch register 55 * 56 * Once we determine what cache we are using, we increment %rdx to the 57 * appropriate offset and set %r8 with the size of the cache. This means that 58 * when we break out to the normal buffer allocation point %rdx contains the 59 * head of the linked list and %r8 is the amount that we have to adjust the 60 * thread's cached amount by. 61 * 62 * Each block of assembly has psuedocode that describes its purpose. 63 */ 64 65 #include <atomic.h> 66 #include <inttypes.h> 67 #include <sys/types.h> 68 #include <strings.h> 69 #include <umem_impl.h> 70 #include "umem_base.h" 71 72 #include <stdio.h> 73 74 const int umem_genasm_supported = 1; 75 static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc; 76 static size_t umem_genasm_msize = 576; 77 static uintptr_t umem_genasm_fptr = (uintptr_t)&_free; 78 static size_t umem_genasm_fsize = 576; 79 static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc; 80 static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free; 81 82 #define UMEM_GENASM_MAX64 (UINT32_MAX / sizeof (uintptr_t)) 83 #define PTC_JMPADDR(dest, src) (dest - (src + 4)) 84 #define PTC_ROOT_SIZE sizeof (uintptr_t) 85 #define MULTINOP 0x0000441f0f 86 87 /* 88 * void *ptcmalloc(size_t orig_size); 89 * 90 * size_t size = orig_size + 8; 91 * if (size > UMEM_SECOND_ALIGN) 92 * size += 8; 93 * 94 * if (size < orig_size) 95 * goto tomalloc; ! 
This is overflow
 *
 * if (size > cache_max)
 *	goto tomalloc
 *
 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
 * void **roots = t->tm_roots;
 */

/*
 * Byte offsets into malinit[] of the zeroed 32-bit immediates that
 * genasm_malinit() fills in: the displacement of the overflow branch (JOUT),
 * the largest cache size (MCS), the displacement of the too-large branch
 * (JOV) and the offset added to the thread pointer (SOFF).
 */
#define	PTC_MALINIT_JOUT	0x13
#define	PTC_MALINIT_MCS		0x1a
#define	PTC_MALINIT_JOV		0x20
#define	PTC_MALINIT_SOFF	0x30
static const uint8_t malinit[] = {
	0x48, 0x8d, 0x77, 0x08,		/* leaq 0x8(%rdi),%rsi */
	0x48, 0x83, 0xfe, 0x10,		/* cmpq $0x10, %rsi */
	0x76, 0x04,			/* jbe +0x4 */
	0x48, 0x8d, 0x77, 0x10,		/* leaq 0x10(%rdi),%rsi */
	0x48, 0x39, 0xfe,		/* cmpq %rdi,%rsi */
	0x0f, 0x82, 0x00, 0x00, 0x00, 0x00,	/* jb +errout */
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +errout */
	0x64, 0x48, 0x8b, 0x0c, 0x25,
	0x00, 0x00, 0x00, 0x00,		/* movq %fs:0x0,%rcx */
	0x48, 0x81, 0xc1,
	0x00, 0x00, 0x00, 0x00,		/* addq $SOFF, %rcx */
	0x48, 0x8d, 0x51, 0x08,		/* leaq 0x8(%rcx),%rdx */
};

/*
 * void ptcfree(void *buf);
 *
 * if (buf == NULL)
 *	return;
 *
 * malloc_data_t *tag = buf;
 * tag--;
 * int size = tag->malloc_size;
 * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
 * if (tagval == MALLOC_SECOND_MAGIC) {
 *	tag--;
 * } else if (tagval != MALLOC_MAGIC) {
 *	goto tofree;
 * }
 *
 * if (size > cache_max)
 *	goto tofree;
 *
 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
 * void **roots = t->tm_roots;
 */

/*
 * Byte offsets into freeinit[] of the zeroed 32-bit immediates that
 * genasm_frinit() fills in: the displacement to the done label (JDONE), the
 * displacement to the free fallback taken on a bad tag (JFREE), the largest
 * cache size (MCS), the displacement to the free fallback taken when the
 * buffer is too large (JOV) and the offset added to the thread pointer
 * (SOFF).
 */
#define	PTC_FRINI_JDONE		0x05
#define	PTC_FRINI_JFREE		0x25
#define	PTC_FRINI_MCS		0x30
#define	PTC_FRINI_JOV		0x36
#define	PTC_FRINI_SOFF		0x46
static const uint8_t freeinit[] = {
	0x48, 0x85, 0xff,		/* testq %rdi,%rdi */
	0x0f, 0x84, 0x00, 0x00, 0x00, 0x00,	/* je $JDONE (done) */
	0x8b, 0x77, 0xf8,		/* movl -0x8(%rdi),%esi */
	0x8b, 0x47, 0xfc,		/* movl -0x4(%rdi),%eax */
	0x01, 0xf0,			/* addl %esi,%eax */
	0x3d, 0x00, 0x70, 0xba, 0x16,	/* cmpl $MALLOC_2_MAGIC, %eax */
	0x75, 0x06,			/* jne +0x6 (checkover) */
	0x48, 0x8d, 0x47, 0xf0,		/* leaq -0x10(%rdi),%rax */
	0xeb, 0x0f,			/* jmp +0xf (freebuf) */
	0x3d, 0x00, 0xc0, 0x10, 0x3a,	/* cmpl $MALLOC_MAGIC, %eax */
	0x0f, 0x85, 0x00, 0x00, 0x00, 0x00,	/* jne +JFREE (goto tofree) */
	0x48, 0x8d, 0x47, 0xf8,		/* leaq -0x8(%rdi),%rax */
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +errout */
	0x64, 0x48, 0x8b, 0x0c, 0x25,
	0x00, 0x00, 0x00, 0x00,		/* movq %fs:0x0,%rcx */
	0x48, 0x81, 0xc1,
	0x00, 0x00, 0x00, 0x00,		/* addq $SOFF, %rcx */
	0x48, 0x8d, 0x51, 0x08,		/* leaq 0x8(%rcx),%rdx */
};

/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 * } else ...				! goto next cache
 */

/*
 * Byte offsets into inicache[] of the zeroed 32-bit immediates that
 * genasm_firstcache() fills in: the cache size compared against (CMP), the
 * cache size loaded into %r8 (SIZE) and the displacement of the jump to the
 * buffer-handling label (JMP).
 */
#define	PTC_INICACHE_CMP	0x03
#define	PTC_INICACHE_SIZE	0x0c
#define	PTC_INICACHE_JMP	0x11
static const uint8_t inicache[] = {
	0x48, 0x81, 0xfe,
	0x00, 0x00, 0x00, 0x00,		/* cmpq sizeof ($CACHE), %rsi */
	0x77, 0x0c,			/* ja +0xc (next cache) */
	0x49, 0xc7, 0xc0,
	0x00, 0x00, 0x00, 0x00,		/* movq sizeof ($CACHE), %r8 */
	0xe9, 0x00, 0x00, 0x00, 0x00,	/* jmp $JMP (allocbuf) */
};

/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 *	roots += $CACHE_NUM;
 * } else ...				! 
goto next cache 196 */ 197 #define PTC_GENCACHE_CMP 0x03 198 #define PTC_GENCACHE_SIZE 0x0c 199 #define PTC_GENCACHE_NUM 0x13 200 #define PTC_GENCACHE_JMP 0x18 201 static const uint8_t gencache[] = { 202 0x48, 0x81, 0xfe, 203 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ 204 0x77, 0x14, /* ja +0xc (next cache) */ 205 0x49, 0xc7, 0xc0, 206 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ 207 0x48, 0x81, 0xc2, 208 0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */ 209 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp +$JMP (allocbuf ) */ 210 }; 211 212 /* 213 * else if (size <= $CACHE_SIZE) { 214 * csize = $CACHE_SIZE; 215 * roots += $CACHE_NUM; 216 * } else { 217 * goto tofunc; ! goto tomalloc if ptcmalloc. 218 * } ! goto tofree if ptcfree. 219 */ 220 #define PTC_FINCACHE_CMP 0x03 221 #define PTC_FINCACHE_JMP 0x08 222 #define PTC_FINCACHE_SIZE 0x0c 223 #define PTC_FINCACHE_NUM 0x13 224 static const uint8_t fincache[] = { 225 0x48, 0x81, 0xfe, 226 0x00, 0x00, 0x00, 0x00, /* cmpq sizeof ($CACHE), %rsi */ 227 0x77, 0x00, /* ja +JMP (to real malloc) */ 228 0x49, 0xc7, 0xc0, 229 0x00, 0x00, 0x00, 0x00, /* movq sizeof ($CACHE), %r8 */ 230 0x48, 0x81, 0xc2, 231 0x00, 0x00, 0x00, 0x00, /* addq $8*ii, %rdx */ 232 233 }; 234 235 /* 236 * if (*root == NULL) 237 * goto tomalloc; 238 * 239 * malloc_data_t *ret = *root; 240 * *root = *(void **)ret; 241 * t->tm_size += csize; 242 * ret->malloc_size = size; 243 * 244 * if (size > UMEM_SECOND_ALIGN) { 245 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); 246 * ret += 2; 247 * } else { 248 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size); 249 * ret += 1; 250 * } 251 * 252 * return ((void *)ret); 253 * tomalloc: 254 * return (malloc(orig_size)); 255 */ 256 #define PTC_MALFINI_ALLABEL 0x00 257 #define PTC_MALFINI_JMLABEL 0x40 258 #define PTC_MALFINI_JMADDR 0x41 259 static const uint8_t malfini[] = { 260 0x48, 0x8b, 0x02, /* movl (%rdx),%rax */ 261 0x48, 0x85, 0xc0, /* testq %rax,%rax */ 262 0x74, 
0x38, /* je +0x38 (errout) */ 263 0x4c, 0x8b, 0x08, /* movq (%rax),%r9 */ 264 0x4c, 0x89, 0x0a, /* movq %r9,(%rdx) */ 265 0x4c, 0x29, 0x01, /* subq %rsi,(%rcx) */ 266 0x48, 0x83, 0xfe, 0x10, /* cmpq $0x10,%rsi */ 267 0x76, 0x15, /* jbe +0x15 */ 268 0x41, 0xb9, 0x00, 0x70, 0xba, 0x16, /* movl $MALLOC_MAGIC_2, %r9d */ 269 0x89, 0x70, 0x08, /* movl %r9d,0x8(%rax) */ 270 0x41, 0x29, 0xf1, /* subl %esi, %r9d */ 271 0x44, 0x89, 0x48, 0x0c, /* movl %r9d, 0xc(%rax) */ 272 0x48, 0x83, 0xc0, 0x10, /* addq $0x10, %rax */ 273 0xc3, /* ret */ 274 0x41, 0xb9, 0x00, 0xc0, 0x10, 0x3a, /* movl %MALLOC_MAGIC, %r9d */ 275 0x89, 0x30, /* movl %esi,(%rax) */ 276 0x41, 0x29, 0xf1, /* subl %esi,%r9d */ 277 0x44, 0x89, 0x48, 0x04, /* movl %r9d,0x4(%rax) */ 278 0x48, 0x83, 0xc0, 0x08, /* addq $0x8,%rax */ 279 0xc3, /* ret */ 280 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp $MALLOC */ 281 }; 282 283 /* 284 * if (t->tm_size + csize > umem_ptc_size) 285 * goto tofree; 286 * 287 * t->tm_size += csize 288 * *(void **)tag = *root; 289 * *root = tag; 290 * return; 291 * tofree: 292 * free(buf); 293 * return; 294 */ 295 #define PTC_FRFINI_RBUFLABEL 0x00 296 #define PTC_FRFINI_CACHEMAX 0x09 297 #define PTC_FRFINI_DONELABEL 0x1b 298 #define PTC_FRFINI_JFLABEL 0x1c 299 #define PTC_FRFINI_JFADDR 0x1d 300 static const uint8_t freefini[] = { 301 0x4c, 0x8b, 0x09, /* movq (%rcx),%r9 */ 302 0x4d, 0x01, 0xc1, /* addq %r8, %r9 */ 303 0x49, 0x81, 0xf9, 304 0x00, 0x00, 0x00, 0x00, /* cmpl $THR_CACHE_MAX, %r9 */ 305 0x77, 0x0d, /* jae +0xd (torfree) */ 306 0x4c, 0x01, 0x01, /* addq %r8,(%rcx) */ 307 0x4c, 0x8b, 0x0a, /* movq (%rdx),%r9 */ 308 0x4c, 0x89, 0x08, /* movq %r9,(%rax) */ 309 0x48, 0x89, 0x02, /* movq %rax,(%rdx) */ 310 0xc3, /* ret */ 311 0xe9, 0x00, 0x00, 0x00, 0x00 /* jmp free */ 312 }; 313 314 /* 315 * Construct the initial part of malloc. off contains the offset from curthread 316 * to the root of the tmem structure. ep is the address of the label to error 317 * and jump to free. 
csize is the size of the largest umem_cache in ptcumem. 318 */ 319 static int 320 genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize) 321 { 322 uint32_t addr; 323 324 bcopy(malinit, bp, sizeof (malinit)); 325 addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT); 326 bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr)); 327 bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize)); 328 addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV); 329 bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr)); 330 bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off)); 331 332 return (sizeof (malinit)); 333 } 334 335 static int 336 genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mcs) 337 { 338 uint32_t addr; 339 340 bcopy(freeinit, bp, sizeof (freeinit)); 341 addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE); 342 bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr)); 343 addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE); 344 bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr)); 345 bcopy(&mcs, bp + PTC_FRINI_MCS, sizeof (mcs)); 346 addr = PTC_JMPADDR(ep, PTC_FRINI_JOV); 347 bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr)); 348 bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off)); 349 return (sizeof (freeinit)); 350 } 351 352 353 /* 354 * Create the initial cache entry of the specified size. The value of ap tells 355 * us what the address of the label to try and allocate a buffer. This value is 356 * an offset from the current base to that value. 
357 */ 358 static int 359 genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap) 360 { 361 uint32_t addr; 362 363 bcopy(inicache, bp, sizeof (inicache)); 364 bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize)); 365 bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize)); 366 addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP); 367 ASSERT(addr != 0); 368 bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr)); 369 370 return (sizeof (inicache)); 371 } 372 373 static int 374 genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap) 375 { 376 uint32_t addr; 377 uint32_t coff; 378 379 ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num); 380 ASSERT(num != 0); 381 bcopy(gencache, bp, sizeof (gencache)); 382 bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize)); 383 bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize)); 384 coff = num * PTC_ROOT_SIZE; 385 bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff)); 386 addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP); 387 bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr)); 388 389 return (sizeof (gencache)); 390 } 391 392 static int 393 genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep) 394 { 395 uint8_t eap; 396 uint32_t coff; 397 398 ASSERT(ep <= 0xff && ep > 7); 399 ASSERT(UINT32_MAX / PTC_ROOT_SIZE > num); 400 bcopy(fincache, bp, sizeof (fincache)); 401 bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize)); 402 bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize)); 403 coff = num * PTC_ROOT_SIZE; 404 bcopy(&coff, bp + PTC_FINCACHE_NUM, sizeof (coff)); 405 eap = ep - PTC_FINCACHE_JMP - 1; 406 bcopy(&eap, bp + PTC_FINCACHE_JMP, sizeof (eap)); 407 408 return (sizeof (fincache)); 409 } 410 411 static int 412 genasm_malfini(uint8_t *bp, uintptr_t mptr) 413 { 414 uint32_t addr; 415 416 bcopy(malfini, bp, sizeof (malfini)); 417 addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR)); 418 bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr)); 419 420 return (sizeof (malfini)); 421 } 422 423 static int 424 genasm_frfini(uint8_t *bp, 
uint32_t maxthr, uintptr_t fptr) 425 { 426 uint32_t addr; 427 428 bcopy(freefini, bp, sizeof (freefini)); 429 bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr)); 430 addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR)); 431 bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr)); 432 433 return (sizeof (freefini)); 434 } 435 436 /* 437 * The malloc inline assembly is constructed as follows: 438 * 439 * o Malloc prologue assembly 440 * o Generic first-cache check 441 * o n Generic cache checks (where n = _tmem_get_entries() - 2) 442 * o Generic last-cache check 443 * o Malloc epilogue assembly 444 * 445 * Generally there are at least three caches. When there is only one cache we 446 * only use the generic last-cache. In the case where there are two caches, we 447 * just leave out the middle ones. 448 */ 449 static int 450 genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes) 451 { 452 int ii, off; 453 uint8_t *bp; 454 size_t total; 455 uint32_t allocoff, erroff; 456 457 total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache); 458 459 if (nents >= 2) 460 total += sizeof (inicache) + sizeof (gencache) * (nents - 2); 461 462 if (total > len) 463 return (1); 464 465 erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL; 466 allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL; 467 468 bp = base; 469 470 off = genasm_malinit(bp, umem_tmem_off, erroff, 471 umem_alloc_sizes[nents-1]); 472 bp += off; 473 allocoff -= off; 474 erroff -= off; 475 476 if (nents > 1) { 477 off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff); 478 bp += off; 479 allocoff -= off; 480 erroff -= off; 481 } 482 483 for (ii = 1; ii < nents - 1; ii++) { 484 off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff); 485 bp += off; 486 allocoff -= off; 487 erroff -= off; 488 } 489 490 bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], 491 erroff); 492 bp += genasm_malfini(bp, umem_genasm_omptr); 493 ASSERT(((uintptr_t)bp - total) == 
(uintptr_t)base); 494 495 return (0); 496 } 497 498 static int 499 genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes) 500 { 501 uint8_t *bp; 502 int ii, off; 503 size_t total; 504 uint32_t rbufoff, retoff, erroff; 505 506 /* Assume that nents has already been audited for us */ 507 total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache); 508 if (nents >= 2) 509 total += sizeof (inicache) + sizeof (gencache) * (nents - 2); 510 511 if (total > len) 512 return (1); 513 514 erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL); 515 rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL); 516 retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL); 517 518 bp = base; 519 520 off = genasm_frinit(bp, umem_tmem_off, retoff, erroff, 521 umem_alloc_sizes[nents - 1]); 522 bp += off; 523 erroff -= off; 524 rbufoff -= off; 525 526 if (nents > 1) { 527 off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff); 528 bp += off; 529 erroff -= off; 530 rbufoff -= off; 531 } 532 533 for (ii = 1; ii < nents - 1; ii++) { 534 off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff); 535 bp += off; 536 rbufoff -= off; 537 erroff -= off; 538 } 539 540 bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1], 541 erroff); 542 bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr); 543 ASSERT(((uintptr_t)bp - total) == (uintptr_t)base); 544 545 return (0); 546 } 547 548 /*ARGSUSED*/ 549 int 550 umem_genasm(int *cp, umem_cache_t **caches, int nc) 551 { 552 int nents, i; 553 uint8_t *mptr; 554 uint8_t *fptr; 555 uint64_t v, *vptr; 556 557 mptr = (void *)((uintptr_t)umem_genasm_mptr + 5); 558 fptr = (void *)((uintptr_t)umem_genasm_fptr + 5); 559 if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 || 560 umem_genasm_fptr == 0 || umem_genasm_fsize == 0) 561 return (1); 562 563 /* 564 * The total number of caches that we can service is the minimum of: 565 * o the amount supported by libc 566 * o the total number of umem caches 567 * o we use 
a single byte addl, so it's MAX_UINT32 / sizeof (uintptr_t) 568 * For 64-bit, this is MAX_UINT32 >> 3, a lot. 569 */ 570 nents = _tmem_get_nentries(); 571 572 if (UMEM_GENASM_MAX64 < nents) 573 nents = UMEM_GENASM_MAX64; 574 575 if (nc < nents) 576 nents = nc; 577 578 /* Based on our constraints, this is not an error */ 579 if (nents == 0 || umem_ptc_size == 0) 580 return (0); 581 582 /* Take into account the jump */ 583 if (genasm_malloc(mptr, umem_genasm_msize, nents, cp) != 0) 584 return (1); 585 586 if (genasm_free(fptr, umem_genasm_fsize, nents, cp) != 0) 587 return (1); 588 589 590 /* nop out the jump with a multibyte jump */ 591 vptr = (void *)umem_genasm_mptr; 592 v = MULTINOP; 593 v |= *vptr & (0xffffffULL << 40); 594 (void) atomic_swap_64(vptr, v); 595 vptr = (void *)umem_genasm_fptr; 596 v = MULTINOP; 597 v |= *vptr & (0xffffffULL << 40); 598 (void) atomic_swap_64(vptr, v); 599 600 for (i = 0; i < nents; i++) 601 caches[i]->cache_flags |= UMF_PTC; 602 603 return (0); 604 } 605