/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2014 Joyent, Inc. All rights reserved.
 */

/*
 * Don't Panic! If you find the blocks of assembly that follow confusing and
 * you're questioning why they exist, please go read section 8 of the umem.c
 * big theory statement. Next familiarize yourself with the malloc and free
 * implementations in libumem's malloc.c.
 *
 * What follows is the i386 implementation of the thread caching automatic
 * assembly generation. On i386 a function has only three registers it may
 * modify without restoring them: eax, ecx, and edx. All others have to be
 * preserved. Because the set of registers available to us is so small, we
 * also make use of esi, ebx, and edi and save their original values on the
 * stack.
 *
 * Malloc register usage:
 *	o. esi: Size of the malloc (passed into us and modified)
 *	o. edi: Size of the cache
 *	o. eax: Buffer to return
 *	o. ebx: Scratch space and temporary values
 *	o. ecx: Pointer to the tmem_t in the ulwp_t.
 *	o. edx: Pointer to the tmem_t array of roots
 *
 * Free register usage:
 *	o. esi: Size of the buffer being freed (read from its malloc tag)
 *	o. edi: Size of the cache
 *	o. eax: Buffer to free
 *	o. ebx: Scratch space and temporary values
 *	o. ecx: Pointer to the tmem_t in the ulwp_t.
 *	o. edx: Pointer to the tmem_t array of roots
 *
 * Once we determine which cache we are using, we increment %edx to the
 * appropriate offset and set %edi with the size of the cache. This means that
 * when we break out to the normal buffer allocation point %edx contains the
 * head of the linked list and %edi is the amount by which we have to adjust
 * the total amount cached by the thread.
 *
 * Each block of assembly has pseudocode that describes its purpose.
 */

#include <inttypes.h>
#include <strings.h>
#include <umem_impl.h>
#include "umem_base.h"

#include <atomic.h>

const int umem_genasm_supported = 1;
static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
static size_t umem_genasm_msize = 512;
static uintptr_t umem_genasm_fptr = (uintptr_t)&_free;
static size_t umem_genasm_fsize = 512;
static uintptr_t umem_genasm_omptr = (uintptr_t)umem_malloc;
static uintptr_t umem_genasm_ofptr = (uintptr_t)umem_malloc_free;
/*
 * The maximum number of caches we can support. We use a single byte addl, so
 * this is 255 (UINT8_MAX) / sizeof (uintptr_t); in this case, 63.
 */
#define	UMEM_GENASM_MAX32	63

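/*
 * A note on the macros below: PTC_JMPADDR() computes the rel32 displacement
 * stored into a jmp/jcc. The displacement is measured from the end of the
 * 4-byte immediate (src + 4) to the destination, where both values are
 * offsets into the generated buffer (or absolute addresses, in the *fini
 * cases). PTC_ROOT_SIZE is the width of a single tm_roots[] entry. MULTINOP,
 * read little-endian, is the canonical five-byte nop (0f 1f 44 00 00, i.e.
 * nopl 0x0(%eax,%eax,1)); it is used to overwrite the five-byte jump that
 * libc's _malloc and _free are assumed to begin with (see umem_genasm()
 * below).
 */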
#define	PTC_JMPADDR(dest, src)	(dest - (src + 4))
#define	PTC_ROOT_SIZE	sizeof (uintptr_t)
#define	MULTINOP	0x0000441f0f

/*
 * void *ptcmalloc(size_t orig_size);
 *
 * size_t size = orig_size + 8;
 *
 * if (size < orig_size)
 *	goto tomalloc;		! This is overflow
 *
 * if (size > cache_size)
 *	goto tomalloc;
 *
 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
 * void **roots = t->tm_roots;
 */
#define	PTC_MALINIT_JOUT	0x0e
#define	PTC_MALINIT_MCS		0x14
#define	PTC_MALINIT_JOV		0x1a
#define	PTC_MALINIT_SOFF	0x27
static const uint8_t malinit[] = {
	0x55,					/* pushl %ebp */
	0x89, 0xe5,				/* movl %esp, %ebp */
	0x57,					/* pushl %edi */
	0x56,					/* pushl %esi */
	0x53,					/* pushl %ebx */
	0x8b, 0x75, 0x08,			/* movl 0x8(%ebp), %esi */
	0x83, 0xc6, 0x08,			/* addl $0x8,%esi */
	0x0f, 0x82, 0x00, 0x00, 0x00, 0x00,	/* jc +$JMP (errout) */
	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +$JMP (errout) */
	0x65, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, /* movl %gs:0x0,%ecx */
	0x81, 0xc1, 0x00, 0x00, 0x00, 0x00,	/* addl $OFF, %ecx */
	0x8d, 0x51, 0x04			/* leal 0x4(%ecx), %edx */
};

/*
 * void ptcfree(void *buf);
 *
 * if (buf == NULL)
 *	return;
 *
 * malloc_data_t *tag = buf;
 * tag--;
 * int size = tag->malloc_size;
 * int tagval = UMEM_MALLOC_DECODE(tag->malloc_tag, size);
 *
 * if (tagval != MALLOC_MAGIC)
 *	goto tofree;
 *
 * if (size > cache_max)
 *	goto tofree;
 *
 * tmem_t *t = (uintptr_t)curthread() + umem_thr_offset;
 * void **roots = t->tm_roots;
 */
#define	PTC_FRINI_JDONE		0x0d
#define	PTC_FRINI_JFREE		0x23
#define	PTC_FRINI_MCS		0x29
#define	PTC_FRINI_JOV		0x2f
#define	PTC_FRINI_SOFF		0x3c
static const uint8_t freeinit[] = {
	0x55,					/* pushl %ebp */
	0x89, 0xe5,				/* movl %esp, %ebp */
	0x57,					/* pushl %edi */
	0x56,					/* pushl %esi */
	0x53,					/* pushl %ebx */
	0x8b, 0x45, 0x08,			/* movl 0x8(%ebp), %eax */
	0x85, 0xc0,				/* testl %eax, %eax */
	0x0f, 0x84, 0x00, 0x00, 0x00, 0x00,	/* je $JDONE (done) */
	0x83, 0xe8, 0x08,			/* subl $0x8,%eax */
	0x8b, 0x30,				/* movl (%eax),%esi */
	0x8b, 0x50, 0x04,			/* movl 0x4(%eax),%edx */
	0x01, 0xf2,				/* addl %esi,%edx */
	0x81, 0xfa, 0x00, 0xc0, 0x10, 0x3a,	/* cmpl MAGIC32, %edx */
	0x0f, 0x85, 0x00, 0x00, 0x00, 0x00,	/* jne +$JFREE (realfree) */

	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x0f, 0x87, 0x00, 0x00, 0x00, 0x00,	/* ja +$JMP (realfree) */
	0x65, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, /* movl %gs:0x0,%ecx */
	0x81, 0xc1, 0x00, 0x00, 0x00, 0x00,	/* addl $OFF, %ecx */
	0x8d, 0x51, 0x04			/* leal 0x4(%ecx),%edx */
};

/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 * } else ...			! goto next cache
 */
#define	PTC_INICACHE_CMP	0x02
#define	PTC_INICACHE_SIZE	0x09
#define	PTC_INICACHE_JMP	0x0e
static const uint8_t inicache[] = {
	0x81, 0xfe, 0xff, 0x00, 0x00, 0x00,	/* cmpl sizeof ($C0), %esi */
	0x77, 0x0a,				/* ja +0xa */
	0xbf, 0xff, 0x00, 0x00, 0x00,		/* movl sizeof ($C0), %edi */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp +$JMP (allocbuf) */
};

/*
 * if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 *	roots += $CACHE_NUM;
 * } else ...			! goto next cache
 */
#define	PTC_GENCACHE_CMP	0x02
#define	PTC_GENCACHE_NUM	0x0a
#define	PTC_GENCACHE_SIZE	0x0c
#define	PTC_GENCACHE_JMP	0x11
static const uint8_t gencache[] = {
	0x81, 0xfe, 0x00, 0x00, 0x00, 0x00,	/* cmpl sizeof ($CACHE), %esi */
	0x77, 0x0d,				/* ja +0xd (next cache) */
	0x83, 0xc2, 0x00,			/* addl $4*$ii, %edx */
	0xbf, 0x00, 0x00, 0x00, 0x00,		/* movl sizeof ($CACHE), %edi */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp +$JMP (allocbuf) */
};
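
/*
 * For illustration only: a gencache stanza patched by genasm_gencache() for
 * the third cache (num == 2) with a 0x30-byte cache size would assemble to
 * roughly:
 *
 *	cmpl $0x30, %esi	! size (including the 8-byte tag) <= 0x30?
 *	ja   +0xd		! no, fall through to the next stanza
 *	addl $0x8, %edx		! roots += 2 (num * PTC_ROOT_SIZE bytes)
 *	movl $0x30, %edi	! csize = 0x30
 *	jmp  allocbuf		! rel32 filled in via PTC_JMPADDR()
 *
 * The cache index and size above are made up for the example; the real values
 * come from the alloc_sizes array handed to umem_genasm().
 */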

/*
 * else if (size <= $CACHE_SIZE) {
 *	csize = $CACHE_SIZE;
 *	roots += $CACHE_NUM;
 * } else {
 *	goto tofunc;		! goto tomalloc if ptcmalloc.
 * }				! goto tofree if ptcfree.
 */
#define	PTC_FINCACHE_CMP	0x02
#define	PTC_FINCACHE_JMP	0x07
#define	PTC_FINCACHE_NUM	0x0a
#define	PTC_FINCACHE_SIZE	0x0c
static const uint8_t fincache[] = {
	0x81, 0xfe, 0xff, 0x00, 0x00, 0x00,	/* cmpl sizeof ($CLAST), %esi */
	0x77, 0x00,				/* ja +$JMP (to errout) */
	0x83, 0xc2, 0x00,			/* addl $4*($NCACHES-1), %edx */
	0xbf, 0x00, 0x00, 0x00, 0x00,		/* movl sizeof ($CLAST), %edi */
};

/*
 * if (*root == NULL)
 *	goto tomalloc;
 *
 * malloc_data_t *ret = *root;
 * *root = *(void **)ret;
 * t->tm_size -= csize;
 * ret->malloc_size = size;
 *
 * ret->malloc_data = UMEM_MALLOC_ENCODE(MALLOC_SECOND_MAGIC, size);
 * ret++;
 *
 * return ((void *)ret);
 * tomalloc:
 * return (malloc(orig_size));
 */
#define	PTC_MALFINI_ALLABEL	0x00
#define	PTC_MALFINI_JMLABEL	0x20
#define	PTC_MALFINI_JMADDR	0x25
static const uint8_t malfini[] = {
	/* allocbuf: */
	0x8b, 0x02,				/* movl (%edx), %eax */
	0x85, 0xc0,				/* testl %eax, %eax */
	0x74, 0x1a,				/* je +0x1a (errout) */
	0x8b, 0x18,				/* movl (%eax), %ebx */
	0x89, 0x1a,				/* movl %ebx, (%edx) */
	0x29, 0x39,				/* subl %edi, (%ecx) */
	0x89, 0x30,				/* movl %esi, (%eax) */
	0xba, 0x00, 0xc0, 0x10, 0x3a,		/* movl $0x3a10c000,%edx */
	0x29, 0xf2,				/* subl %esi, %edx */
	0x89, 0x50, 0x04,			/* movl %edx, 0x4(%eax) */
	0x83, 0xc0, 0x08,			/* addl $0x8, %eax */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xc3,					/* ret */
	/* errout: */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp $malloc */
};

/*
 * if (t->tm_size + csize > umem_ptc_size)
 *	goto tofree;
 *
 * t->tm_size += csize;
 * *(void **)tag = *root;
 * *root = tag;
 * return;
 * tofree:
 * free(buf);
 * return;
 */
#define	PTC_FRFINI_RBUFLABEL	0x00
#define	PTC_FRFINI_CACHEMAX	0x06
#define	PTC_FRFINI_DONELABEL	0x14
#define	PTC_FRFINI_JFLABEL	0x19
#define	PTC_FRFINI_JFADDR	0x1e
static const uint8_t freefini[] = {
	/* freebuf: */
	0x8b, 0x19,				/* movl (%ecx),%ebx */
	0x01, 0xfb,				/* addl %edi,%ebx */
	0x81, 0xfb, 0x00, 0x00, 0x00, 0x00,	/* cmpl maxsize, %ebx */
	0x73, 0x0d,				/* jae +0xd <tofree> */
	0x01, 0x39,				/* addl %edi,(%ecx) */
	0x8b, 0x3a,				/* movl (%edx),%edi */
	0x89, 0x38,				/* movl %edi,(%eax) */
	0x89, 0x02,				/* movl %eax,(%edx) */
	/* done: */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xc3,					/* ret */
	/* realfree: */
	0x5b,					/* popl %ebx */
	0x5e,					/* popl %esi */
	0x5f,					/* popl %edi */
	0xc9,					/* leave */
	0xe9, 0x00, 0x00, 0x00, 0x00		/* jmp free */
};
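
/*
 * Each genasm_* function below copies one of the templates above into the
 * destination buffer and then patches the immediate and displacement fields
 * at the PTC_* offsets. Jump targets inside the buffer are passed around as
 * offsets relative to the current write position, while the final jumps out
 * to the original malloc and free are computed from absolute addresses,
 * since the generated buffer is executed in place.
 */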

/*
 * Construct the initial part of malloc. off contains the offset from
 * curthread to the root of the tmem structure. ep is the offset of the error
 * label, which tail-jumps to the original malloc. csize is the size of the
 * largest umem_cache in ptcumem.
 */
static int
genasm_malinit(uint8_t *bp, uint32_t off, uint32_t ep, uint32_t csize)
{
	uint32_t addr;

	bcopy(malinit, bp, sizeof (malinit));
	addr = PTC_JMPADDR(ep, PTC_MALINIT_JOUT);
	bcopy(&addr, bp + PTC_MALINIT_JOUT, sizeof (addr));
	bcopy(&csize, bp + PTC_MALINIT_MCS, sizeof (csize));
	addr = PTC_JMPADDR(ep, PTC_MALINIT_JOV);
	bcopy(&addr, bp + PTC_MALINIT_JOV, sizeof (addr));
	bcopy(&off, bp + PTC_MALINIT_SOFF, sizeof (off));

	return (sizeof (malinit));
}

static int
genasm_frinit(uint8_t *bp, uint32_t off, uint32_t dp, uint32_t ep, uint32_t mc)
{
	uint32_t addr;

	bcopy(freeinit, bp, sizeof (freeinit));
	addr = PTC_JMPADDR(dp, PTC_FRINI_JDONE);
	bcopy(&addr, bp + PTC_FRINI_JDONE, sizeof (addr));
	addr = PTC_JMPADDR(ep, PTC_FRINI_JFREE);
	bcopy(&addr, bp + PTC_FRINI_JFREE, sizeof (addr));
	bcopy(&mc, bp + PTC_FRINI_MCS, sizeof (mc));
	addr = PTC_JMPADDR(ep, PTC_FRINI_JOV);
	bcopy(&addr, bp + PTC_FRINI_JOV, sizeof (addr));
	bcopy(&off, bp + PTC_FRINI_SOFF, sizeof (off));
	return (sizeof (freeinit));
}

/*
 * Create the initial cache entry of the specified size. ap is the offset,
 * relative to the current position in the buffer, of the label at which we
 * try to allocate a buffer (allocbuf).
 */
static int
genasm_firstcache(uint8_t *bp, uint32_t csize, uint32_t ap)
{
	uint32_t addr;

	bcopy(inicache, bp, sizeof (inicache));
	bcopy(&csize, bp + PTC_INICACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_INICACHE_SIZE, sizeof (csize));
	addr = PTC_JMPADDR(ap, PTC_INICACHE_JMP);
	ASSERT(addr != 0);
	bcopy(&addr, bp + PTC_INICACHE_JMP, sizeof (addr));

	return (sizeof (inicache));
}

static int
genasm_gencache(uint8_t *bp, int num, uint32_t csize, uint32_t ap)
{
	uint32_t addr;
	uint8_t coff;

	ASSERT(256 / PTC_ROOT_SIZE > num);
	ASSERT(num != 0);
	bcopy(gencache, bp, sizeof (gencache));
	bcopy(&csize, bp + PTC_GENCACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_GENCACHE_SIZE, sizeof (csize));
	coff = num * PTC_ROOT_SIZE;
	bcopy(&coff, bp + PTC_GENCACHE_NUM, sizeof (coff));
	addr = PTC_JMPADDR(ap, PTC_GENCACHE_JMP);
	bcopy(&addr, bp + PTC_GENCACHE_JMP, sizeof (addr));

	return (sizeof (gencache));
}

static int
genasm_lastcache(uint8_t *bp, int num, uint32_t csize, uint32_t ep)
{
	uint8_t addr;

	ASSERT(ep <= 0xff && ep > 7);
	ASSERT(256 / PTC_ROOT_SIZE > num);
	bcopy(fincache, bp, sizeof (fincache));
	bcopy(&csize, bp + PTC_FINCACHE_CMP, sizeof (csize));
	bcopy(&csize, bp + PTC_FINCACHE_SIZE, sizeof (csize));
	addr = num * PTC_ROOT_SIZE;
	bcopy(&addr, bp + PTC_FINCACHE_NUM, sizeof (addr));
	addr = ep - PTC_FINCACHE_JMP - 1;
	bcopy(&addr, bp + PTC_FINCACHE_JMP, sizeof (addr));

	return (sizeof (fincache));
}

static int
genasm_malfini(uint8_t *bp, uintptr_t mptr)
{
	uint32_t addr;

	bcopy(malfini, bp, sizeof (malfini));
	addr = PTC_JMPADDR(mptr, ((uintptr_t)bp + PTC_MALFINI_JMADDR));
	bcopy(&addr, bp + PTC_MALFINI_JMADDR, sizeof (addr));

	return (sizeof (malfini));
}

static int
genasm_frfini(uint8_t *bp, uint32_t maxthr, uintptr_t fptr)
{
	uint32_t addr;

	bcopy(freefini, bp, sizeof (freefini));
	bcopy(&maxthr, bp + PTC_FRFINI_CACHEMAX, sizeof (maxthr));
	addr = PTC_JMPADDR(fptr, ((uintptr_t)bp + PTC_FRFINI_JFADDR));
	bcopy(&addr, bp + PTC_FRFINI_JFADDR, sizeof (addr));

	return (sizeof (freefini));
}

/*
 * The malloc inline assembly is constructed as follows:
 *
 * o Malloc prologue assembly
 * o Generic first-cache check
 * o n Generic cache checks (where n = _tmem_get_nentries() - 2)
 * o Generic last-cache check
 * o Malloc epilogue assembly
 *
 * Generally there are at least three caches. When there is only one cache we
 * only use the generic last-cache check. In the case where there are two
 * caches, we just leave out the middle checks.
 */
static int
genasm_malloc(void *base, size_t len, int nents, int *umem_alloc_sizes)
{
	int ii, off;
	uint8_t *bp;
	size_t total;
	uint32_t allocoff, erroff;

	total = sizeof (malinit) + sizeof (malfini) + sizeof (fincache);

	if (nents >= 2)
		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);

	if (total > len)
		return (1);

	erroff = total - sizeof (malfini) + PTC_MALFINI_JMLABEL;
	allocoff = total - sizeof (malfini) + PTC_MALFINI_ALLABEL;

	bp = base;

	off = genasm_malinit(bp, umem_tmem_off, erroff,
	    umem_alloc_sizes[nents - 1]);
	bp += off;
	allocoff -= off;
	erroff -= off;

	if (nents > 1) {
		off = genasm_firstcache(bp, umem_alloc_sizes[0], allocoff);
		bp += off;
		allocoff -= off;
		erroff -= off;
	}

	for (ii = 1; ii < nents - 1; ii++) {
		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], allocoff);
		bp += off;
		allocoff -= off;
		erroff -= off;
	}

	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
	    erroff);
	bp += genasm_malfini(bp, umem_genasm_omptr);
	ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);

	return (0);
}

static int
genasm_free(void *base, size_t len, int nents, int *umem_alloc_sizes)
{
	uint8_t *bp;
	int ii, off;
	size_t total;
	uint32_t rbufoff, retoff, erroff;

	/* Assume that nents has already been audited for us */
	total = sizeof (freeinit) + sizeof (freefini) + sizeof (fincache);
	if (nents >= 2)
		total += sizeof (inicache) + sizeof (gencache) * (nents - 2);

	if (total > len)
		return (1);

	erroff = total - (sizeof (freefini) - PTC_FRFINI_JFLABEL);
	rbufoff = total - (sizeof (freefini) - PTC_FRFINI_RBUFLABEL);
	retoff = total - (sizeof (freefini) - PTC_FRFINI_DONELABEL);

	bp = base;

	off = genasm_frinit(bp, umem_tmem_off, retoff, erroff,
	    umem_alloc_sizes[nents - 1]);
	bp += off;
	erroff -= off;
	rbufoff -= off;

	if (nents > 1) {
		off = genasm_firstcache(bp, umem_alloc_sizes[0], rbufoff);
		bp += off;
		erroff -= off;
		rbufoff -= off;
	}

	for (ii = 1; ii < nents - 1; ii++) {
		off = genasm_gencache(bp, ii, umem_alloc_sizes[ii], rbufoff);
		bp += off;
		rbufoff -= off;
		erroff -= off;
	}

	bp += genasm_lastcache(bp, nents - 1, umem_alloc_sizes[nents - 1],
	    erroff);
	bp += genasm_frfini(bp, umem_ptc_size, umem_genasm_ofptr);
	ASSERT(((uintptr_t)bp - total) == (uintptr_t)base);

	return (0);
}

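/*
 * umem_genasm() is the entry point for all of the above: it writes the malloc
 * and free trampolines into the space that libc is expected to reserve after
 * the initial jump in _malloc and _free, atomically replaces that jump with a
 * five-byte nop so callers fall through into the generated code, and flags
 * the caches it covers with UMF_PTC. It returns zero both on success and when
 * per-thread caching is simply disabled; a non-zero return means the
 * trampolines could not be generated and the jumps are left intact.
 */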
int
umem_genasm(int *alloc_sizes, umem_cache_t **caches, int ncaches)
{
	int nents, i;
	uint8_t *mptr;
	uint8_t *fptr;
	uint64_t v, *vptr;

	mptr = (void *)((uintptr_t)umem_genasm_mptr + 5);
	fptr = (void *)((uintptr_t)umem_genasm_fptr + 5);
	if (umem_genasm_mptr == 0 || umem_genasm_msize == 0 ||
	    umem_genasm_fptr == 0 || umem_genasm_fsize == 0)
		return (1);

	/*
	 * The total number of caches that we can service is the minimum of:
	 *  o the number of thread cache entries supported by libc
	 *  o the total number of umem caches
	 *  o the number we can reach with a single byte addl, which is
	 *    255 / sizeof (uintptr_t); for 32-bit, 63.
	 */
	nents = _tmem_get_nentries();

	if (UMEM_GENASM_MAX32 < nents)
		nents = UMEM_GENASM_MAX32;

	if (ncaches < nents)
		nents = ncaches;

	/* Based on our constraints, this is not an error */
	if (nents == 0 || umem_ptc_size == 0)
		return (0);

	/* Take into account the jump at the start of _malloc and _free */
	if (genasm_malloc(mptr, umem_genasm_msize, nents, alloc_sizes) != 0)
		return (1);

	if (genasm_free(fptr, umem_genasm_fsize, nents, alloc_sizes) != 0)
		return (1);

	/* nop out the jump with a multibyte nop */
	vptr = (void *)umem_genasm_mptr;
	v = MULTINOP;
	v |= *vptr & (0xffffffULL << 40);
	(void) atomic_swap_64(vptr, v);
	vptr = (void *)umem_genasm_fptr;
	v = MULTINOP;
	v |= *vptr & (0xffffffULL << 40);
	(void) atomic_swap_64(vptr, v);

	for (i = 0; i < nents; i++)
		caches[i]->cache_flags |= UMF_PTC;

	return (0);
}
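
/*
 * Usage sketch (illustrative only, not part of this file): umem's startup
 * path is expected to invoke umem_genasm() once, after the fixed-size caches
 * have been created, along the lines of:
 *
 *	if (umem_ptc_size != 0 && umem_genasm_supported)
 *		(void) umem_genasm(umem_alloc_sizes, umem_alloc_caches,
 *		    NUM_ALLOC_SIZES);
 *
 * The identifiers in the call above are assumptions about the caller in
 * umem.c; see the big theory statement there for the authoritative flow.
 */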