1 //===------------- OrcABISupport.cpp - ABI specific support code ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" 10 #include "llvm/Support/FormatVariadic.h" 11 #include "llvm/Support/Process.h" 12 #include "llvm/Support/raw_ostream.h" 13 14 #define DEBUG_TYPE "orc" 15 16 using namespace llvm; 17 18 template <typename ORCABI> 19 bool stubAndPointerRangesOk(JITTargetAddress StubBlockAddr, 20 JITTargetAddress PointerBlockAddr, 21 unsigned NumStubs) { 22 constexpr unsigned MaxDisp = ORCABI::StubToPointerMaxDisplacement; 23 JITTargetAddress FirstStub = StubBlockAddr; 24 JITTargetAddress LastStub = FirstStub + ((NumStubs - 1) * ORCABI::StubSize); 25 JITTargetAddress FirstPointer = PointerBlockAddr; 26 JITTargetAddress LastPointer = 27 FirstPointer + ((NumStubs - 1) * ORCABI::StubSize); 28 29 if (FirstStub < FirstPointer) { 30 if (LastStub >= FirstPointer) 31 return false; // Ranges overlap. 32 return (FirstPointer - FirstStub <= MaxDisp) && 33 (LastPointer - LastStub <= MaxDisp); // out-of-range. 34 } 35 36 if (LastPointer >= FirstStub) 37 return false; // Ranges overlap. 38 39 return (FirstStub - FirstPointer <= MaxDisp) && 40 (LastStub - LastPointer <= MaxDisp); 41 } 42 43 namespace llvm { 44 namespace orc { 45 46 void OrcAArch64::writeResolverCode(char *ResolverWorkingMem, 47 JITTargetAddress ResolverTargetAddress, 48 JITTargetAddress ReentryFnAddr, 49 JITTargetAddress ReentryCtxAddr) { 50 51 const uint32_t ResolverCode[] = { 52 // resolver_entry: 53 0xa9bf47fd, // 0x000: stp x29, x17, [sp, #-16]! 54 0x910003fd, // 0x004: mov x29, sp 55 0xa9bf73fb, // 0x008: stp x27, x28, [sp, #-16]! 56 0xa9bf6bf9, // 0x00c: stp x25, x26, [sp, #-16]! 57 0xa9bf63f7, // 0x010: stp x23, x24, [sp, #-16]! 58 0xa9bf5bf5, // 0x014: stp x21, x22, [sp, #-16]! 59 0xa9bf53f3, // 0x018: stp x19, x20, [sp, #-16]! 60 0xa9bf3fee, // 0x01c: stp x14, x15, [sp, #-16]! 61 0xa9bf37ec, // 0x020: stp x12, x13, [sp, #-16]! 62 0xa9bf2fea, // 0x024: stp x10, x11, [sp, #-16]! 63 0xa9bf27e8, // 0x028: stp x8, x9, [sp, #-16]! 64 0xa9bf1fe6, // 0x02c: stp x6, x7, [sp, #-16]! 65 0xa9bf17e4, // 0x030: stp x4, x5, [sp, #-16]! 66 0xa9bf0fe2, // 0x034: stp x2, x3, [sp, #-16]! 67 0xa9bf07e0, // 0x038: stp x0, x1, [sp, #-16]! 68 0xadbf7ffe, // 0x03c: stp q30, q31, [sp, #-32]! 69 0xadbf77fc, // 0x040: stp q28, q29, [sp, #-32]! 70 0xadbf6ffa, // 0x044: stp q26, q27, [sp, #-32]! 71 0xadbf67f8, // 0x048: stp q24, q25, [sp, #-32]! 72 0xadbf5ff6, // 0x04c: stp q22, q23, [sp, #-32]! 73 0xadbf57f4, // 0x050: stp q20, q21, [sp, #-32]! 74 0xadbf4ff2, // 0x054: stp q18, q19, [sp, #-32]! 75 0xadbf47f0, // 0x058: stp q16, q17, [sp, #-32]! 76 0xadbf3fee, // 0x05c: stp q14, q15, [sp, #-32]! 77 0xadbf37ec, // 0x060: stp q12, q13, [sp, #-32]! 78 0xadbf2fea, // 0x064: stp q10, q11, [sp, #-32]! 79 0xadbf27e8, // 0x068: stp q8, q9, [sp, #-32]! 80 0xadbf1fe6, // 0x06c: stp q6, q7, [sp, #-32]! 81 0xadbf17e4, // 0x070: stp q4, q5, [sp, #-32]! 82 0xadbf0fe2, // 0x074: stp q2, q3, [sp, #-32]! 83 0xadbf07e0, // 0x078: stp q0, q1, [sp, #-32]! 84 0x580004e0, // 0x07c: ldr x0, Lreentry_ctx_ptr 85 0xaa1e03e1, // 0x080: mov x1, x30 86 0xd1003021, // 0x084: sub x1, x1, #12 87 0x58000442, // 0x088: ldr x2, Lreentry_fn_ptr 88 0xd63f0040, // 0x08c: blr x2 89 0xaa0003f1, // 0x090: mov x17, x0 90 0xacc107e0, // 0x094: ldp q0, q1, [sp], #32 91 0xacc10fe2, // 0x098: ldp q2, q3, [sp], #32 92 0xacc117e4, // 0x09c: ldp q4, q5, [sp], #32 93 0xacc11fe6, // 0x0a0: ldp q6, q7, [sp], #32 94 0xacc127e8, // 0x0a4: ldp q8, q9, [sp], #32 95 0xacc12fea, // 0x0a8: ldp q10, q11, [sp], #32 96 0xacc137ec, // 0x0ac: ldp q12, q13, [sp], #32 97 0xacc13fee, // 0x0b0: ldp q14, q15, [sp], #32 98 0xacc147f0, // 0x0b4: ldp q16, q17, [sp], #32 99 0xacc14ff2, // 0x0b8: ldp q18, q19, [sp], #32 100 0xacc157f4, // 0x0bc: ldp q20, q21, [sp], #32 101 0xacc15ff6, // 0x0c0: ldp q22, q23, [sp], #32 102 0xacc167f8, // 0x0c4: ldp q24, q25, [sp], #32 103 0xacc16ffa, // 0x0c8: ldp q26, q27, [sp], #32 104 0xacc177fc, // 0x0cc: ldp q28, q29, [sp], #32 105 0xacc17ffe, // 0x0d0: ldp q30, q31, [sp], #32 106 0xa8c107e0, // 0x0d4: ldp x0, x1, [sp], #16 107 0xa8c10fe2, // 0x0d8: ldp x2, x3, [sp], #16 108 0xa8c117e4, // 0x0dc: ldp x4, x5, [sp], #16 109 0xa8c11fe6, // 0x0e0: ldp x6, x7, [sp], #16 110 0xa8c127e8, // 0x0e4: ldp x8, x9, [sp], #16 111 0xa8c12fea, // 0x0e8: ldp x10, x11, [sp], #16 112 0xa8c137ec, // 0x0ec: ldp x12, x13, [sp], #16 113 0xa8c13fee, // 0x0f0: ldp x14, x15, [sp], #16 114 0xa8c153f3, // 0x0f4: ldp x19, x20, [sp], #16 115 0xa8c15bf5, // 0x0f8: ldp x21, x22, [sp], #16 116 0xa8c163f7, // 0x0fc: ldp x23, x24, [sp], #16 117 0xa8c16bf9, // 0x100: ldp x25, x26, [sp], #16 118 0xa8c173fb, // 0x104: ldp x27, x28, [sp], #16 119 0xa8c17bfd, // 0x108: ldp x29, x30, [sp], #16 120 0xd65f0220, // 0x10c: ret x17 121 0x01234567, // 0x110: Lreentry_fn_ptr: 122 0xdeadbeef, // 0x114: .quad 0 123 0x98765432, // 0x118: Lreentry_ctx_ptr: 124 0xcafef00d // 0x11c: .quad 0 125 }; 126 127 const unsigned ReentryFnAddrOffset = 0x110; 128 const unsigned ReentryCtxAddrOffset = 0x118; 129 130 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); 131 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, 132 sizeof(uint64_t)); 133 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, 134 sizeof(uint64_t)); 135 } 136 137 void OrcAArch64::writeTrampolines(char *TrampolineBlockWorkingMem, 138 JITTargetAddress TrampolineBlockTargetAddress, 139 JITTargetAddress ResolverAddr, 140 unsigned NumTrampolines) { 141 142 unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8); 143 144 memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, 145 sizeof(uint64_t)); 146 147 // OffsetToPtr is actually the offset from the PC for the 2nd instruction, so 148 // subtract 32-bits. 149 OffsetToPtr -= 4; 150 151 uint32_t *Trampolines = 152 reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); 153 154 for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { 155 Trampolines[3 * I + 0] = 0xaa1e03f1; // mov x17, x30 156 Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // adr x16, Lptr 157 Trampolines[3 * I + 2] = 0xd63f0200; // blr x16 158 } 159 } 160 161 void OrcAArch64::writeIndirectStubsBlock( 162 char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, 163 JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { 164 // Stub format is: 165 // 166 // .section __orc_stubs 167 // stub1: 168 // ldr x0, ptr1 ; PC-rel load of ptr1 169 // br x0 ; Jump to resolver 170 // stub2: 171 // ldr x0, ptr2 ; PC-rel load of ptr2 172 // br x0 ; Jump to resolver 173 // 174 // ... 175 // 176 // .section __orc_ptrs 177 // ptr1: 178 // .quad 0x0 179 // ptr2: 180 // .quad 0x0 181 // 182 // ... 183 184 static_assert(StubSize == PointerSize, 185 "Pointer and stub size must match for algorithm below"); 186 assert(stubAndPointerRangesOk<OrcAArch64>( 187 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && 188 "PointersBlock is out of range"); 189 uint64_t PtrDisplacement = 190 PointersBlockTargetAddress - StubsBlockTargetAddress; 191 uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem); 192 uint64_t PtrOffsetField = PtrDisplacement << 3; 193 194 for (unsigned I = 0; I < NumStubs; ++I) 195 Stub[I] = 0xd61f020058000010 | PtrOffsetField; 196 } 197 198 void OrcX86_64_Base::writeTrampolines( 199 char *TrampolineBlockWorkingMem, 200 JITTargetAddress TrampolineBlockTargetAddress, 201 JITTargetAddress ResolverAddr, unsigned NumTrampolines) { 202 203 unsigned OffsetToPtr = NumTrampolines * TrampolineSize; 204 205 memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, 206 sizeof(uint64_t)); 207 208 uint64_t *Trampolines = 209 reinterpret_cast<uint64_t *>(TrampolineBlockWorkingMem); 210 uint64_t CallIndirPCRel = 0xf1c40000000015ff; 211 212 for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) 213 Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16); 214 } 215 216 void OrcX86_64_Base::writeIndirectStubsBlock( 217 char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, 218 JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { 219 // Stub format is: 220 // 221 // .section __orc_stubs 222 // stub1: 223 // jmpq *ptr1(%rip) 224 // .byte 0xC4 ; <- Invalid opcode padding. 225 // .byte 0xF1 226 // stub2: 227 // jmpq *ptr2(%rip) 228 // 229 // ... 230 // 231 // .section __orc_ptrs 232 // ptr1: 233 // .quad 0x0 234 // ptr2: 235 // .quad 0x0 236 // 237 // ... 238 239 // Populate the stubs page stubs and mark it executable. 240 static_assert(StubSize == PointerSize, 241 "Pointer and stub size must match for algorithm below"); 242 assert(stubAndPointerRangesOk<OrcX86_64_Base>( 243 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && 244 "PointersBlock is out of range"); 245 uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem); 246 uint64_t PtrOffsetField = 247 (PointersBlockTargetAddress - StubsBlockTargetAddress - 6) << 16; 248 for (unsigned I = 0; I < NumStubs; ++I) 249 Stub[I] = 0xF1C40000000025ff | PtrOffsetField; 250 } 251 252 void OrcX86_64_SysV::writeResolverCode(char *ResolverWorkingMem, 253 JITTargetAddress ResolverTargetAddress, 254 JITTargetAddress ReentryFnAddr, 255 JITTargetAddress ReentryCtxAddr) { 256 257 LLVM_DEBUG({ 258 dbgs() << "Writing resolver code to " 259 << formatv("{0:x16}", ResolverTargetAddress) << "\n"; 260 }); 261 262 const uint8_t ResolverCode[] = { 263 // resolver_entry: 264 0x55, // 0x00: pushq %rbp 265 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp 266 0x50, // 0x04: pushq %rax 267 0x53, // 0x05: pushq %rbx 268 0x51, // 0x06: pushq %rcx 269 0x52, // 0x07: pushq %rdx 270 0x56, // 0x08: pushq %rsi 271 0x57, // 0x09: pushq %rdi 272 0x41, 0x50, // 0x0a: pushq %r8 273 0x41, 0x51, // 0x0c: pushq %r9 274 0x41, 0x52, // 0x0e: pushq %r10 275 0x41, 0x53, // 0x10: pushq %r11 276 0x41, 0x54, // 0x12: pushq %r12 277 0x41, 0x55, // 0x14: pushq %r13 278 0x41, 0x56, // 0x16: pushq %r14 279 0x41, 0x57, // 0x18: pushq %r15 280 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp 281 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) 282 0x48, 0xbf, // 0x26: movabsq <CBMgr>, %rdi 283 284 // 0x28: JIT re-entry ctx addr. 285 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 286 287 0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi 288 0x48, 0x83, 0xee, 0x06, // 0x34: subq $6, %rsi 289 0x48, 0xb8, // 0x38: movabsq <REntry>, %rax 290 291 // 0x3a: JIT re-entry fn addr: 292 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 293 294 0xff, 0xd0, // 0x42: callq *%rax 295 0x48, 0x89, 0x45, 0x08, // 0x44: movq %rax, 8(%rbp) 296 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64 (%rsp) 297 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq 0x208, %rsp 298 0x41, 0x5f, // 0x54: popq %r15 299 0x41, 0x5e, // 0x56: popq %r14 300 0x41, 0x5d, // 0x58: popq %r13 301 0x41, 0x5c, // 0x5a: popq %r12 302 0x41, 0x5b, // 0x5c: popq %r11 303 0x41, 0x5a, // 0x5e: popq %r10 304 0x41, 0x59, // 0x60: popq %r9 305 0x41, 0x58, // 0x62: popq %r8 306 0x5f, // 0x64: popq %rdi 307 0x5e, // 0x65: popq %rsi 308 0x5a, // 0x66: popq %rdx 309 0x59, // 0x67: popq %rcx 310 0x5b, // 0x68: popq %rbx 311 0x58, // 0x69: popq %rax 312 0x5d, // 0x6a: popq %rbp 313 0xc3, // 0x6b: retq 314 }; 315 316 const unsigned ReentryFnAddrOffset = 0x3a; 317 const unsigned ReentryCtxAddrOffset = 0x28; 318 319 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); 320 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, 321 sizeof(uint64_t)); 322 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, 323 sizeof(uint64_t)); 324 } 325 326 void OrcX86_64_Win32::writeResolverCode(char *ResolverWorkingMem, 327 JITTargetAddress ResolverTargetAddress, 328 JITTargetAddress ReentryFnAddr, 329 JITTargetAddress ReentryCtxAddr) { 330 331 // resolverCode is similar to OrcX86_64 with differences specific to windows 332 // x64 calling convention: arguments go into rcx, rdx and come in reverse 333 // order, shadow space allocation on stack 334 const uint8_t ResolverCode[] = { 335 // resolver_entry: 336 0x55, // 0x00: pushq %rbp 337 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp 338 0x50, // 0x04: pushq %rax 339 0x53, // 0x05: pushq %rbx 340 0x51, // 0x06: pushq %rcx 341 0x52, // 0x07: pushq %rdx 342 0x56, // 0x08: pushq %rsi 343 0x57, // 0x09: pushq %rdi 344 0x41, 0x50, // 0x0a: pushq %r8 345 0x41, 0x51, // 0x0c: pushq %r9 346 0x41, 0x52, // 0x0e: pushq %r10 347 0x41, 0x53, // 0x10: pushq %r11 348 0x41, 0x54, // 0x12: pushq %r12 349 0x41, 0x55, // 0x14: pushq %r13 350 0x41, 0x56, // 0x16: pushq %r14 351 0x41, 0x57, // 0x18: pushq %r15 352 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp 353 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) 354 355 0x48, 0xb9, // 0x26: movabsq <CBMgr>, %rcx 356 // 0x28: JIT re-entry ctx addr. 357 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 358 359 0x48, 0x8B, 0x55, 0x08, // 0x30: mov rdx, [rbp+0x8] 360 0x48, 0x83, 0xea, 0x06, // 0x34: sub rdx, 0x6 361 362 0x48, 0xb8, // 0x38: movabsq <REntry>, %rax 363 // 0x3a: JIT re-entry fn addr: 364 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 365 366 // 0x42: sub rsp, 0x20 (Allocate shadow space) 367 0x48, 0x83, 0xEC, 0x20, 368 0xff, 0xd0, // 0x46: callq *%rax 369 370 // 0x48: add rsp, 0x20 (Free shadow space) 371 0x48, 0x83, 0xC4, 0x20, 372 373 0x48, 0x89, 0x45, 0x08, // 0x4C: movq %rax, 8(%rbp) 374 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x50: fxrstor64 (%rsp) 375 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x55: addq 0x208, %rsp 376 0x41, 0x5f, // 0x5C: popq %r15 377 0x41, 0x5e, // 0x5E: popq %r14 378 0x41, 0x5d, // 0x60: popq %r13 379 0x41, 0x5c, // 0x62: popq %r12 380 0x41, 0x5b, // 0x64: popq %r11 381 0x41, 0x5a, // 0x66: popq %r10 382 0x41, 0x59, // 0x68: popq %r9 383 0x41, 0x58, // 0x6a: popq %r8 384 0x5f, // 0x6c: popq %rdi 385 0x5e, // 0x6d: popq %rsi 386 0x5a, // 0x6e: popq %rdx 387 0x59, // 0x6f: popq %rcx 388 0x5b, // 0x70: popq %rbx 389 0x58, // 0x71: popq %rax 390 0x5d, // 0x72: popq %rbp 391 0xc3, // 0x73: retq 392 }; 393 394 const unsigned ReentryFnAddrOffset = 0x3a; 395 const unsigned ReentryCtxAddrOffset = 0x28; 396 397 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); 398 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, 399 sizeof(uint64_t)); 400 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, 401 sizeof(uint64_t)); 402 } 403 404 void OrcI386::writeResolverCode(char *ResolverWorkingMem, 405 JITTargetAddress ResolverTargetAddress, 406 JITTargetAddress ReentryFnAddr, 407 JITTargetAddress ReentryCtxAddr) { 408 409 assert((ReentryFnAddr >> 32) == 0 && "ReentryFnAddr out of range"); 410 assert((ReentryCtxAddr >> 32) == 0 && "ReentryCtxAddr out of range"); 411 412 const uint8_t ResolverCode[] = { 413 // resolver_entry: 414 0x55, // 0x00: pushl %ebp 415 0x89, 0xe5, // 0x01: movl %esp, %ebp 416 0x54, // 0x03: pushl %esp 417 0x83, 0xe4, 0xf0, // 0x04: andl $-0x10, %esp 418 0x50, // 0x07: pushl %eax 419 0x53, // 0x08: pushl %ebx 420 0x51, // 0x09: pushl %ecx 421 0x52, // 0x0a: pushl %edx 422 0x56, // 0x0b: pushl %esi 423 0x57, // 0x0c: pushl %edi 424 0x81, 0xec, 0x18, 0x02, 0x00, 0x00, // 0x0d: subl $0x218, %esp 425 0x0f, 0xae, 0x44, 0x24, 0x10, // 0x13: fxsave 0x10(%esp) 426 0x8b, 0x75, 0x04, // 0x18: movl 0x4(%ebp), %esi 427 0x83, 0xee, 0x05, // 0x1b: subl $0x5, %esi 428 0x89, 0x74, 0x24, 0x04, // 0x1e: movl %esi, 0x4(%esp) 429 0xc7, 0x04, 0x24, 0x00, 0x00, 0x00, 430 0x00, // 0x22: movl <cbmgr>, (%esp) 431 0xb8, 0x00, 0x00, 0x00, 0x00, // 0x29: movl <reentry>, %eax 432 0xff, 0xd0, // 0x2e: calll *%eax 433 0x89, 0x45, 0x04, // 0x30: movl %eax, 0x4(%ebp) 434 0x0f, 0xae, 0x4c, 0x24, 0x10, // 0x33: fxrstor 0x10(%esp) 435 0x81, 0xc4, 0x18, 0x02, 0x00, 0x00, // 0x38: addl $0x218, %esp 436 0x5f, // 0x3e: popl %edi 437 0x5e, // 0x3f: popl %esi 438 0x5a, // 0x40: popl %edx 439 0x59, // 0x41: popl %ecx 440 0x5b, // 0x42: popl %ebx 441 0x58, // 0x43: popl %eax 442 0x8b, 0x65, 0xfc, // 0x44: movl -0x4(%ebp), %esp 443 0x5d, // 0x48: popl %ebp 444 0xc3 // 0x49: retl 445 }; 446 447 const unsigned ReentryFnAddrOffset = 0x2a; 448 const unsigned ReentryCtxAddrOffset = 0x25; 449 450 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); 451 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, 452 sizeof(uint32_t)); 453 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, 454 sizeof(uint32_t)); 455 } 456 457 void OrcI386::writeTrampolines(char *TrampolineWorkingMem, 458 JITTargetAddress TrampolineBlockTargetAddress, 459 JITTargetAddress ResolverAddr, 460 unsigned NumTrampolines) { 461 assert((ResolverAddr >> 32) == 0 && "ResolverAddr out of range"); 462 463 uint64_t CallRelImm = 0xF1C4C400000000e8; 464 uint64_t ResolverRel = ResolverAddr - TrampolineBlockTargetAddress - 5; 465 466 uint64_t *Trampolines = reinterpret_cast<uint64_t *>(TrampolineWorkingMem); 467 for (unsigned I = 0; I < NumTrampolines; ++I, ResolverRel -= TrampolineSize) 468 Trampolines[I] = CallRelImm | (ResolverRel << 8); 469 } 470 471 void OrcI386::writeIndirectStubsBlock( 472 char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, 473 JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { 474 assert((StubsBlockTargetAddress >> 32) == 0 && 475 "StubsBlockTargetAddress is out of range"); 476 assert((PointersBlockTargetAddress >> 32) == 0 && 477 "PointersBlockTargetAddress is out of range"); 478 479 // Stub format is: 480 // 481 // .section __orc_stubs 482 // stub1: 483 // jmpq *ptr1 484 // .byte 0xC4 ; <- Invalid opcode padding. 485 // .byte 0xF1 486 // stub2: 487 // jmpq *ptr2 488 // 489 // ... 490 // 491 // .section __orc_ptrs 492 // ptr1: 493 // .quad 0x0 494 // ptr2: 495 // .quad 0x0 496 // 497 // ... 498 499 assert(stubAndPointerRangesOk<OrcI386>( 500 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && 501 "PointersBlock is out of range"); 502 503 uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem); 504 uint64_t PtrAddr = PointersBlockTargetAddress; 505 for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 4) 506 Stub[I] = 0xF1C40000000025ff | (PtrAddr << 16); 507 } 508 509 void OrcMips32_Base::writeResolverCode(char *ResolverWorkingMem, 510 JITTargetAddress ResolverTargetAddress, 511 JITTargetAddress ReentryFnAddr, 512 JITTargetAddress ReentryCtxAddr, 513 bool isBigEndian) { 514 515 const uint32_t ResolverCode[] = { 516 // resolver_entry: 517 0x27bdff98, // 0x00: addiu $sp,$sp,-104 518 0xafa20000, // 0x04: sw $v0,0($sp) 519 0xafa30004, // 0x08: sw $v1,4($sp) 520 0xafa40008, // 0x0c: sw $a0,8($sp) 521 0xafa5000c, // 0x10: sw $a1,12($sp) 522 0xafa60010, // 0x14: sw $a2,16($sp) 523 0xafa70014, // 0x18: sw $a3,20($sp) 524 0xafb00018, // 0x1c: sw $s0,24($sp) 525 0xafb1001c, // 0x20: sw $s1,28($sp) 526 0xafb20020, // 0x24: sw $s2,32($sp) 527 0xafb30024, // 0x28: sw $s3,36($sp) 528 0xafb40028, // 0x2c: sw $s4,40($sp) 529 0xafb5002c, // 0x30: sw $s5,44($sp) 530 0xafb60030, // 0x34: sw $s6,48($sp) 531 0xafb70034, // 0x38: sw $s7,52($sp) 532 0xafa80038, // 0x3c: sw $t0,56($sp) 533 0xafa9003c, // 0x40: sw $t1,60($sp) 534 0xafaa0040, // 0x44: sw $t2,64($sp) 535 0xafab0044, // 0x48: sw $t3,68($sp) 536 0xafac0048, // 0x4c: sw $t4,72($sp) 537 0xafad004c, // 0x50: sw $t5,76($sp) 538 0xafae0050, // 0x54: sw $t6,80($sp) 539 0xafaf0054, // 0x58: sw $t7,84($sp) 540 0xafb80058, // 0x5c: sw $t8,88($sp) 541 0xafb9005c, // 0x60: sw $t9,92($sp) 542 0xafbe0060, // 0x64: sw $fp,96($sp) 543 0xafbf0064, // 0x68: sw $ra,100($sp) 544 545 // JIT re-entry ctx addr. 546 0x00000000, // 0x6c: lui $a0,ctx 547 0x00000000, // 0x70: addiu $a0,$a0,ctx 548 549 0x03e02825, // 0x74: move $a1, $ra 550 0x24a5ffec, // 0x78: addiu $a1,$a1,-20 551 552 // JIT re-entry fn addr: 553 0x00000000, // 0x7c: lui $t9,reentry 554 0x00000000, // 0x80: addiu $t9,$t9,reentry 555 556 0x0320f809, // 0x84: jalr $t9 557 0x00000000, // 0x88: nop 558 0x8fbf0064, // 0x8c: lw $ra,100($sp) 559 0x8fbe0060, // 0x90: lw $fp,96($sp) 560 0x8fb9005c, // 0x94: lw $t9,92($sp) 561 0x8fb80058, // 0x98: lw $t8,88($sp) 562 0x8faf0054, // 0x9c: lw $t7,84($sp) 563 0x8fae0050, // 0xa0: lw $t6,80($sp) 564 0x8fad004c, // 0xa4: lw $t5,76($sp) 565 0x8fac0048, // 0xa8: lw $t4,72($sp) 566 0x8fab0044, // 0xac: lw $t3,68($sp) 567 0x8faa0040, // 0xb0: lw $t2,64($sp) 568 0x8fa9003c, // 0xb4: lw $t1,60($sp) 569 0x8fa80038, // 0xb8: lw $t0,56($sp) 570 0x8fb70034, // 0xbc: lw $s7,52($sp) 571 0x8fb60030, // 0xc0: lw $s6,48($sp) 572 0x8fb5002c, // 0xc4: lw $s5,44($sp) 573 0x8fb40028, // 0xc8: lw $s4,40($sp) 574 0x8fb30024, // 0xcc: lw $s3,36($sp) 575 0x8fb20020, // 0xd0: lw $s2,32($sp) 576 0x8fb1001c, // 0xd4: lw $s1,28($sp) 577 0x8fb00018, // 0xd8: lw $s0,24($sp) 578 0x8fa70014, // 0xdc: lw $a3,20($sp) 579 0x8fa60010, // 0xe0: lw $a2,16($sp) 580 0x8fa5000c, // 0xe4: lw $a1,12($sp) 581 0x8fa40008, // 0xe8: lw $a0,8($sp) 582 0x27bd0068, // 0xec: addiu $sp,$sp,104 583 0x0300f825, // 0xf0: move $ra, $t8 584 0x03200008, // 0xf4: jr $t9 585 0x00000000, // 0xf8: move $t9, $v0/v1 586 }; 587 588 const unsigned ReentryFnAddrOffset = 0x7c; // JIT re-entry fn addr lui 589 const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry context addr lui 590 const unsigned Offsett = 0xf8; 591 592 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); 593 594 // Depending on endian return value will be in v0 or v1. 595 uint32_t MoveVxT9 = isBigEndian ? 0x0060c825 : 0x0040c825; 596 memcpy(ResolverWorkingMem + Offsett, &MoveVxT9, sizeof(MoveVxT9)); 597 598 uint32_t ReentryCtxLUi = 599 0x3c040000 | (((ReentryCtxAddr + 0x8000) >> 16) & 0xFFFF); 600 uint32_t ReentryCtxADDiu = 0x24840000 | ((ReentryCtxAddr)&0xFFFF); 601 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi, 602 sizeof(ReentryCtxLUi)); 603 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 4, &ReentryCtxADDiu, 604 sizeof(ReentryCtxADDiu)); 605 606 uint32_t ReentryFnLUi = 607 0x3c190000 | (((ReentryFnAddr + 0x8000) >> 16) & 0xFFFF); 608 uint32_t ReentryFnADDiu = 0x27390000 | ((ReentryFnAddr)&0xFFFF); 609 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi, 610 sizeof(ReentryFnLUi)); 611 memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 4, &ReentryFnADDiu, 612 sizeof(ReentryFnADDiu)); 613 } 614 615 void OrcMips32_Base::writeTrampolines( 616 char *TrampolineBlockWorkingMem, 617 JITTargetAddress TrampolineBlockTargetAddress, 618 JITTargetAddress ResolverAddr, unsigned NumTrampolines) { 619 620 assert((ResolverAddr >> 32) == 0 && "ResolverAddr out of range"); 621 622 uint32_t *Trampolines = 623 reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); 624 uint32_t RHiAddr = ((ResolverAddr + 0x8000) >> 16); 625 626 for (unsigned I = 0; I < NumTrampolines; ++I) { 627 // move $t8,$ra 628 // lui $t9,ResolverAddr 629 // addiu $t9,$t9,ResolverAddr 630 // jalr $t9 631 // nop 632 Trampolines[5 * I + 0] = 0x03e0c025; 633 Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF); 634 Trampolines[5 * I + 2] = 0x27390000 | (ResolverAddr & 0xFFFF); 635 Trampolines[5 * I + 3] = 0x0320f809; 636 Trampolines[5 * I + 4] = 0x00000000; 637 } 638 } 639 640 void OrcMips32_Base::writeIndirectStubsBlock( 641 char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, 642 JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { 643 assert((StubsBlockTargetAddress >> 32) == 0 && 644 "InitialPtrVal is out of range"); 645 646 // Stub format is: 647 // 648 // .section __orc_stubs 649 // stub1: 650 // lui $t9, ptr1 651 // lw $t9, %lo(ptr1)($t9) 652 // jr $t9 653 // stub2: 654 // lui $t9, ptr2 655 // lw $t9,%lo(ptr1)($t9) 656 // jr $t9 657 // 658 // ... 659 // 660 // .section __orc_ptrs 661 // ptr1: 662 // .word 0x0 663 // ptr2: 664 // .word 0x0 665 // 666 // i.. 667 668 assert(stubAndPointerRangesOk<OrcMips32_Base>( 669 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && 670 "PointersBlock is out of range"); 671 672 // Populate the stubs page stubs and mark it executable. 673 uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); 674 uint64_t PtrAddr = PointersBlockTargetAddress; 675 676 for (unsigned I = 0; I < NumStubs; ++I) { 677 uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16); 678 Stub[4 * I + 0] = 0x3c190000 | (HiAddr & 0xFFFF); // lui $t9,ptr1 679 Stub[4 * I + 1] = 0x8f390000 | (PtrAddr & 0xFFFF); // lw $t9,%lo(ptr1)($t9) 680 Stub[4 * I + 2] = 0x03200008; // jr $t9 681 Stub[4 * I + 3] = 0x00000000; // nop 682 PtrAddr += 4; 683 } 684 } 685 686 void OrcMips64::writeResolverCode(char *ResolverWorkingMem, 687 JITTargetAddress ResolverTargetAddress, 688 JITTargetAddress ReentryFnAddr, 689 JITTargetAddress ReentryCtxAddr) { 690 691 const uint32_t ResolverCode[] = { 692 //resolver_entry: 693 0x67bdff30, // 0x00: daddiu $sp,$sp,-208 694 0xffa20000, // 0x04: sd v0,0(sp) 695 0xffa30008, // 0x08: sd v1,8(sp) 696 0xffa40010, // 0x0c: sd a0,16(sp) 697 0xffa50018, // 0x10: sd a1,24(sp) 698 0xffa60020, // 0x14: sd a2,32(sp) 699 0xffa70028, // 0x18: sd a3,40(sp) 700 0xffa80030, // 0x1c: sd a4,48(sp) 701 0xffa90038, // 0x20: sd a5,56(sp) 702 0xffaa0040, // 0x24: sd a6,64(sp) 703 0xffab0048, // 0x28: sd a7,72(sp) 704 0xffac0050, // 0x2c: sd t0,80(sp) 705 0xffad0058, // 0x30: sd t1,88(sp) 706 0xffae0060, // 0x34: sd t2,96(sp) 707 0xffaf0068, // 0x38: sd t3,104(sp) 708 0xffb00070, // 0x3c: sd s0,112(sp) 709 0xffb10078, // 0x40: sd s1,120(sp) 710 0xffb20080, // 0x44: sd s2,128(sp) 711 0xffb30088, // 0x48: sd s3,136(sp) 712 0xffb40090, // 0x4c: sd s4,144(sp) 713 0xffb50098, // 0x50: sd s5,152(sp) 714 0xffb600a0, // 0x54: sd s6,160(sp) 715 0xffb700a8, // 0x58: sd s7,168(sp) 716 0xffb800b0, // 0x5c: sd t8,176(sp) 717 0xffb900b8, // 0x60: sd t9,184(sp) 718 0xffbe00c0, // 0x64: sd fp,192(sp) 719 0xffbf00c8, // 0x68: sd ra,200(sp) 720 721 // JIT re-entry ctx addr. 722 0x00000000, // 0x6c: lui $a0,heighest(ctx) 723 0x00000000, // 0x70: daddiu $a0,$a0,heigher(ctx) 724 0x00000000, // 0x74: dsll $a0,$a0,16 725 0x00000000, // 0x78: daddiu $a0,$a0,hi(ctx) 726 0x00000000, // 0x7c: dsll $a0,$a0,16 727 0x00000000, // 0x80: daddiu $a0,$a0,lo(ctx) 728 729 0x03e02825, // 0x84: move $a1, $ra 730 0x64a5ffdc, // 0x88: daddiu $a1,$a1,-36 731 732 // JIT re-entry fn addr: 733 0x00000000, // 0x8c: lui $t9,reentry 734 0x00000000, // 0x90: daddiu $t9,$t9,reentry 735 0x00000000, // 0x94: dsll $t9,$t9, 736 0x00000000, // 0x98: daddiu $t9,$t9, 737 0x00000000, // 0x9c: dsll $t9,$t9, 738 0x00000000, // 0xa0: daddiu $t9,$t9, 739 0x0320f809, // 0xa4: jalr $t9 740 0x00000000, // 0xa8: nop 741 0xdfbf00c8, // 0xac: ld ra, 200(sp) 742 0xdfbe00c0, // 0xb0: ld fp, 192(sp) 743 0xdfb900b8, // 0xb4: ld t9, 184(sp) 744 0xdfb800b0, // 0xb8: ld t8, 176(sp) 745 0xdfb700a8, // 0xbc: ld s7, 168(sp) 746 0xdfb600a0, // 0xc0: ld s6, 160(sp) 747 0xdfb50098, // 0xc4: ld s5, 152(sp) 748 0xdfb40090, // 0xc8: ld s4, 144(sp) 749 0xdfb30088, // 0xcc: ld s3, 136(sp) 750 0xdfb20080, // 0xd0: ld s2, 128(sp) 751 0xdfb10078, // 0xd4: ld s1, 120(sp) 752 0xdfb00070, // 0xd8: ld s0, 112(sp) 753 0xdfaf0068, // 0xdc: ld t3, 104(sp) 754 0xdfae0060, // 0xe0: ld t2, 96(sp) 755 0xdfad0058, // 0xe4: ld t1, 88(sp) 756 0xdfac0050, // 0xe8: ld t0, 80(sp) 757 0xdfab0048, // 0xec: ld a7, 72(sp) 758 0xdfaa0040, // 0xf0: ld a6, 64(sp) 759 0xdfa90038, // 0xf4: ld a5, 56(sp) 760 0xdfa80030, // 0xf8: ld a4, 48(sp) 761 0xdfa70028, // 0xfc: ld a3, 40(sp) 762 0xdfa60020, // 0x100: ld a2, 32(sp) 763 0xdfa50018, // 0x104: ld a1, 24(sp) 764 0xdfa40010, // 0x108: ld a0, 16(sp) 765 0xdfa30008, // 0x10c: ld v1, 8(sp) 766 0x67bd00d0, // 0x110: daddiu $sp,$sp,208 767 0x0300f825, // 0x114: move $ra, $t8 768 0x03200008, // 0x118: jr $t9 769 0x0040c825, // 0x11c: move $t9, $v0 770 }; 771 772 const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lui 773 const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry ctx addr lui 774 775 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); 776 777 uint32_t ReentryCtxLUi = 778 0x3c040000 | (((ReentryCtxAddr + 0x800080008000) >> 48) & 0xFFFF); 779 uint32_t ReentryCtxDADDiu = 780 0x64840000 | (((ReentryCtxAddr + 0x80008000) >> 32) & 0xFFFF); 781 uint32_t ReentryCtxDSLL = 0x00042438; 782 uint32_t ReentryCtxDADDiu2 = 783 0x64840000 | ((((ReentryCtxAddr + 0x8000) >> 16) & 0xFFFF)); 784 uint32_t ReentryCtxDSLL2 = 0x00042438; 785 uint32_t ReentryCtxDADDiu3 = 0x64840000 | ((ReentryCtxAddr)&0xFFFF); 786 787 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi, 788 sizeof(ReentryCtxLUi)); 789 memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 4), &ReentryCtxDADDiu, 790 sizeof(ReentryCtxDADDiu)); 791 memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 8), &ReentryCtxDSLL, 792 sizeof(ReentryCtxDSLL)); 793 memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 12), &ReentryCtxDADDiu2, 794 sizeof(ReentryCtxDADDiu2)); 795 memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 16), &ReentryCtxDSLL2, 796 sizeof(ReentryCtxDSLL2)); 797 memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 20), &ReentryCtxDADDiu3, 798 sizeof(ReentryCtxDADDiu3)); 799 800 uint32_t ReentryFnLUi = 801 0x3c190000 | (((ReentryFnAddr + 0x800080008000) >> 48) & 0xFFFF); 802 803 uint32_t ReentryFnDADDiu = 804 0x67390000 | (((ReentryFnAddr + 0x80008000) >> 32) & 0xFFFF); 805 806 uint32_t ReentryFnDSLL = 0x0019cc38; 807 808 uint32_t ReentryFnDADDiu2 = 809 0x67390000 | (((ReentryFnAddr + 0x8000) >> 16) & 0xFFFF); 810 811 uint32_t ReentryFnDSLL2 = 0x0019cc38; 812 813 uint32_t ReentryFnDADDiu3 = 0x67390000 | ((ReentryFnAddr)&0xFFFF); 814 815 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi, 816 sizeof(ReentryFnLUi)); 817 memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 4), &ReentryFnDADDiu, 818 sizeof(ReentryFnDADDiu)); 819 memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 8), &ReentryFnDSLL, 820 sizeof(ReentryFnDSLL)); 821 memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 12), &ReentryFnDADDiu2, 822 sizeof(ReentryFnDADDiu2)); 823 memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 16), &ReentryFnDSLL2, 824 sizeof(ReentryFnDSLL2)); 825 memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 20), &ReentryFnDADDiu3, 826 sizeof(ReentryFnDADDiu3)); 827 } 828 829 void OrcMips64::writeTrampolines(char *TrampolineBlockWorkingMem, 830 JITTargetAddress TrampolineBlockTargetAddress, 831 JITTargetAddress ResolverAddr, 832 unsigned NumTrampolines) { 833 834 uint32_t *Trampolines = 835 reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); 836 837 uint64_t HeighestAddr = ((ResolverAddr + 0x800080008000) >> 48); 838 uint64_t HeigherAddr = ((ResolverAddr + 0x80008000) >> 32); 839 uint64_t HiAddr = ((ResolverAddr + 0x8000) >> 16); 840 841 for (unsigned I = 0; I < NumTrampolines; ++I) { 842 Trampolines[10 * I + 0] = 0x03e0c025; // move $t8,$ra 843 Trampolines[10 * I + 1] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,resolveAddr 844 Trampolines[10 * I + 2] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(resolveAddr) 845 Trampolines[10 * I + 3] = 0x0019cc38; // dsll $t9,$t9,16 846 Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr) 847 Trampolines[10 * I + 5] = 0x0019cc38; // dsll $t9,$t9,16 848 Trampolines[10 * I + 6] = 849 0x67390000 | (ResolverAddr & 0xFFFF); // daddiu $t9,$t9,%lo(ptr) 850 Trampolines[10 * I + 7] = 0x0320f809; // jalr $t9 851 Trampolines[10 * I + 8] = 0x00000000; // nop 852 Trampolines[10 * I + 9] = 0x00000000; // nop 853 } 854 } 855 856 void OrcMips64::writeIndirectStubsBlock( 857 char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, 858 JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { 859 // Stub format is: 860 // 861 // .section __orc_stubs 862 // stub1: 863 // lui $t9,ptr1 864 // dsll $t9,$t9,16 865 // daddiu $t9,$t9,%hi(ptr) 866 // dsll $t9,$t9,16 867 // ld $t9,%lo(ptr) 868 // jr $t9 869 // stub2: 870 // lui $t9,ptr1 871 // dsll $t9,$t9,16 872 // daddiu $t9,$t9,%hi(ptr) 873 // dsll $t9,$t9,16 874 // ld $t9,%lo(ptr) 875 // jr $t9 876 // 877 // ... 878 // 879 // .section __orc_ptrs 880 // ptr1: 881 // .dword 0x0 882 // ptr2: 883 // .dword 0x0 884 // 885 // ... 886 887 assert(stubAndPointerRangesOk<OrcMips64>( 888 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && 889 "PointersBlock is out of range"); 890 891 // Populate the stubs page stubs and mark it executable. 892 uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); 893 uint64_t PtrAddr = PointersBlockTargetAddress; 894 895 for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { 896 uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48); 897 uint64_t HeigherAddr = ((PtrAddr + 0x80008000) >> 32); 898 uint64_t HiAddr = ((PtrAddr + 0x8000) >> 16); 899 Stub[8 * I + 0] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,ptr1 900 Stub[8 * I + 1] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(ptr) 901 Stub[8 * I + 2] = 0x0019cc38; // dsll $t9,$t9,16 902 Stub[8 * I + 3] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr) 903 Stub[8 * I + 4] = 0x0019cc38; // dsll $t9,$t9,16 904 Stub[8 * I + 5] = 0xdf390000 | (PtrAddr & 0xFFFF); // ld $t9,%lo(ptr) 905 Stub[8 * I + 6] = 0x03200008; // jr $t9 906 Stub[8 * I + 7] = 0x00000000; // nop 907 } 908 } 909 910 void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem, 911 JITTargetAddress ResolverTargetAddress, 912 JITTargetAddress ReentryFnAddr, 913 JITTargetAddress ReentryCtxAddr) { 914 915 const uint32_t ResolverCode[] = { 916 0xef810113, // 0x00: addi sp,sp,-264 917 0x00813023, // 0x04: sd s0,0(sp) 918 0x00913423, // 0x08: sd s1,8(sp) 919 0x01213823, // 0x0c: sd s2,16(sp) 920 0x01313c23, // 0x10: sd s3,24(sp) 921 0x03413023, // 0x14: sd s4,32(sp) 922 0x03513423, // 0x18: sd s5,40(sp) 923 0x03613823, // 0x1c: sd s6,48(sp) 924 0x03713c23, // 0x20: sd s7,56(sp) 925 0x05813023, // 0x24: sd s8,64(sp) 926 0x05913423, // 0x28: sd s9,72(sp) 927 0x05a13823, // 0x2c: sd s10,80(sp) 928 0x05b13c23, // 0x30: sd s11,88(sp) 929 0x06113023, // 0x34: sd ra,96(sp) 930 0x06a13423, // 0x38: sd a0,104(sp) 931 0x06b13823, // 0x3c: sd a1,112(sp) 932 0x06c13c23, // 0x40: sd a2,120(sp) 933 0x08d13023, // 0x44: sd a3,128(sp) 934 0x08e13423, // 0x48: sd a4,136(sp) 935 0x08f13823, // 0x4c: sd a5,144(sp) 936 0x09013c23, // 0x50: sd a6,152(sp) 937 0x0b113023, // 0x54: sd a7,160(sp) 938 0x0a813427, // 0x58: fsd fs0,168(sp) 939 0x0a913827, // 0x5c: fsd fs1,176(sp) 940 0x0b213c27, // 0x60: fsd fs2,184(sp) 941 0x0d313027, // 0x64: fsd fs3,192(sp) 942 0x0d413427, // 0x68: fsd fs4,200(sp) 943 0x0d513827, // 0x6c: fsd fs5,208(sp) 944 0x0d613c27, // 0x70: fsd fs6,216(sp) 945 0x0f713027, // 0x74: fsd fs7,224(sp) 946 0x0f813427, // 0x78: fsd fs8,232(sp) 947 0x0f913827, // 0x7c: fsd fs9,240(sp) 948 0x0fa13c27, // 0x80: fsd fs10,248(sp) 949 0x11b13027, // 0x84: fsd fs11,256(sp) 950 0x00000517, // 0x88: auipc a0,0x0 951 0x0b053503, // 0x8c: ld a0,176(a0) # 0x138 952 0x00030593, // 0x90: mv a1,t1 953 0xff458593, // 0x94: addi a1,a1,-12 954 0x00000617, // 0x98: auipc a2,0x0 955 0x0a863603, // 0x9c: ld a2,168(a2) # 0x140 956 0x000600e7, // 0xa0: jalr a2 957 0x00050293, // 0xa4: mv t0,a0 958 0x00013403, // 0xa8: ld s0,0(sp) 959 0x00813483, // 0xac: ld s1,8(sp) 960 0x01013903, // 0xb0: ld s2,16(sp) 961 0x01813983, // 0xb4: ld s3,24(sp) 962 0x02013a03, // 0xb8: ld s4,32(sp) 963 0x02813a83, // 0xbc: ld s5,40(sp) 964 0x03013b03, // 0xc0: ld s6,48(sp) 965 0x03813b83, // 0xc4: ld s7,56(sp) 966 0x04013c03, // 0xc8: ld s8,64(sp) 967 0x04813c83, // 0xcc: ld s9,72(sp) 968 0x05013d03, // 0xd0: ld s10,80(sp) 969 0x05813d83, // 0xd4: ld s11,88(sp) 970 0x06013083, // 0xd8: ld ra,96(sp) 971 0x06813503, // 0xdc: ld a0,104(sp) 972 0x07013583, // 0xe0: ld a1,112(sp) 973 0x07813603, // 0xe4: ld a2,120(sp) 974 0x08013683, // 0xe8: ld a3,128(sp) 975 0x08813703, // 0xec: ld a4,136(sp) 976 0x09013783, // 0xf0: ld a5,144(sp) 977 0x09813803, // 0xf4: ld a6,152(sp) 978 0x0a013883, // 0xf8: ld a7,160(sp) 979 0x0a813407, // 0xfc: fld fs0,168(sp) 980 0x0b013487, // 0x100: fld fs1,176(sp) 981 0x0b813907, // 0x104: fld fs2,184(sp) 982 0x0c013987, // 0x108: fld fs3,192(sp) 983 0x0c813a07, // 0x10c: fld fs4,200(sp) 984 0x0d013a87, // 0x110: fld fs5,208(sp) 985 0x0d813b07, // 0x114: fld fs6,216(sp) 986 0x0e013b87, // 0x118: fld fs7,224(sp) 987 0x0e813c07, // 0x11c: fld fs8,232(sp) 988 0x0f013c87, // 0x120: fld fs9,240(sp) 989 0x0f813d07, // 0x124: fld fs10,248(sp) 990 0x10013d87, // 0x128: fld fs11,256(sp) 991 0x10810113, // 0x12c: addi sp,sp,264 992 0x00028067, // 0x130: jr t0 993 0x12345678, // 0x134: padding to align at 8 byte 994 0x12345678, // 0x138: Lreentry_ctx_ptr: 995 0xdeadbeef, // 0x13c: .quad 0 996 0x98765432, // 0x140: Lreentry_fn_ptr: 997 0xcafef00d // 0x144: .quad 0 998 }; 999 1000 const unsigned ReentryCtxAddrOffset = 0x138; 1001 const unsigned ReentryFnAddrOffset = 0x140; 1002 1003 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); 1004 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, 1005 sizeof(uint64_t)); 1006 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, 1007 sizeof(uint64_t)); 1008 } 1009 1010 void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem, 1011 JITTargetAddress TrampolineBlockTargetAddress, 1012 JITTargetAddress ResolverAddr, 1013 unsigned NumTrampolines) { 1014 1015 unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8); 1016 1017 memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, 1018 sizeof(uint64_t)); 1019 1020 uint32_t *Trampolines = 1021 reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); 1022 for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { 1023 uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xFFFFF000; 1024 uint32_t Lo12 = OffsetToPtr - Hi20; 1025 Trampolines[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr) 1026 Trampolines[4 * I + 1] = 1027 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr) 1028 Trampolines[4 * I + 2] = 0x00028367; // jalr t1, t0 1029 Trampolines[4 * I + 3] = 0xdeadface; // padding 1030 } 1031 } 1032 1033 void OrcRiscv64::writeIndirectStubsBlock( 1034 char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, 1035 JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { 1036 // Stub format is: 1037 // 1038 // .section __orc_stubs 1039 // stub1: 1040 // auipc t0, %hi(ptr1) ; PC-rel load of ptr1 1041 // ld t0, %lo(t0) 1042 // jr t0 ; Jump to resolver 1043 // .quad 0 ; Pad to 16 bytes 1044 // stub2: 1045 // auipc t0, %hi(ptr1) ; PC-rel load of ptr1 1046 // ld t0, %lo(t0) 1047 // jr t0 ; Jump to resolver 1048 // .quad 0 1049 // 1050 // ... 1051 // 1052 // .section __orc_ptrs 1053 // ptr1: 1054 // .quad 0x0 1055 // ptr2: 1056 // .quad 0x0 1057 // 1058 // ... 1059 1060 assert(stubAndPointerRangesOk<OrcRiscv64>( 1061 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && 1062 "PointersBlock is out of range"); 1063 1064 uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); 1065 1066 for (unsigned I = 0; I < NumStubs; ++I) { 1067 uint64_t PtrDisplacement = 1068 PointersBlockTargetAddress - StubsBlockTargetAddress; 1069 uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xFFFFF000; 1070 uint32_t Lo12 = PtrDisplacement - Hi20; 1071 Stub[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr) 1072 Stub[4 * I + 1] = 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr) 1073 Stub[4 * I + 2] = 0x00028067; // jr t0 1074 Stub[4 * I + 3] = 0xfeedbeef; // padding 1075 PointersBlockTargetAddress += PointerSize; 1076 StubsBlockTargetAddress += StubSize; 1077 } 1078 } 1079 1080 void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem, 1081 JITTargetAddress ResolverTargetAddress, 1082 JITTargetAddress ReentryFnAddr, 1083 JITTargetAddress ReentryCtxAddr) { 1084 1085 LLVM_DEBUG({ 1086 dbgs() << "Writing resolver code to " 1087 << formatv("{0:x16}", ResolverTargetAddress) << "\n"; 1088 }); 1089 1090 const uint32_t ResolverCode[] = { 1091 0x02fde063, // 0x0: addi.d $sp, $sp, -136(0xf78) 1092 0x29c00061, // 0x4: st.d $ra, $sp, 0 1093 0x29c02064, // 0x8: st.d $a0, $sp, 8(0x8) 1094 0x29c04065, // 0xc: st.d $a1, $sp, 16(0x10) 1095 0x29c06066, // 0x10: st.d $a2, $sp, 24(0x18) 1096 0x29c08067, // 0x14: st.d $a3, $sp, 32(0x20) 1097 0x29c0a068, // 0x18: st.d $a4, $sp, 40(0x28) 1098 0x29c0c069, // 0x1c: st.d $a5, $sp, 48(0x30) 1099 0x29c0e06a, // 0x20: st.d $a6, $sp, 56(0x38) 1100 0x29c1006b, // 0x24: st.d $a7, $sp, 64(0x40) 1101 0x2bc12060, // 0x28: fst.d $fa0, $sp, 72(0x48) 1102 0x2bc14061, // 0x2c: fst.d $fa1, $sp, 80(0x50) 1103 0x2bc16062, // 0x30: fst.d $fa2, $sp, 88(0x58) 1104 0x2bc18063, // 0x34: fst.d $fa3, $sp, 96(0x60) 1105 0x2bc1a064, // 0x38: fst.d $fa4, $sp, 104(0x68) 1106 0x2bc1c065, // 0x3c: fst.d $fa5, $sp, 112(0x70) 1107 0x2bc1e066, // 0x40: fst.d $fa6, $sp, 120(0x78) 1108 0x2bc20067, // 0x44: fst.d $fa7, $sp, 128(0x80) 1109 0x1c000004, // 0x48: pcaddu12i $a0, 0 1110 0x28c1c084, // 0x4c: ld.d $a0, $a0, 112(0x70) 1111 0x001501a5, // 0x50: move $a1, $t1 1112 0x02ffd0a5, // 0x54: addi.d $a1, $a1, -12(0xff4) 1113 0x1c000006, // 0x58: pcaddu12i $a2, 0 1114 0x28c1a0c6, // 0x5c: ld.d $a2, $a2, 104(0x68) 1115 0x4c0000c1, // 0x60: jirl $ra, $a2, 0 1116 0x0015008c, // 0x64: move $t0, $a0 1117 0x2b820067, // 0x68: fld.d $fa7, $sp, 128(0x80) 1118 0x2b81e066, // 0x6c: fld.d $fa6, $sp, 120(0x78) 1119 0x2b81c065, // 0x70: fld.d $fa5, $sp, 112(0x70) 1120 0x2b81a064, // 0x74: fld.d $fa4, $sp, 104(0x68) 1121 0x2b818063, // 0x78: fld.d $fa3, $sp, 96(0x60) 1122 0x2b816062, // 0x7c: fld.d $fa2, $sp, 88(0x58) 1123 0x2b814061, // 0x80: fld.d $fa1, $sp, 80(0x50) 1124 0x2b812060, // 0x84: fld.d $fa0, $sp, 72(0x48) 1125 0x28c1006b, // 0x88: ld.d $a7, $sp, 64(0x40) 1126 0x28c0e06a, // 0x8c: ld.d $a6, $sp, 56(0x38) 1127 0x28c0c069, // 0x90: ld.d $a5, $sp, 48(0x30) 1128 0x28c0a068, // 0x94: ld.d $a4, $sp, 40(0x28) 1129 0x28c08067, // 0x98: ld.d $a3, $sp, 32(0x20) 1130 0x28c06066, // 0x9c: ld.d $a2, $sp, 24(0x18) 1131 0x28c04065, // 0xa0: ld.d $a1, $sp, 16(0x10) 1132 0x28c02064, // 0xa4: ld.d $a0, $sp, 8(0x8) 1133 0x28c00061, // 0xa8: ld.d $ra, $sp, 0 1134 0x02c22063, // 0xac: addi.d $sp, $sp, 136(0x88) 1135 0x4c000180, // 0xb0: jr $t0 1136 0x00000000, // 0xb4: padding to align at 8 bytes 1137 0x01234567, // 0xb8: Lreentry_ctx_ptr: 1138 0xdeedbeef, // 0xbc: .dword 0 1139 0x98765432, // 0xc0: Lreentry_fn_ptr: 1140 0xcafef00d, // 0xc4: .dword 0 1141 }; 1142 1143 const unsigned ReentryCtxAddrOffset = 0xb8; 1144 const unsigned ReentryFnAddrOffset = 0xc0; 1145 1146 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); 1147 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, 1148 sizeof(uint64_t)); 1149 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, 1150 sizeof(uint64_t)); 1151 } 1152 1153 void OrcLoongArch64::writeTrampolines( 1154 char *TrampolineBlockWorkingMem, 1155 JITTargetAddress TrampolineBlockTargetAddress, 1156 JITTargetAddress ResolverAddr, unsigned NumTrampolines) { 1157 1158 LLVM_DEBUG({ 1159 dbgs() << "Writing trampoline code to " 1160 << formatv("{0:x16}", TrampolineBlockTargetAddress) << "\n"; 1161 }); 1162 1163 unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8); 1164 1165 memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, 1166 sizeof(uint64_t)); 1167 1168 uint32_t *Trampolines = 1169 reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem); 1170 for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { 1171 uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xfffff000; 1172 uint32_t Lo12 = OffsetToPtr - Hi20; 1173 Trampolines[4 * I + 0] = 1174 0x1c00000c | 1175 (((Hi20 >> 12) & 0xfffff) << 5); // pcaddu12i $t0, %pc_hi20(Lptr) 1176 Trampolines[4 * I + 1] = 1177 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr) 1178 Trampolines[4 * I + 2] = 0x4c00018d; // jirl $t1, $t0, 0 1179 Trampolines[4 * I + 3] = 0x0; // padding 1180 } 1181 } 1182 1183 void OrcLoongArch64::writeIndirectStubsBlock( 1184 char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, 1185 JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { 1186 // Stub format is: 1187 // 1188 // .section __orc_stubs 1189 // stub1: 1190 // pcaddu12i $t0, %pc_hi20(ptr1) ; PC-rel load of ptr1 1191 // ld.d $t0, $t0, %pc_lo12(ptr1) 1192 // jr $t0 ; Jump to resolver 1193 // .dword 0 ; Pad to 16 bytes 1194 // stub2: 1195 // pcaddu12i $t0, %pc_hi20(ptr2) ; PC-rel load of ptr2 1196 // ld.d $t0, $t0, %pc_lo12(ptr2) 1197 // jr $t0 ; Jump to resolver 1198 // .dword 0 ; Pad to 16 bytes 1199 // ... 1200 // 1201 // .section __orc_ptrs 1202 // ptr1: 1203 // .dword 0x0 1204 // ptr2: 1205 // .dword 0x0 1206 // ... 1207 LLVM_DEBUG({ 1208 dbgs() << "Writing stubs code to " 1209 << formatv("{0:x16}", StubsBlockTargetAddress) << "\n"; 1210 }); 1211 assert(stubAndPointerRangesOk<OrcLoongArch64>( 1212 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && 1213 "PointersBlock is out of range"); 1214 1215 uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem); 1216 1217 for (unsigned I = 0; I < NumStubs; ++I) { 1218 uint64_t PtrDisplacement = 1219 PointersBlockTargetAddress - StubsBlockTargetAddress; 1220 uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xfffff000; 1221 uint32_t Lo12 = PtrDisplacement - Hi20; 1222 Stub[4 * I + 0] = 0x1c00000c | (((Hi20 >> 12) & 0xfffff) 1223 << 5); // pcaddu12i $t0, %pc_hi20(Lptr) 1224 Stub[4 * I + 1] = 1225 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr) 1226 Stub[4 * I + 2] = 0x4c000180; // jr $t0 1227 Stub[4 * I + 3] = 0x0; // padding 1228 PointersBlockTargetAddress += PointerSize; 1229 StubsBlockTargetAddress += StubSize; 1230 } 1231 } 1232 1233 } // End namespace orc. 1234 } // End namespace llvm. 1235