xref: /freebsd/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp (revision 1719886f6d08408b834d270c59ffcfd821c8f63a)
1 //===------------- OrcABISupport.cpp - ABI specific support code ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
10 #include "llvm/Support/FormatVariadic.h"
11 #include "llvm/Support/Process.h"
12 #include "llvm/Support/raw_ostream.h"
13 
14 #define DEBUG_TYPE "orc"
15 
16 using namespace llvm;
17 using namespace llvm::orc;
18 
19 template <typename ORCABI>
20 static bool stubAndPointerRangesOk(ExecutorAddr StubBlockAddr,
21                                    ExecutorAddr PointerBlockAddr,
22                                    unsigned NumStubs) {
23   constexpr unsigned MaxDisp = ORCABI::StubToPointerMaxDisplacement;
24   ExecutorAddr FirstStub = StubBlockAddr;
25   ExecutorAddr LastStub = FirstStub + ((NumStubs - 1) * ORCABI::StubSize);
26   ExecutorAddr FirstPointer = PointerBlockAddr;
27   ExecutorAddr LastPointer = FirstPointer + ((NumStubs - 1) * ORCABI::StubSize);
28 
29   if (FirstStub < FirstPointer) {
30     if (LastStub >= FirstPointer)
31       return false; // Ranges overlap.
32     return (FirstPointer - FirstStub <= MaxDisp) &&
33            (LastPointer - LastStub <= MaxDisp); // out-of-range.
34   }
35 
36   if (LastPointer >= FirstStub)
37     return false; // Ranges overlap.
38 
39   return (FirstStub - FirstPointer <= MaxDisp) &&
40          (LastStub - LastPointer <= MaxDisp);
41 }
42 
43 namespace llvm {
44 namespace orc {
45 
46 void OrcAArch64::writeResolverCode(char *ResolverWorkingMem,
47                                    ExecutorAddr ResolverTargetAddress,
48                                    ExecutorAddr ReentryFnAddr,
49                                    ExecutorAddr ReentryCtxAddr) {
50 
51   const uint32_t ResolverCode[] = {
52     // resolver_entry:
53     0xa9bf47fd,        // 0x000:  stp  x29, x17, [sp, #-16]!
54     0x910003fd,        // 0x004:  mov  x29, sp
55     0xa9bf73fb,        // 0x008:  stp  x27, x28, [sp, #-16]!
56     0xa9bf6bf9,        // 0x00c:  stp  x25, x26, [sp, #-16]!
57     0xa9bf63f7,        // 0x010:  stp  x23, x24, [sp, #-16]!
58     0xa9bf5bf5,        // 0x014:  stp  x21, x22, [sp, #-16]!
59     0xa9bf53f3,        // 0x018:  stp  x19, x20, [sp, #-16]!
60     0xa9bf3fee,        // 0x01c:  stp  x14, x15, [sp, #-16]!
61     0xa9bf37ec,        // 0x020:  stp  x12, x13, [sp, #-16]!
62     0xa9bf2fea,        // 0x024:  stp  x10, x11, [sp, #-16]!
63     0xa9bf27e8,        // 0x028:  stp   x8,  x9, [sp, #-16]!
64     0xa9bf1fe6,        // 0x02c:  stp   x6,  x7, [sp, #-16]!
65     0xa9bf17e4,        // 0x030:  stp   x4,  x5, [sp, #-16]!
66     0xa9bf0fe2,        // 0x034:  stp   x2,  x3, [sp, #-16]!
67     0xa9bf07e0,        // 0x038:  stp   x0,  x1, [sp, #-16]!
68     0xadbf7ffe,        // 0x03c:  stp  q30, q31, [sp, #-32]!
69     0xadbf77fc,        // 0x040:  stp  q28, q29, [sp, #-32]!
70     0xadbf6ffa,        // 0x044:  stp  q26, q27, [sp, #-32]!
71     0xadbf67f8,        // 0x048:  stp  q24, q25, [sp, #-32]!
72     0xadbf5ff6,        // 0x04c:  stp  q22, q23, [sp, #-32]!
73     0xadbf57f4,        // 0x050:  stp  q20, q21, [sp, #-32]!
74     0xadbf4ff2,        // 0x054:  stp  q18, q19, [sp, #-32]!
75     0xadbf47f0,        // 0x058:  stp  q16, q17, [sp, #-32]!
76     0xadbf3fee,        // 0x05c:  stp  q14, q15, [sp, #-32]!
77     0xadbf37ec,        // 0x060:  stp  q12, q13, [sp, #-32]!
78     0xadbf2fea,        // 0x064:  stp  q10, q11, [sp, #-32]!
79     0xadbf27e8,        // 0x068:  stp   q8,  q9, [sp, #-32]!
80     0xadbf1fe6,        // 0x06c:  stp   q6,  q7, [sp, #-32]!
81     0xadbf17e4,        // 0x070:  stp   q4,  q5, [sp, #-32]!
82     0xadbf0fe2,        // 0x074:  stp   q2,  q3, [sp, #-32]!
83     0xadbf07e0,        // 0x078:  stp   q0,  q1, [sp, #-32]!
84     0x580004e0,        // 0x07c:  ldr   x0, Lreentry_ctx_ptr
85     0xaa1e03e1,        // 0x080:  mov   x1, x30
86     0xd1003021,        // 0x084:  sub   x1,  x1, #12
87     0x58000442,        // 0x088:  ldr   x2, Lreentry_fn_ptr
88     0xd63f0040,        // 0x08c:  blr   x2
89     0xaa0003f1,        // 0x090:  mov   x17, x0
90     0xacc107e0,        // 0x094:  ldp   q0,  q1, [sp], #32
91     0xacc10fe2,        // 0x098:  ldp   q2,  q3, [sp], #32
92     0xacc117e4,        // 0x09c:  ldp   q4,  q5, [sp], #32
93     0xacc11fe6,        // 0x0a0:  ldp   q6,  q7, [sp], #32
94     0xacc127e8,        // 0x0a4:  ldp   q8,  q9, [sp], #32
95     0xacc12fea,        // 0x0a8:  ldp  q10, q11, [sp], #32
96     0xacc137ec,        // 0x0ac:  ldp  q12, q13, [sp], #32
97     0xacc13fee,        // 0x0b0:  ldp  q14, q15, [sp], #32
98     0xacc147f0,        // 0x0b4:  ldp  q16, q17, [sp], #32
99     0xacc14ff2,        // 0x0b8:  ldp  q18, q19, [sp], #32
100     0xacc157f4,        // 0x0bc:  ldp  q20, q21, [sp], #32
101     0xacc15ff6,        // 0x0c0:  ldp  q22, q23, [sp], #32
102     0xacc167f8,        // 0x0c4:  ldp  q24, q25, [sp], #32
103     0xacc16ffa,        // 0x0c8:  ldp  q26, q27, [sp], #32
104     0xacc177fc,        // 0x0cc:  ldp  q28, q29, [sp], #32
105     0xacc17ffe,        // 0x0d0:  ldp  q30, q31, [sp], #32
106     0xa8c107e0,        // 0x0d4:  ldp   x0,  x1, [sp], #16
107     0xa8c10fe2,        // 0x0d8:  ldp   x2,  x3, [sp], #16
108     0xa8c117e4,        // 0x0dc:  ldp   x4,  x5, [sp], #16
109     0xa8c11fe6,        // 0x0e0:  ldp   x6,  x7, [sp], #16
110     0xa8c127e8,        // 0x0e4:  ldp   x8,  x9, [sp], #16
111     0xa8c12fea,        // 0x0e8:  ldp  x10, x11, [sp], #16
112     0xa8c137ec,        // 0x0ec:  ldp  x12, x13, [sp], #16
113     0xa8c13fee,        // 0x0f0:  ldp  x14, x15, [sp], #16
114     0xa8c153f3,        // 0x0f4:  ldp  x19, x20, [sp], #16
115     0xa8c15bf5,        // 0x0f8:  ldp  x21, x22, [sp], #16
116     0xa8c163f7,        // 0x0fc:  ldp  x23, x24, [sp], #16
117     0xa8c16bf9,        // 0x100:  ldp  x25, x26, [sp], #16
118     0xa8c173fb,        // 0x104:  ldp  x27, x28, [sp], #16
119     0xa8c17bfd,        // 0x108:  ldp  x29, x30, [sp], #16
120     0xd65f0220,        // 0x10c:  ret  x17
121     0x01234567,        // 0x110:  Lreentry_fn_ptr:
122     0xdeadbeef,        // 0x114:      .quad 0
123     0x98765432,        // 0x118:  Lreentry_ctx_ptr:
124     0xcafef00d         // 0x11c:      .quad 0
125   };
126 
127   const unsigned ReentryFnAddrOffset = 0x110;
128   const unsigned ReentryCtxAddrOffset = 0x118;
129 
130   memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
131   memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
132          sizeof(uint64_t));
133   memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
134          sizeof(uint64_t));
135 }
136 
137 void OrcAArch64::writeTrampolines(char *TrampolineBlockWorkingMem,
138                                   ExecutorAddr TrampolineBlockTargetAddress,
139                                   ExecutorAddr ResolverAddr,
140                                   unsigned NumTrampolines) {
141 
142   unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
143 
144   memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
145          sizeof(uint64_t));
146 
147   // OffsetToPtr is actually the offset from the PC for the 2nd instruction, so
148   // subtract 32-bits.
149   OffsetToPtr -= 4;
150 
151   uint32_t *Trampolines =
152       reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
153 
154   for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
155     Trampolines[3 * I + 0] = 0xaa1e03f1;                      // mov x17, x30
156     Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // adr x16, Lptr
157     Trampolines[3 * I + 2] = 0xd63f0200;                      // blr x16
158   }
159 }
160 
161 void OrcAArch64::writeIndirectStubsBlock(
162     char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
163     ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
164   // Stub format is:
165   //
166   // .section __orc_stubs
167   // stub1:
168   //                 ldr     x16, ptr1       ; PC-rel load of ptr1
169   //                 br      x16             ; Jump to resolver
170   // stub2:
171   //                 ldr     x16, ptr2       ; PC-rel load of ptr2
172   //                 br      x16             ; Jump to resolver
173   //
174   // ...
175   //
176   // .section __orc_ptrs
177   // ptr1:
178   //                 .quad 0x0
179   // ptr2:
180   //                 .quad 0x0
181   //
182   // ...
183 
184   static_assert(StubSize == PointerSize,
185                 "Pointer and stub size must match for algorithm below");
186   assert(stubAndPointerRangesOk<OrcAArch64>(
187              StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
188          "PointersBlock is out of range");
189   uint64_t PtrDisplacement =
190       PointersBlockTargetAddress - StubsBlockTargetAddress;
191   assert((PtrDisplacement % 8 == 0) &&
192          "Displacement to pointer is not a multiple of 8");
193   uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
194   uint64_t PtrOffsetField = ((PtrDisplacement >> 2) & 0x7ffff) << 5;
195 
196   for (unsigned I = 0; I < NumStubs; ++I)
197     Stub[I] = 0xd61f020058000010 | PtrOffsetField;
198 }
199 
200 void OrcX86_64_Base::writeTrampolines(char *TrampolineBlockWorkingMem,
201                                       ExecutorAddr TrampolineBlockTargetAddress,
202                                       ExecutorAddr ResolverAddr,
203                                       unsigned NumTrampolines) {
204 
205   unsigned OffsetToPtr = NumTrampolines * TrampolineSize;
206 
207   memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
208          sizeof(uint64_t));
209 
210   uint64_t *Trampolines =
211       reinterpret_cast<uint64_t *>(TrampolineBlockWorkingMem);
212   uint64_t CallIndirPCRel = 0xf1c40000000015ff;
213 
214   for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize)
215     Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16);
216 }
217 
218 void OrcX86_64_Base::writeIndirectStubsBlock(
219     char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
220     ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
221   // Stub format is:
222   //
223   // .section __orc_stubs
224   // stub1:
225   //                 jmpq    *ptr1(%rip)
226   //                 .byte   0xC4         ; <- Invalid opcode padding.
227   //                 .byte   0xF1
228   // stub2:
229   //                 jmpq    *ptr2(%rip)
230   //
231   // ...
232   //
233   // .section __orc_ptrs
234   // ptr1:
235   //                 .quad 0x0
236   // ptr2:
237   //                 .quad 0x0
238   //
239   // ...
240 
241   // Populate the stubs page stubs and mark it executable.
242   static_assert(StubSize == PointerSize,
243                 "Pointer and stub size must match for algorithm below");
244   assert(stubAndPointerRangesOk<OrcX86_64_Base>(
245              StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
246          "PointersBlock is out of range");
247   uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
248   uint64_t PtrOffsetField =
249       (PointersBlockTargetAddress - StubsBlockTargetAddress - 6) << 16;
250   for (unsigned I = 0; I < NumStubs; ++I)
251     Stub[I] = 0xF1C40000000025ff | PtrOffsetField;
252 }
253 
254 void OrcX86_64_SysV::writeResolverCode(char *ResolverWorkingMem,
255                                        ExecutorAddr ResolverTargetAddress,
256                                        ExecutorAddr ReentryFnAddr,
257                                        ExecutorAddr ReentryCtxAddr) {
258 
259   LLVM_DEBUG({
260     dbgs() << "Writing resolver code to "
261            << formatv("{0:x16}", ResolverTargetAddress) << "\n";
262   });
263 
264   const uint8_t ResolverCode[] = {
265       // resolver_entry:
266       0x55,                                     // 0x00: pushq     %rbp
267       0x48, 0x89, 0xe5,                         // 0x01: movq      %rsp, %rbp
268       0x50,                                     // 0x04: pushq     %rax
269       0x53,                                     // 0x05: pushq     %rbx
270       0x51,                                     // 0x06: pushq     %rcx
271       0x52,                                     // 0x07: pushq     %rdx
272       0x56,                                     // 0x08: pushq     %rsi
273       0x57,                                     // 0x09: pushq     %rdi
274       0x41, 0x50,                               // 0x0a: pushq     %r8
275       0x41, 0x51,                               // 0x0c: pushq     %r9
276       0x41, 0x52,                               // 0x0e: pushq     %r10
277       0x41, 0x53,                               // 0x10: pushq     %r11
278       0x41, 0x54,                               // 0x12: pushq     %r12
279       0x41, 0x55,                               // 0x14: pushq     %r13
280       0x41, 0x56,                               // 0x16: pushq     %r14
281       0x41, 0x57,                               // 0x18: pushq     %r15
282       0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq      0x208, %rsp
283       0x48, 0x0f, 0xae, 0x04, 0x24,             // 0x21: fxsave64  (%rsp)
284       0x48, 0xbf,                               // 0x26: movabsq   <CBMgr>, %rdi
285 
286       // 0x28: JIT re-entry ctx addr.
287       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
288 
289       0x48, 0x8b, 0x75, 0x08,                   // 0x30: movq      8(%rbp), %rsi
290       0x48, 0x83, 0xee, 0x06,                   // 0x34: subq      $6, %rsi
291       0x48, 0xb8,                               // 0x38: movabsq   <REntry>, %rax
292 
293       // 0x3a: JIT re-entry fn addr:
294       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
295 
296       0xff, 0xd0,                               // 0x42: callq     *%rax
297       0x48, 0x89, 0x45, 0x08,                   // 0x44: movq      %rax, 8(%rbp)
298       0x48, 0x0f, 0xae, 0x0c, 0x24,             // 0x48: fxrstor64 (%rsp)
299       0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq      0x208, %rsp
300       0x41, 0x5f,                               // 0x54: popq      %r15
301       0x41, 0x5e,                               // 0x56: popq      %r14
302       0x41, 0x5d,                               // 0x58: popq      %r13
303       0x41, 0x5c,                               // 0x5a: popq      %r12
304       0x41, 0x5b,                               // 0x5c: popq      %r11
305       0x41, 0x5a,                               // 0x5e: popq      %r10
306       0x41, 0x59,                               // 0x60: popq      %r9
307       0x41, 0x58,                               // 0x62: popq      %r8
308       0x5f,                                     // 0x64: popq      %rdi
309       0x5e,                                     // 0x65: popq      %rsi
310       0x5a,                                     // 0x66: popq      %rdx
311       0x59,                                     // 0x67: popq      %rcx
312       0x5b,                                     // 0x68: popq      %rbx
313       0x58,                                     // 0x69: popq      %rax
314       0x5d,                                     // 0x6a: popq      %rbp
315       0xc3,                                     // 0x6b: retq
316  };
317 
318   const unsigned ReentryFnAddrOffset = 0x3a;
319   const unsigned ReentryCtxAddrOffset = 0x28;
320 
321   memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
322   memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
323          sizeof(uint64_t));
324   memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
325          sizeof(uint64_t));
326 }
327 
328 void OrcX86_64_Win32::writeResolverCode(char *ResolverWorkingMem,
329                                         ExecutorAddr ResolverTargetAddress,
330                                         ExecutorAddr ReentryFnAddr,
331                                         ExecutorAddr ReentryCtxAddr) {
332 
333   // resolverCode is similar to OrcX86_64 with differences specific to windows
334   // x64 calling convention: arguments go into rcx, rdx and come in reverse
335   // order, shadow space allocation on stack
336   const uint8_t ResolverCode[] = {
337       // resolver_entry:
338       0x55,                                      // 0x00: pushq     %rbp
339       0x48, 0x89, 0xe5,                          // 0x01: movq      %rsp, %rbp
340       0x50,                                      // 0x04: pushq     %rax
341       0x53,                                      // 0x05: pushq     %rbx
342       0x51,                                      // 0x06: pushq     %rcx
343       0x52,                                      // 0x07: pushq     %rdx
344       0x56,                                      // 0x08: pushq     %rsi
345       0x57,                                      // 0x09: pushq     %rdi
346       0x41, 0x50,                                // 0x0a: pushq     %r8
347       0x41, 0x51,                                // 0x0c: pushq     %r9
348       0x41, 0x52,                                // 0x0e: pushq     %r10
349       0x41, 0x53,                                // 0x10: pushq     %r11
350       0x41, 0x54,                                // 0x12: pushq     %r12
351       0x41, 0x55,                                // 0x14: pushq     %r13
352       0x41, 0x56,                                // 0x16: pushq     %r14
353       0x41, 0x57,                                // 0x18: pushq     %r15
354       0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00,  // 0x1a: subq      0x208, %rsp
355       0x48, 0x0f, 0xae, 0x04, 0x24,              // 0x21: fxsave64  (%rsp)
356 
357       0x48, 0xb9,                                // 0x26: movabsq   <CBMgr>, %rcx
358       // 0x28: JIT re-entry ctx addr.
359       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
360 
361       0x48, 0x8B, 0x55, 0x08,                    // 0x30: mov       rdx, [rbp+0x8]
362       0x48, 0x83, 0xea, 0x06,                    // 0x34: sub       rdx, 0x6
363 
364       0x48, 0xb8,                                // 0x38: movabsq   <REntry>, %rax
365       // 0x3a: JIT re-entry fn addr:
366       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
367 
368       // 0x42: sub       rsp, 0x20 (Allocate shadow space)
369       0x48, 0x83, 0xEC, 0x20,
370       0xff, 0xd0,                                // 0x46: callq     *%rax
371 
372       // 0x48: add       rsp, 0x20 (Free shadow space)
373       0x48, 0x83, 0xC4, 0x20,
374 
375       0x48, 0x89, 0x45, 0x08,                    // 0x4C: movq      %rax, 8(%rbp)
376       0x48, 0x0f, 0xae, 0x0c, 0x24,              // 0x50: fxrstor64 (%rsp)
377       0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00,  // 0x55: addq      0x208, %rsp
378       0x41, 0x5f,                                // 0x5C: popq      %r15
379       0x41, 0x5e,                                // 0x5E: popq      %r14
380       0x41, 0x5d,                                // 0x60: popq      %r13
381       0x41, 0x5c,                                // 0x62: popq      %r12
382       0x41, 0x5b,                                // 0x64: popq      %r11
383       0x41, 0x5a,                                // 0x66: popq      %r10
384       0x41, 0x59,                                // 0x68: popq      %r9
385       0x41, 0x58,                                // 0x6a: popq      %r8
386       0x5f,                                      // 0x6c: popq      %rdi
387       0x5e,                                      // 0x6d: popq      %rsi
388       0x5a,                                      // 0x6e: popq      %rdx
389       0x59,                                      // 0x6f: popq      %rcx
390       0x5b,                                      // 0x70: popq      %rbx
391       0x58,                                      // 0x71: popq      %rax
392       0x5d,                                      // 0x72: popq      %rbp
393       0xc3,                                      // 0x73: retq
394   };
395 
396   const unsigned ReentryFnAddrOffset = 0x3a;
397   const unsigned ReentryCtxAddrOffset = 0x28;
398 
399   memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
400   memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
401          sizeof(uint64_t));
402   memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
403          sizeof(uint64_t));
404 }
405 
406 void OrcI386::writeResolverCode(char *ResolverWorkingMem,
407                                 ExecutorAddr ResolverTargetAddress,
408                                 ExecutorAddr ReentryFnAddr,
409                                 ExecutorAddr ReentryCtxAddr) {
410 
411   assert((ReentryFnAddr.getValue() >> 32) == 0 && "ReentryFnAddr out of range");
412   assert((ReentryCtxAddr.getValue() >> 32) == 0 &&
413          "ReentryCtxAddr out of range");
414 
415   const uint8_t ResolverCode[] = {
416       // resolver_entry:
417       0x55,                               // 0x00: pushl    %ebp
418       0x89, 0xe5,                         // 0x01: movl     %esp, %ebp
419       0x54,                               // 0x03: pushl    %esp
420       0x83, 0xe4, 0xf0,                   // 0x04: andl     $-0x10, %esp
421       0x50,                               // 0x07: pushl    %eax
422       0x53,                               // 0x08: pushl    %ebx
423       0x51,                               // 0x09: pushl    %ecx
424       0x52,                               // 0x0a: pushl    %edx
425       0x56,                               // 0x0b: pushl    %esi
426       0x57,                               // 0x0c: pushl    %edi
427       0x81, 0xec, 0x18, 0x02, 0x00, 0x00, // 0x0d: subl     $0x218, %esp
428       0x0f, 0xae, 0x44, 0x24, 0x10,       // 0x13: fxsave   0x10(%esp)
429       0x8b, 0x75, 0x04,                   // 0x18: movl     0x4(%ebp), %esi
430       0x83, 0xee, 0x05,                   // 0x1b: subl     $0x5, %esi
431       0x89, 0x74, 0x24, 0x04,             // 0x1e: movl     %esi, 0x4(%esp)
432       0xc7, 0x04, 0x24, 0x00, 0x00, 0x00,
433       0x00,                               // 0x22: movl     <cbmgr>, (%esp)
434       0xb8, 0x00, 0x00, 0x00, 0x00,       // 0x29: movl     <reentry>, %eax
435       0xff, 0xd0,                         // 0x2e: calll    *%eax
436       0x89, 0x45, 0x04,                   // 0x30: movl     %eax, 0x4(%ebp)
437       0x0f, 0xae, 0x4c, 0x24, 0x10,       // 0x33: fxrstor  0x10(%esp)
438       0x81, 0xc4, 0x18, 0x02, 0x00, 0x00, // 0x38: addl     $0x218, %esp
439       0x5f,                               // 0x3e: popl     %edi
440       0x5e,                               // 0x3f: popl     %esi
441       0x5a,                               // 0x40: popl     %edx
442       0x59,                               // 0x41: popl     %ecx
443       0x5b,                               // 0x42: popl     %ebx
444       0x58,                               // 0x43: popl     %eax
445       0x8b, 0x65, 0xfc,                   // 0x44: movl     -0x4(%ebp), %esp
446       0x5d,                               // 0x48: popl     %ebp
447       0xc3                                // 0x49: retl
448   };
449 
450   const unsigned ReentryFnAddrOffset = 0x2a;
451   const unsigned ReentryCtxAddrOffset = 0x25;
452 
453   memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
454   memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
455          sizeof(uint32_t));
456   memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
457          sizeof(uint32_t));
458 }
459 
460 void OrcI386::writeTrampolines(char *TrampolineWorkingMem,
461                                ExecutorAddr TrampolineBlockTargetAddress,
462                                ExecutorAddr ResolverAddr,
463                                unsigned NumTrampolines) {
464   assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range");
465 
466   uint64_t CallRelImm = 0xF1C4C400000000e8;
467   uint64_t ResolverRel = ResolverAddr - TrampolineBlockTargetAddress - 5;
468 
469   uint64_t *Trampolines = reinterpret_cast<uint64_t *>(TrampolineWorkingMem);
470   for (unsigned I = 0; I < NumTrampolines; ++I, ResolverRel -= TrampolineSize)
471     Trampolines[I] = CallRelImm | (ResolverRel << 8);
472 }
473 
474 void OrcI386::writeIndirectStubsBlock(char *StubsBlockWorkingMem,
475                                       ExecutorAddr StubsBlockTargetAddress,
476                                       ExecutorAddr PointersBlockTargetAddress,
477                                       unsigned NumStubs) {
478   assert((StubsBlockTargetAddress.getValue() >> 32) == 0 &&
479          "StubsBlockTargetAddress is out of range");
480   assert((PointersBlockTargetAddress.getValue() >> 32) == 0 &&
481          "PointersBlockTargetAddress is out of range");
482 
483   // Stub format is:
484   //
485   // .section __orc_stubs
486   // stub1:
487   //                 jmpq    *ptr1
488   //                 .byte   0xC4         ; <- Invalid opcode padding.
489   //                 .byte   0xF1
490   // stub2:
491   //                 jmpq    *ptr2
492   //
493   // ...
494   //
495   // .section __orc_ptrs
496   // ptr1:
497   //                 .quad 0x0
498   // ptr2:
499   //                 .quad 0x0
500   //
501   // ...
502 
503   assert(stubAndPointerRangesOk<OrcI386>(
504              StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
505          "PointersBlock is out of range");
506 
507   uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
508   uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
509   for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 4)
510     Stub[I] = 0xF1C40000000025ff | (PtrAddr << 16);
511 }
512 
513 void OrcMips32_Base::writeResolverCode(char *ResolverWorkingMem,
514                                        ExecutorAddr ResolverTargetAddress,
515                                        ExecutorAddr ReentryFnAddr,
516                                        ExecutorAddr ReentryCtxAddr,
517                                        bool isBigEndian) {
518 
519   const uint32_t ResolverCode[] = {
520       // resolver_entry:
521       0x27bdff98,                    // 0x00: addiu $sp,$sp,-104
522       0xafa20000,                    // 0x04: sw $v0,0($sp)
523       0xafa30004,                    // 0x08: sw $v1,4($sp)
524       0xafa40008,                    // 0x0c: sw $a0,8($sp)
525       0xafa5000c,                    // 0x10: sw $a1,12($sp)
526       0xafa60010,                    // 0x14: sw $a2,16($sp)
527       0xafa70014,                    // 0x18: sw $a3,20($sp)
528       0xafb00018,                    // 0x1c: sw $s0,24($sp)
529       0xafb1001c,                    // 0x20: sw $s1,28($sp)
530       0xafb20020,                    // 0x24: sw $s2,32($sp)
531       0xafb30024,                    // 0x28: sw $s3,36($sp)
532       0xafb40028,                    // 0x2c: sw $s4,40($sp)
533       0xafb5002c,                    // 0x30: sw $s5,44($sp)
534       0xafb60030,                    // 0x34: sw $s6,48($sp)
535       0xafb70034,                    // 0x38: sw $s7,52($sp)
536       0xafa80038,                    // 0x3c: sw $t0,56($sp)
537       0xafa9003c,                    // 0x40: sw $t1,60($sp)
538       0xafaa0040,                    // 0x44: sw $t2,64($sp)
539       0xafab0044,                    // 0x48: sw $t3,68($sp)
540       0xafac0048,                    // 0x4c: sw $t4,72($sp)
541       0xafad004c,                    // 0x50: sw $t5,76($sp)
542       0xafae0050,                    // 0x54: sw $t6,80($sp)
543       0xafaf0054,                    // 0x58: sw $t7,84($sp)
544       0xafb80058,                    // 0x5c: sw $t8,88($sp)
545       0xafb9005c,                    // 0x60: sw $t9,92($sp)
546       0xafbe0060,                    // 0x64: sw $fp,96($sp)
547       0xafbf0064,                    // 0x68: sw $ra,100($sp)
548 
549       // JIT re-entry ctx addr.
550       0x00000000,                    // 0x6c: lui $a0,ctx
551       0x00000000,                    // 0x70: addiu $a0,$a0,ctx
552 
553       0x03e02825,                    // 0x74: move $a1, $ra
554       0x24a5ffec,                    // 0x78: addiu $a1,$a1,-20
555 
556       // JIT re-entry fn addr:
557       0x00000000,                    // 0x7c: lui $t9,reentry
558       0x00000000,                    // 0x80: addiu $t9,$t9,reentry
559 
560       0x0320f809,                    // 0x84: jalr $t9
561       0x00000000,                    // 0x88: nop
562       0x8fbf0064,                    // 0x8c: lw $ra,100($sp)
563       0x8fbe0060,                    // 0x90: lw $fp,96($sp)
564       0x8fb9005c,                    // 0x94: lw $t9,92($sp)
565       0x8fb80058,                    // 0x98: lw $t8,88($sp)
566       0x8faf0054,                    // 0x9c: lw $t7,84($sp)
567       0x8fae0050,                    // 0xa0: lw $t6,80($sp)
568       0x8fad004c,                    // 0xa4: lw $t5,76($sp)
569       0x8fac0048,                    // 0xa8: lw $t4,72($sp)
570       0x8fab0044,                    // 0xac: lw $t3,68($sp)
571       0x8faa0040,                    // 0xb0: lw $t2,64($sp)
572       0x8fa9003c,                    // 0xb4: lw $t1,60($sp)
573       0x8fa80038,                    // 0xb8: lw $t0,56($sp)
574       0x8fb70034,                    // 0xbc: lw $s7,52($sp)
575       0x8fb60030,                    // 0xc0: lw $s6,48($sp)
576       0x8fb5002c,                    // 0xc4: lw $s5,44($sp)
577       0x8fb40028,                    // 0xc8: lw $s4,40($sp)
578       0x8fb30024,                    // 0xcc: lw $s3,36($sp)
579       0x8fb20020,                    // 0xd0: lw $s2,32($sp)
580       0x8fb1001c,                    // 0xd4: lw $s1,28($sp)
581       0x8fb00018,                    // 0xd8: lw $s0,24($sp)
582       0x8fa70014,                    // 0xdc: lw $a3,20($sp)
583       0x8fa60010,                    // 0xe0: lw $a2,16($sp)
584       0x8fa5000c,                    // 0xe4: lw $a1,12($sp)
585       0x8fa40008,                    // 0xe8: lw $a0,8($sp)
586       0x27bd0068,                    // 0xec: addiu $sp,$sp,104
587       0x0300f825,                    // 0xf0: move $ra, $t8
588       0x03200008,                    // 0xf4: jr $t9
589       0x00000000,                    // 0xf8: move $t9, $v0/v1
590   };
591 
592   const unsigned ReentryFnAddrOffset = 0x7c;   // JIT re-entry fn addr lui
593   const unsigned ReentryCtxAddrOffset = 0x6c;  // JIT re-entry context addr lui
594   const unsigned Offsett = 0xf8;
595 
596   memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
597 
598   // Depending on endian return value will be in v0 or v1.
599   uint32_t MoveVxT9 = isBigEndian ? 0x0060c825 : 0x0040c825;
600   memcpy(ResolverWorkingMem + Offsett, &MoveVxT9, sizeof(MoveVxT9));
601 
602   uint32_t ReentryCtxLUi =
603       0x3c040000 | (((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
604   uint32_t ReentryCtxADDiu = 0x24840000 | (ReentryCtxAddr.getValue() & 0xFFFF);
605   memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi,
606          sizeof(ReentryCtxLUi));
607   memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 4, &ReentryCtxADDiu,
608          sizeof(ReentryCtxADDiu));
609 
610   uint32_t ReentryFnLUi =
611       0x3c190000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
612   uint32_t ReentryFnADDiu = 0x27390000 | (ReentryFnAddr.getValue() & 0xFFFF);
613   memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi,
614          sizeof(ReentryFnLUi));
615   memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 4, &ReentryFnADDiu,
616          sizeof(ReentryFnADDiu));
617 }
618 
619 void OrcMips32_Base::writeTrampolines(char *TrampolineBlockWorkingMem,
620                                       ExecutorAddr TrampolineBlockTargetAddress,
621                                       ExecutorAddr ResolverAddr,
622                                       unsigned NumTrampolines) {
623 
624   assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range");
625 
626   uint32_t *Trampolines =
627       reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
628   uint32_t RHiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16);
629 
630   for (unsigned I = 0; I < NumTrampolines; ++I) {
631     // move $t8,$ra
632     // lui $t9,ResolverAddr
633     // addiu $t9,$t9,ResolverAddr
634     // jalr $t9
635     // nop
636     Trampolines[5 * I + 0] = 0x03e0c025;
637     Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF);
638     Trampolines[5 * I + 2] = 0x27390000 | (ResolverAddr.getValue() & 0xFFFF);
639     Trampolines[5 * I + 3] = 0x0320f809;
640     Trampolines[5 * I + 4] = 0x00000000;
641   }
642 }
643 
644 void OrcMips32_Base::writeIndirectStubsBlock(
645     char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
646     ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
647   assert((StubsBlockTargetAddress.getValue() >> 32) == 0 &&
648          "InitialPtrVal is out of range");
649 
650   // Stub format is:
651   //
652   // .section __orc_stubs
653   // stub1:
654   //                 lui $t9, ptr1
655   //                 lw $t9, %lo(ptr1)($t9)
656   //                 jr $t9
657   // stub2:
658   //                 lui $t9, ptr2
659   //                 lw $t9,%lo(ptr1)($t9)
660   //                 jr $t9
661   //
662   // ...
663   //
664   // .section __orc_ptrs
665   // ptr1:
666   //                 .word 0x0
667   // ptr2:
668   //                 .word 0x0
669   //
670   // i..
671 
672   assert(stubAndPointerRangesOk<OrcMips32_Base>(
673              StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
674          "PointersBlock is out of range");
675 
676   // Populate the stubs page stubs and mark it executable.
677   uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
678   uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
679 
680   for (unsigned I = 0; I < NumStubs; ++I) {
681     uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16);
682     Stub[4 * I + 0] = 0x3c190000 | (HiAddr & 0xFFFF);  // lui $t9,ptr1
683     Stub[4 * I + 1] = 0x8f390000 | (PtrAddr & 0xFFFF); // lw $t9,%lo(ptr1)($t9)
684     Stub[4 * I + 2] = 0x03200008;                      // jr $t9
685     Stub[4 * I + 3] = 0x00000000;                      // nop
686     PtrAddr += 4;
687   }
688 }
689 
690 void OrcMips64::writeResolverCode(char *ResolverWorkingMem,
691                                   ExecutorAddr ResolverTargetAddress,
692                                   ExecutorAddr ReentryFnAddr,
693                                   ExecutorAddr ReentryCtxAddr) {
694 
695   const uint32_t ResolverCode[] = {
696        //resolver_entry:
697       0x67bdff30,                     // 0x00: daddiu $sp,$sp,-208
698       0xffa20000,                     // 0x04: sd v0,0(sp)
699       0xffa30008,                     // 0x08: sd v1,8(sp)
700       0xffa40010,                     // 0x0c: sd a0,16(sp)
701       0xffa50018,                     // 0x10: sd a1,24(sp)
702       0xffa60020,                     // 0x14: sd a2,32(sp)
703       0xffa70028,                     // 0x18: sd a3,40(sp)
704       0xffa80030,                     // 0x1c: sd a4,48(sp)
705       0xffa90038,                     // 0x20: sd a5,56(sp)
706       0xffaa0040,                     // 0x24: sd a6,64(sp)
707       0xffab0048,                     // 0x28: sd a7,72(sp)
708       0xffac0050,                     // 0x2c: sd t0,80(sp)
709       0xffad0058,                     // 0x30: sd t1,88(sp)
710       0xffae0060,                     // 0x34: sd t2,96(sp)
711       0xffaf0068,                     // 0x38: sd t3,104(sp)
712       0xffb00070,                     // 0x3c: sd s0,112(sp)
713       0xffb10078,                     // 0x40: sd s1,120(sp)
714       0xffb20080,                     // 0x44: sd s2,128(sp)
715       0xffb30088,                     // 0x48: sd s3,136(sp)
716       0xffb40090,                     // 0x4c: sd s4,144(sp)
717       0xffb50098,                     // 0x50: sd s5,152(sp)
718       0xffb600a0,                     // 0x54: sd s6,160(sp)
719       0xffb700a8,                     // 0x58: sd s7,168(sp)
720       0xffb800b0,                     // 0x5c: sd t8,176(sp)
721       0xffb900b8,                     // 0x60: sd t9,184(sp)
722       0xffbe00c0,                     // 0x64: sd fp,192(sp)
723       0xffbf00c8,                     // 0x68: sd ra,200(sp)
724 
725       // JIT re-entry ctx addr.
726       0x00000000,                     // 0x6c: lui $a0,heighest(ctx)
727       0x00000000,                     // 0x70: daddiu $a0,$a0,heigher(ctx)
728       0x00000000,                     // 0x74: dsll $a0,$a0,16
729       0x00000000,                     // 0x78: daddiu $a0,$a0,hi(ctx)
730       0x00000000,                     // 0x7c: dsll $a0,$a0,16
731       0x00000000,                     // 0x80: daddiu $a0,$a0,lo(ctx)
732 
733       0x03e02825,                     // 0x84: move $a1, $ra
734       0x64a5ffdc,                     // 0x88: daddiu $a1,$a1,-36
735 
736       // JIT re-entry fn addr:
737       0x00000000,                     // 0x8c: lui $t9,reentry
738       0x00000000,                     // 0x90: daddiu $t9,$t9,reentry
739       0x00000000,                     // 0x94: dsll $t9,$t9,
740       0x00000000,                     // 0x98: daddiu $t9,$t9,
741       0x00000000,                     // 0x9c: dsll $t9,$t9,
742       0x00000000,                     // 0xa0: daddiu $t9,$t9,
743       0x0320f809,                     // 0xa4: jalr $t9
744       0x00000000,                     // 0xa8: nop
745       0xdfbf00c8,                     // 0xac: ld ra, 200(sp)
746       0xdfbe00c0,                     // 0xb0: ld fp, 192(sp)
747       0xdfb900b8,                     // 0xb4: ld t9, 184(sp)
748       0xdfb800b0,                     // 0xb8: ld t8, 176(sp)
749       0xdfb700a8,                     // 0xbc: ld s7, 168(sp)
750       0xdfb600a0,                     // 0xc0: ld s6, 160(sp)
751       0xdfb50098,                     // 0xc4: ld s5, 152(sp)
752       0xdfb40090,                     // 0xc8: ld s4, 144(sp)
753       0xdfb30088,                     // 0xcc: ld s3, 136(sp)
754       0xdfb20080,                     // 0xd0: ld s2, 128(sp)
755       0xdfb10078,                     // 0xd4: ld s1, 120(sp)
756       0xdfb00070,                     // 0xd8: ld s0, 112(sp)
757       0xdfaf0068,                     // 0xdc: ld t3, 104(sp)
758       0xdfae0060,                     // 0xe0: ld t2, 96(sp)
759       0xdfad0058,                     // 0xe4: ld t1, 88(sp)
760       0xdfac0050,                     // 0xe8: ld t0, 80(sp)
761       0xdfab0048,                     // 0xec: ld a7, 72(sp)
762       0xdfaa0040,                     // 0xf0: ld a6, 64(sp)
763       0xdfa90038,                     // 0xf4: ld a5, 56(sp)
764       0xdfa80030,                     // 0xf8: ld a4, 48(sp)
765       0xdfa70028,                     // 0xfc: ld a3, 40(sp)
766       0xdfa60020,                     // 0x100: ld a2, 32(sp)
767       0xdfa50018,                     // 0x104: ld a1, 24(sp)
768       0xdfa40010,                     // 0x108: ld a0, 16(sp)
769       0xdfa30008,                     // 0x10c: ld v1, 8(sp)
770       0x67bd00d0,                     // 0x110: daddiu $sp,$sp,208
771       0x0300f825,                     // 0x114: move $ra, $t8
772       0x03200008,                     // 0x118: jr $t9
773       0x0040c825,                     // 0x11c: move $t9, $v0
774   };
775 
776   const unsigned ReentryFnAddrOffset = 0x8c;   // JIT re-entry fn addr lui
777   const unsigned ReentryCtxAddrOffset = 0x6c;  // JIT re-entry ctx addr lui
778 
779   memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
780 
781   uint32_t ReentryCtxLUi =
782       0x3c040000 |
783       (((ReentryCtxAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF);
784   uint32_t ReentryCtxDADDiu =
785       0x64840000 | (((ReentryCtxAddr.getValue() + 0x80008000) >> 32) & 0xFFFF);
786   uint32_t ReentryCtxDSLL = 0x00042438;
787   uint32_t ReentryCtxDADDiu2 =
788       0x64840000 | ((((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF));
789   uint32_t ReentryCtxDSLL2 = 0x00042438;
790   uint32_t ReentryCtxDADDiu3 =
791       0x64840000 | (ReentryCtxAddr.getValue() & 0xFFFF);
792 
793   memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi,
794          sizeof(ReentryCtxLUi));
795   memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 4), &ReentryCtxDADDiu,
796          sizeof(ReentryCtxDADDiu));
797   memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 8), &ReentryCtxDSLL,
798          sizeof(ReentryCtxDSLL));
799   memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 12), &ReentryCtxDADDiu2,
800          sizeof(ReentryCtxDADDiu2));
801   memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 16), &ReentryCtxDSLL2,
802          sizeof(ReentryCtxDSLL2));
803   memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 20), &ReentryCtxDADDiu3,
804          sizeof(ReentryCtxDADDiu3));
805 
806   uint32_t ReentryFnLUi =
807       0x3c190000 |
808       (((ReentryFnAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF);
809 
810   uint32_t ReentryFnDADDiu =
811       0x67390000 | (((ReentryFnAddr.getValue() + 0x80008000) >> 32) & 0xFFFF);
812 
813   uint32_t ReentryFnDSLL = 0x0019cc38;
814 
815   uint32_t ReentryFnDADDiu2 =
816       0x67390000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
817 
818   uint32_t ReentryFnDSLL2 = 0x0019cc38;
819 
820   uint32_t ReentryFnDADDiu3 = 0x67390000 | (ReentryFnAddr.getValue() & 0xFFFF);
821 
822   memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi,
823          sizeof(ReentryFnLUi));
824   memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 4), &ReentryFnDADDiu,
825          sizeof(ReentryFnDADDiu));
826   memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 8), &ReentryFnDSLL,
827          sizeof(ReentryFnDSLL));
828   memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 12), &ReentryFnDADDiu2,
829          sizeof(ReentryFnDADDiu2));
830   memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 16), &ReentryFnDSLL2,
831          sizeof(ReentryFnDSLL2));
832   memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 20), &ReentryFnDADDiu3,
833          sizeof(ReentryFnDADDiu3));
834 }
835 
836 void OrcMips64::writeTrampolines(char *TrampolineBlockWorkingMem,
837                                  ExecutorAddr TrampolineBlockTargetAddress,
838                                  ExecutorAddr ResolverAddr,
839                                  unsigned NumTrampolines) {
840 
841   uint32_t *Trampolines =
842       reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
843 
844   uint64_t HeighestAddr = ((ResolverAddr.getValue() + 0x800080008000) >> 48);
845   uint64_t HeigherAddr = ((ResolverAddr.getValue() + 0x80008000) >> 32);
846   uint64_t HiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16);
847 
848   for (unsigned I = 0; I < NumTrampolines; ++I) {
849     Trampolines[10 * I + 0] = 0x03e0c025;                            // move $t8,$ra
850     Trampolines[10 * I + 1] = 0x3c190000 | (HeighestAddr & 0xFFFF);  // lui $t9,resolveAddr
851     Trampolines[10 * I + 2] = 0x67390000 | (HeigherAddr & 0xFFFF);   // daddiu $t9,$t9,%higher(resolveAddr)
852     Trampolines[10 * I + 3] = 0x0019cc38;                            // dsll $t9,$t9,16
853     Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF);        // daddiu $t9,$t9,%hi(ptr)
854     Trampolines[10 * I + 5] = 0x0019cc38;                            // dsll $t9,$t9,16
855     Trampolines[10 * I + 6] = 0x67390000 | (ResolverAddr.getValue() &
856                                             0xFFFF); // daddiu $t9,$t9,%lo(ptr)
857     Trampolines[10 * I + 7] = 0x0320f809;                            // jalr $t9
858     Trampolines[10 * I + 8] = 0x00000000;                            // nop
859     Trampolines[10 * I + 9] = 0x00000000;                            // nop
860   }
861 }
862 
863 void OrcMips64::writeIndirectStubsBlock(char *StubsBlockWorkingMem,
864                                         ExecutorAddr StubsBlockTargetAddress,
865                                         ExecutorAddr PointersBlockTargetAddress,
866                                         unsigned NumStubs) {
867   // Stub format is:
868   //
869   // .section __orc_stubs
870   // stub1:
871   //                 lui $t9,ptr1
872   //                 dsll $t9,$t9,16
873   //                 daddiu $t9,$t9,%hi(ptr)
874   //                 dsll $t9,$t9,16
875   //                 ld $t9,%lo(ptr)
876   //                 jr $t9
877   // stub2:
878   //                 lui $t9,ptr1
879   //                 dsll $t9,$t9,16
880   //                 daddiu $t9,$t9,%hi(ptr)
881   //                 dsll $t9,$t9,16
882   //                 ld $t9,%lo(ptr)
883   //                 jr $t9
884   //
885   // ...
886   //
887   // .section __orc_ptrs
888   // ptr1:
889   //                 .dword 0x0
890   // ptr2:
891   //                 .dword 0x0
892   //
893   // ...
894 
895   assert(stubAndPointerRangesOk<OrcMips64>(
896              StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
897          "PointersBlock is out of range");
898 
899   // Populate the stubs page stubs and mark it executable.
900   uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
901   uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
902 
903   for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) {
904     uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48);
905     uint64_t HeigherAddr = ((PtrAddr + 0x80008000) >> 32);
906     uint64_t HiAddr = ((PtrAddr + 0x8000) >> 16);
907     Stub[8 * I + 0] = 0x3c190000 | (HeighestAddr & 0xFFFF);  // lui $t9,ptr1
908     Stub[8 * I + 1] = 0x67390000 | (HeigherAddr & 0xFFFF);   // daddiu $t9,$t9,%higher(ptr)
909     Stub[8 * I + 2] = 0x0019cc38;                            // dsll $t9,$t9,16
910     Stub[8 * I + 3] = 0x67390000 | (HiAddr & 0xFFFF);        // daddiu $t9,$t9,%hi(ptr)
911     Stub[8 * I + 4] = 0x0019cc38;                            // dsll $t9,$t9,16
912     Stub[8 * I + 5] = 0xdf390000 | (PtrAddr & 0xFFFF);       // ld $t9,%lo(ptr)
913     Stub[8 * I + 6] = 0x03200008;                            // jr $t9
914     Stub[8 * I + 7] = 0x00000000;                            // nop
915   }
916 }
917 
918 void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
919                                    ExecutorAddr ResolverTargetAddress,
920                                    ExecutorAddr ReentryFnAddr,
921                                    ExecutorAddr ReentryCtxAddr) {
922 
923   const uint32_t ResolverCode[] = {
924       0xef810113, // 0x00: addi sp,sp,-264
925       0x00813023, // 0x04: sd s0,0(sp)
926       0x00913423, // 0x08: sd s1,8(sp)
927       0x01213823, // 0x0c: sd s2,16(sp)
928       0x01313c23, // 0x10: sd s3,24(sp)
929       0x03413023, // 0x14: sd s4,32(sp)
930       0x03513423, // 0x18: sd s5,40(sp)
931       0x03613823, // 0x1c: sd s6,48(sp)
932       0x03713c23, // 0x20: sd s7,56(sp)
933       0x05813023, // 0x24: sd s8,64(sp)
934       0x05913423, // 0x28: sd s9,72(sp)
935       0x05a13823, // 0x2c: sd s10,80(sp)
936       0x05b13c23, // 0x30: sd s11,88(sp)
937       0x06113023, // 0x34: sd ra,96(sp)
938       0x06a13423, // 0x38: sd a0,104(sp)
939       0x06b13823, // 0x3c: sd a1,112(sp)
940       0x06c13c23, // 0x40: sd a2,120(sp)
941       0x08d13023, // 0x44: sd a3,128(sp)
942       0x08e13423, // 0x48: sd a4,136(sp)
943       0x08f13823, // 0x4c: sd a5,144(sp)
944       0x09013c23, // 0x50: sd a6,152(sp)
945       0x0b113023, // 0x54: sd a7,160(sp)
946       0x0a813427, // 0x58: fsd fs0,168(sp)
947       0x0a913827, // 0x5c: fsd fs1,176(sp)
948       0x0b213c27, // 0x60: fsd fs2,184(sp)
949       0x0d313027, // 0x64: fsd fs3,192(sp)
950       0x0d413427, // 0x68: fsd fs4,200(sp)
951       0x0d513827, // 0x6c: fsd fs5,208(sp)
952       0x0d613c27, // 0x70: fsd fs6,216(sp)
953       0x0f713027, // 0x74: fsd fs7,224(sp)
954       0x0f813427, // 0x78: fsd fs8,232(sp)
955       0x0f913827, // 0x7c: fsd fs9,240(sp)
956       0x0fa13c27, // 0x80: fsd fs10,248(sp)
957       0x11b13027, // 0x84: fsd fs11,256(sp)
958       0x00000517, // 0x88: auipc a0,0x0
959       0x0b053503, // 0x8c: ld a0,176(a0) # 0x138
960       0x00030593, // 0x90: mv a1,t1
961       0xff458593, // 0x94: addi a1,a1,-12
962       0x00000617, // 0x98: auipc a2,0x0
963       0x0a863603, // 0x9c: ld a2,168(a2) # 0x140
964       0x000600e7, // 0xa0: jalr a2
965       0x00050293, // 0xa4: mv t0,a0
966       0x00013403, // 0xa8: ld s0,0(sp)
967       0x00813483, // 0xac: ld s1,8(sp)
968       0x01013903, // 0xb0: ld s2,16(sp)
969       0x01813983, // 0xb4: ld s3,24(sp)
970       0x02013a03, // 0xb8: ld s4,32(sp)
971       0x02813a83, // 0xbc: ld s5,40(sp)
972       0x03013b03, // 0xc0: ld s6,48(sp)
973       0x03813b83, // 0xc4: ld s7,56(sp)
974       0x04013c03, // 0xc8: ld s8,64(sp)
975       0x04813c83, // 0xcc: ld s9,72(sp)
976       0x05013d03, // 0xd0: ld s10,80(sp)
977       0x05813d83, // 0xd4: ld s11,88(sp)
978       0x06013083, // 0xd8: ld ra,96(sp)
979       0x06813503, // 0xdc: ld a0,104(sp)
980       0x07013583, // 0xe0: ld a1,112(sp)
981       0x07813603, // 0xe4: ld a2,120(sp)
982       0x08013683, // 0xe8: ld a3,128(sp)
983       0x08813703, // 0xec: ld a4,136(sp)
984       0x09013783, // 0xf0: ld a5,144(sp)
985       0x09813803, // 0xf4: ld a6,152(sp)
986       0x0a013883, // 0xf8: ld a7,160(sp)
987       0x0a813407, // 0xfc: fld fs0,168(sp)
988       0x0b013487, // 0x100: fld fs1,176(sp)
989       0x0b813907, // 0x104: fld fs2,184(sp)
990       0x0c013987, // 0x108: fld fs3,192(sp)
991       0x0c813a07, // 0x10c: fld fs4,200(sp)
992       0x0d013a87, // 0x110: fld fs5,208(sp)
993       0x0d813b07, // 0x114: fld fs6,216(sp)
994       0x0e013b87, // 0x118: fld fs7,224(sp)
995       0x0e813c07, // 0x11c: fld fs8,232(sp)
996       0x0f013c87, // 0x120: fld fs9,240(sp)
997       0x0f813d07, // 0x124: fld fs10,248(sp)
998       0x10013d87, // 0x128: fld fs11,256(sp)
999       0x10810113, // 0x12c: addi sp,sp,264
1000       0x00028067, // 0x130: jr t0
1001       0x12345678, // 0x134: padding to align at 8 byte
1002       0x12345678, // 0x138: Lreentry_ctx_ptr:
1003       0xdeadbeef, // 0x13c:      .quad 0
1004       0x98765432, // 0x140: Lreentry_fn_ptr:
1005       0xcafef00d  // 0x144:      .quad 0
1006   };
1007 
1008   const unsigned ReentryCtxAddrOffset = 0x138;
1009   const unsigned ReentryFnAddrOffset = 0x140;
1010 
1011   memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
1012   memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
1013          sizeof(uint64_t));
1014   memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
1015          sizeof(uint64_t));
1016 }
1017 
1018 void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem,
1019                                   ExecutorAddr TrampolineBlockTargetAddress,
1020                                   ExecutorAddr ResolverAddr,
1021                                   unsigned NumTrampolines) {
1022 
1023   unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
1024 
1025   memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
1026          sizeof(uint64_t));
1027 
1028   uint32_t *Trampolines =
1029       reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
1030   for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
1031     uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xFFFFF000;
1032     uint32_t Lo12 = OffsetToPtr - Hi20;
1033     Trampolines[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr)
1034     Trampolines[4 * I + 1] =
1035         0x0002b283 | ((Lo12 & 0xFFF) << 20);    // ld t0, %lo(Lptr)
1036     Trampolines[4 * I + 2] = 0x00028367;        // jalr t1, t0
1037     Trampolines[4 * I + 3] = 0xdeadface;        // padding
1038   }
1039 }
1040 
1041 void OrcRiscv64::writeIndirectStubsBlock(
1042     char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
1043     ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
1044   // Stub format is:
1045   //
1046   // .section __orc_stubs
1047   // stub1:
1048   //                 auipc   t0, %hi(ptr1)  ; PC-rel load of ptr1
1049   //                 ld      t0, %lo(t0)
1050   //                 jr      t0             ; Jump to resolver
1051   //                 .quad 0                ; Pad to 16 bytes
1052   // stub2:
1053   //                 auipc   t0, %hi(ptr1)  ; PC-rel load of ptr1
1054   //                 ld      t0, %lo(t0)
1055   //                 jr      t0             ; Jump to resolver
1056   //                 .quad 0
1057   //
1058   // ...
1059   //
1060   // .section __orc_ptrs
1061   // ptr1:
1062   //                 .quad 0x0
1063   // ptr2:
1064   //                 .quad 0x0
1065   //
1066   // ...
1067 
1068   assert(stubAndPointerRangesOk<OrcRiscv64>(
1069              StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
1070          "PointersBlock is out of range");
1071 
1072   uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
1073 
1074   for (unsigned I = 0; I < NumStubs; ++I) {
1075     uint64_t PtrDisplacement =
1076         PointersBlockTargetAddress - StubsBlockTargetAddress;
1077     uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xFFFFF000;
1078     uint32_t Lo12 = PtrDisplacement - Hi20;
1079     Stub[4 * I + 0] = 0x00000297 | Hi20;                   // auipc t0, %hi(Lptr)
1080     Stub[4 * I + 1] = 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr)
1081     Stub[4 * I + 2] = 0x00028067;                          // jr t0
1082     Stub[4 * I + 3] = 0xfeedbeef;                          // padding
1083     PointersBlockTargetAddress += PointerSize;
1084     StubsBlockTargetAddress += StubSize;
1085   }
1086 }
1087 
1088 void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem,
1089                                        ExecutorAddr ResolverTargetAddress,
1090                                        ExecutorAddr ReentryFnAddr,
1091                                        ExecutorAddr ReentryCtxAddr) {
1092 
1093   LLVM_DEBUG({
1094     dbgs() << "Writing resolver code to "
1095            << formatv("{0:x16}", ResolverTargetAddress) << "\n";
1096   });
1097 
1098   const uint32_t ResolverCode[] = {
1099       0x02fde063, // 0x0: addi.d $sp, $sp, -136(0xf78)
1100       0x29c00061, // 0x4: st.d $ra, $sp, 0
1101       0x29c02064, // 0x8: st.d $a0, $sp, 8(0x8)
1102       0x29c04065, // 0xc: st.d $a1, $sp, 16(0x10)
1103       0x29c06066, // 0x10: st.d $a2, $sp, 24(0x18)
1104       0x29c08067, // 0x14: st.d $a3, $sp, 32(0x20)
1105       0x29c0a068, // 0x18: st.d $a4, $sp, 40(0x28)
1106       0x29c0c069, // 0x1c: st.d $a5, $sp, 48(0x30)
1107       0x29c0e06a, // 0x20: st.d $a6, $sp, 56(0x38)
1108       0x29c1006b, // 0x24: st.d $a7, $sp, 64(0x40)
1109       0x2bc12060, // 0x28: fst.d $fa0, $sp, 72(0x48)
1110       0x2bc14061, // 0x2c: fst.d $fa1, $sp, 80(0x50)
1111       0x2bc16062, // 0x30: fst.d $fa2, $sp, 88(0x58)
1112       0x2bc18063, // 0x34: fst.d $fa3, $sp, 96(0x60)
1113       0x2bc1a064, // 0x38: fst.d $fa4, $sp, 104(0x68)
1114       0x2bc1c065, // 0x3c: fst.d $fa5, $sp, 112(0x70)
1115       0x2bc1e066, // 0x40: fst.d $fa6, $sp, 120(0x78)
1116       0x2bc20067, // 0x44: fst.d $fa7, $sp, 128(0x80)
1117       0x1c000004, // 0x48: pcaddu12i $a0, 0
1118       0x28c1c084, // 0x4c: ld.d $a0, $a0, 112(0x70)
1119       0x001501a5, // 0x50: move $a1, $t1
1120       0x02ffd0a5, // 0x54: addi.d $a1, $a1, -12(0xff4)
1121       0x1c000006, // 0x58: pcaddu12i $a2, 0
1122       0x28c1a0c6, // 0x5c: ld.d $a2, $a2, 104(0x68)
1123       0x4c0000c1, // 0x60: jirl $ra, $a2, 0
1124       0x0015008c, // 0x64: move $t0, $a0
1125       0x2b820067, // 0x68: fld.d $fa7, $sp, 128(0x80)
1126       0x2b81e066, // 0x6c: fld.d $fa6, $sp, 120(0x78)
1127       0x2b81c065, // 0x70: fld.d $fa5, $sp, 112(0x70)
1128       0x2b81a064, // 0x74: fld.d $fa4, $sp, 104(0x68)
1129       0x2b818063, // 0x78: fld.d $fa3, $sp, 96(0x60)
1130       0x2b816062, // 0x7c: fld.d $fa2, $sp, 88(0x58)
1131       0x2b814061, // 0x80: fld.d $fa1, $sp, 80(0x50)
1132       0x2b812060, // 0x84: fld.d $fa0, $sp, 72(0x48)
1133       0x28c1006b, // 0x88: ld.d $a7, $sp, 64(0x40)
1134       0x28c0e06a, // 0x8c: ld.d $a6, $sp, 56(0x38)
1135       0x28c0c069, // 0x90: ld.d $a5, $sp, 48(0x30)
1136       0x28c0a068, // 0x94: ld.d $a4, $sp, 40(0x28)
1137       0x28c08067, // 0x98: ld.d $a3, $sp, 32(0x20)
1138       0x28c06066, // 0x9c: ld.d $a2, $sp, 24(0x18)
1139       0x28c04065, // 0xa0: ld.d $a1, $sp, 16(0x10)
1140       0x28c02064, // 0xa4: ld.d $a0, $sp, 8(0x8)
1141       0x28c00061, // 0xa8: ld.d $ra, $sp, 0
1142       0x02c22063, // 0xac: addi.d $sp, $sp, 136(0x88)
1143       0x4c000180, // 0xb0: jr $t0
1144       0x00000000, // 0xb4: padding to align at 8 bytes
1145       0x01234567, // 0xb8: Lreentry_ctx_ptr:
1146       0xdeedbeef, // 0xbc:      .dword 0
1147       0x98765432, // 0xc0: Lreentry_fn_ptr:
1148       0xcafef00d, // 0xc4:      .dword 0
1149   };
1150 
1151   const unsigned ReentryCtxAddrOffset = 0xb8;
1152   const unsigned ReentryFnAddrOffset = 0xc0;
1153 
1154   memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
1155   memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
1156          sizeof(uint64_t));
1157   memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
1158          sizeof(uint64_t));
1159 }
1160 
1161 void OrcLoongArch64::writeTrampolines(char *TrampolineBlockWorkingMem,
1162                                       ExecutorAddr TrampolineBlockTargetAddress,
1163                                       ExecutorAddr ResolverAddr,
1164                                       unsigned NumTrampolines) {
1165 
1166   LLVM_DEBUG({
1167     dbgs() << "Writing trampoline code to "
1168            << formatv("{0:x16}", TrampolineBlockTargetAddress) << "\n";
1169   });
1170 
1171   unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
1172 
1173   memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
1174          sizeof(uint64_t));
1175 
1176   uint32_t *Trampolines =
1177       reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
1178   for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
1179     uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xfffff000;
1180     uint32_t Lo12 = OffsetToPtr - Hi20;
1181     Trampolines[4 * I + 0] =
1182         0x1c00000c |
1183         (((Hi20 >> 12) & 0xfffff) << 5); // pcaddu12i $t0, %pc_hi20(Lptr)
1184     Trampolines[4 * I + 1] =
1185         0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr)
1186     Trampolines[4 * I + 2] = 0x4c00018d;     // jirl $t1, $t0, 0
1187     Trampolines[4 * I + 3] = 0x0;            // padding
1188   }
1189 }
1190 
1191 void OrcLoongArch64::writeIndirectStubsBlock(
1192     char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
1193     ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
1194   // Stub format is:
1195   //
1196   // .section __orc_stubs
1197   // stub1:
1198   //        pcaddu12i $t0, %pc_hi20(ptr1)      ; PC-rel load of ptr1
1199   //        ld.d      $t0, $t0, %pc_lo12(ptr1)
1200   //        jr        $t0                      ; Jump to resolver
1201   //        .dword    0                        ; Pad to 16 bytes
1202   // stub2:
1203   //        pcaddu12i $t0, %pc_hi20(ptr2)      ; PC-rel load of ptr2
1204   //        ld.d      $t0, $t0, %pc_lo12(ptr2)
1205   //        jr        $t0                      ; Jump to resolver
1206   //        .dword    0                        ; Pad to 16 bytes
1207   // ...
1208   //
1209   // .section __orc_ptrs
1210   // ptr1:
1211   //        .dword 0x0
1212   // ptr2:
1213   //        .dword 0x0
1214   // ...
1215   LLVM_DEBUG({
1216     dbgs() << "Writing stubs code to "
1217            << formatv("{0:x16}", StubsBlockTargetAddress) << "\n";
1218   });
1219   assert(stubAndPointerRangesOk<OrcLoongArch64>(
1220              StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
1221          "PointersBlock is out of range");
1222 
1223   uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
1224 
1225   for (unsigned I = 0; I < NumStubs; ++I) {
1226     uint64_t PtrDisplacement =
1227         PointersBlockTargetAddress - StubsBlockTargetAddress;
1228     uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xfffff000;
1229     uint32_t Lo12 = PtrDisplacement - Hi20;
1230     Stub[4 * I + 0] = 0x1c00000c | (((Hi20 >> 12) & 0xfffff)
1231                                     << 5); // pcaddu12i $t0, %pc_hi20(Lptr)
1232     Stub[4 * I + 1] =
1233         0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr)
1234     Stub[4 * I + 2] = 0x4c000180;            // jr $t0
1235     Stub[4 * I + 3] = 0x0;                   // padding
1236     PointersBlockTargetAddress += PointerSize;
1237     StubsBlockTargetAddress += StubSize;
1238   }
1239 }
1240 
1241 } // End namespace orc.
1242 } // End namespace llvm.
1243