xref: /freebsd/contrib/llvm-project/lld/MachO/Arch/ARM64.cpp (revision 9c77fb6aaa366cbabc80ee1b834bcfe4df135491)
1 //===- ARM64.cpp ----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Arch/ARM64Common.h"
10 #include "InputFiles.h"
11 #include "Symbols.h"
12 #include "SyntheticSections.h"
13 #include "Target.h"
14 
15 #include "lld/Common/ErrorHandler.h"
16 #include "mach-o/compact_unwind_encoding.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/BinaryFormat/MachO.h"
19 #include "llvm/Support/Endian.h"
20 #include "llvm/Support/LEB128.h"
21 #include "llvm/Support/MathExtras.h"
22 
23 using namespace llvm;
24 using namespace llvm::MachO;
25 using namespace llvm::support::endian;
26 using namespace lld;
27 using namespace lld::macho;
28 
29 namespace {
30 
// AArch64 target implementation. Behavior shared with other ARM64 flavors
// lives in ARM64Common; this struct supplies the concrete instruction
// sequences for stubs and thunks, plus linker-optimization-hint (LOH)
// relaxation.
struct ARM64 : ARM64Common {
  ARM64();
  // Writes the lazy-binding stub for a symbol; the last parameter is the VA
  // of the pointer slot the stub loads the target address from.
  void writeStub(uint8_t *buf, const Symbol &, uint64_t) const override;
  void writeStubHelperHeader(uint8_t *buf) const override;
  void writeStubHelperEntry(uint8_t *buf, const Symbol &,
                            uint64_t entryAddr) const override;

  // Emits one objc_msgSend stub (fast or small flavor, chosen by
  // config->objcStubsMode) and advances stubOffset past it.
  void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
                            uint64_t &stubOffset, uint64_t selrefVA,
                            Symbol *objcMsgSend) const override;
  // Fills `thunk` with code that jumps to `funcSym` (see thunkCode below).
  void populateThunk(InputSection *thunk, Symbol *funcSym) override;
  // Applies LC_LINKER_OPTIMIZATION_HINT transformations for one object file.
  void applyOptimizationHints(uint8_t *, const ObjFile &) const override;

  // ICF-safe thunks: a single direct branch to the deduplicated target.
  void initICFSafeThunkBody(InputSection *thunk,
                            Symbol *targetSym) const override;
  Symbol *getThunkBranchTarget(InputSection *thunk) const override;
  uint32_t getICFSafeThunkSize() const override;
};
49 
50 } // namespace
51 
52 // Random notes on reloc types:
53 // ADDEND always pairs with BRANCH26, PAGE21, or PAGEOFF12
54 // POINTER_TO_GOT: ld64 supports a 4-byte pc-relative form as well as an 8-byte
55 // absolute version of this relocation. The semantics of the absolute relocation
56 // are weird -- it results in the value of the GOT slot being written, instead
57 // of the address. Let's not support it unless we find a real-world use case.
// Attribute table for ARM64 relocations; entry names correspond to the
// ARM64_RELOC_* types. Exposed through TargetInfo::relocAttrs, which is set
// up in the ARM64 constructor below.
static constexpr std::array<RelocAttrs, 11> relocAttrsArray{{
#define B(x) RelocAttrBits::x
    {"UNSIGNED",
     B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)},
    {"SUBTRACTOR", B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)},
    {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
    {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)},
    {"PAGEOFF12", B(ABSOLUTE) | B(EXTERN) | B(BYTE4)},
    {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)},
    {"GOT_LOAD_PAGEOFF12",
     B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
    {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
    {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)},
    {"TLVP_LOAD_PAGEOFF12",
     B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)},
    {"ADDEND", B(ADDEND)},
#undef B
}};
76 
// Skeleton of a lazy-binding stub; the adrp/ldr immediates are filled in by
// ::writeStub() so the stub jumps through its __la_symbol_ptr slot.
static constexpr uint32_t stubCode[] = {
    0x90000010, // 00: adrp  x16, __la_symbol_ptr@page
    0xf9400210, // 04: ldr   x16, [x16, __la_symbol_ptr@pageoff]
    0xd61f0200, // 08: br    x16
};

// Writes the stub for `sym` at buf8. pointerVA is the VA of the pointer slot
// that holds the (lazily bound) target address.
void ARM64::writeStub(uint8_t *buf8, const Symbol &sym,
                      uint64_t pointerVA) const {
  ::writeStub(buf8, stubCode, sym, pointerVA);
}
87 
// Shared prologue of the stub-helper section: materializes _dyld_private,
// saves x16/x17, and jumps to dyld_stub_binder. The immediates are filled in
// by ::writeStubHelperHeader().
static constexpr uint32_t stubHelperHeaderCode[] = {
    0x90000011, // 00: adrp  x17, _dyld_private@page
    0x91000231, // 04: add   x17, x17, _dyld_private@pageoff
    0xa9bf47f0, // 08: stp   x16/x17, [sp, #-16]!
    0x90000010, // 0c: adrp  x16, dyld_stub_binder@page
    0xf9400210, // 10: ldr   x16, [x16, dyld_stub_binder@pageoff]
    0xd61f0200, // 14: br    x16
};

void ARM64::writeStubHelperHeader(uint8_t *buf8) const {
  ::writeStubHelperHeader<LP64>(buf8, stubHelperHeaderCode);
}
100 
// Per-symbol stub-helper entry: loads an identifier from the trailing .long
// (patched by ::writeStubHelperEntry()) and branches to the helper header.
static constexpr uint32_t stubHelperEntryCode[] = {
    0x18000050, // 00: ldr  w16, l0
    0x14000000, // 04: b    stubHelperHeader
    0x00000000, // 08: l0: .long 0
};

void ARM64::writeStubHelperEntry(uint8_t *buf8, const Symbol &sym,
                                 uint64_t entryVA) const {
  ::writeStubHelperEntry(buf8, stubHelperEntryCode, sym, entryVA);
}
111 
// Fast objc stub: loads the selector into x1, then tail-calls _objc_msgSend
// through its GOT entry. Trailing brk instructions pad the stub out to the
// fast-stub alignment.
static constexpr uint32_t objcStubsFastCode[] = {
    0x90000001, // adrp  x1, __objc_selrefs@page
    0xf9400021, // ldr   x1, [x1, @selector("foo")@pageoff]
    0x90000010, // adrp  x16, _got@page
    0xf9400210, // ldr   x16, [x16, _objc_msgSend@pageoff]
    0xd61f0200, // br    x16
    0xd4200020, // brk   #0x1
    0xd4200020, // brk   #0x1
    0xd4200020, // brk   #0x1
};

// Small objc stub: loads the selector into x1 and branches directly to
// _objc_msgSend (or its stubs-section entry).
static constexpr uint32_t objcStubsSmallCode[] = {
    0x90000001, // adrp  x1, __objc_selrefs@page
    0xf9400021, // ldr   x1, [x1, @selector("foo")@pageoff]
    0x14000000, // b     _objc_msgSend
};
128 
129 void ARM64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
130                                  uint64_t &stubOffset, uint64_t selrefVA,
131                                  Symbol *objcMsgSend) const {
132   uint64_t objcMsgSendAddr;
133   uint64_t objcStubSize;
134   uint64_t objcMsgSendIndex;
135 
136   if (config->objcStubsMode == ObjCStubsMode::fast) {
137     objcStubSize = target->objcStubsFastSize;
138     objcMsgSendAddr = in.got->addr;
139     objcMsgSendIndex = objcMsgSend->gotIndex;
140     ::writeObjCMsgSendFastStub<LP64>(buf, objcStubsFastCode, sym, stubsAddr,
141                                      stubOffset, selrefVA, objcMsgSendAddr,
142                                      objcMsgSendIndex);
143   } else {
144     assert(config->objcStubsMode == ObjCStubsMode::small);
145     objcStubSize = target->objcStubsSmallSize;
146     if (auto *d = dyn_cast<Defined>(objcMsgSend)) {
147       objcMsgSendAddr = d->getVA();
148       objcMsgSendIndex = 0;
149     } else {
150       objcMsgSendAddr = in.stubs->addr;
151       objcMsgSendIndex = objcMsgSend->stubsIndex;
152     }
153     ::writeObjCMsgSendSmallStub<LP64>(buf, objcStubsSmallCode, sym, stubsAddr,
154                                       stubOffset, selrefVA, objcMsgSendAddr,
155                                       objcMsgSendIndex);
156   }
157   stubOffset += objcStubSize;
158 }
159 
// A thunk is the relaxed variation of stubCode. We don't need the
// extra indirection through a lazy pointer because the target address
// is known at link time.
static constexpr uint32_t thunkCode[] = {
    0x90000010, // 00: adrp  x16, <thunk.ptr>@page
    0x91000210, // 04: add   x16, x16, <thunk.ptr>@pageoff
    0xd61f0200, // 08: br    x16
};

// Fills `thunk` with thunkCode and attaches the relocations that patch the
// adrp (PAGE21 at offset 0) and add (PAGEOFF12 at offset 4) immediates to
// point at `funcSym`.
void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
  thunk->align = 4;
  thunk->data = {reinterpret_cast<const uint8_t *>(thunkCode),
                 sizeof(thunkCode)};
  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_PAGEOFF12,
                             /*pcrel=*/false, /*length=*/2,
                             /*offset=*/4, /*addend=*/0,
                             /*referent=*/funcSym);
  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_PAGE21,
                             /*pcrel=*/true, /*length=*/2,
                             /*offset=*/0, /*addend=*/0,
                             /*referent=*/funcSym);
}
// Just a single direct branch to the target function.
static constexpr uint32_t icfSafeThunkCode[] = {
    0x14000000, // 00: b    target
};

// Sets up an ICF-safe thunk: its body is the single-branch template above
// plus a BRANCH26 reloc that redirects the branch to `targetSym`.
void ARM64::initICFSafeThunkBody(InputSection *thunk, Symbol *targetSym) const {
  // The base data here will not be itself modified, we'll just be adding a
  // reloc below. So we can directly use the constexpr above as the data.
  thunk->data = {reinterpret_cast<const uint8_t *>(icfSafeThunkCode),
                 sizeof(icfSafeThunkCode)};

  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
                             /*pcrel=*/true, /*length=*/2,
                             /*offset=*/0, /*addend=*/0,
                             /*referent=*/targetSym);
}
198 
199 Symbol *ARM64::getThunkBranchTarget(InputSection *thunk) const {
200   assert(thunk->relocs.size() == 1 &&
201          "expected a single reloc on ARM64 ICF thunk");
202   auto &reloc = thunk->relocs[0];
203   assert(isa<Symbol *>(reloc.referent) &&
204          "ARM64 thunk reloc is expected to point to a Symbol");
205 
206   return cast<Symbol *>(reloc.referent);
207 }
208 
209 uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }
210 
// Initializes all target-specific constants consumed by the generic
// Mach-O writer: CPU identifiers, stub/thunk sizes, branch ranges, and the
// relocation attribute table.
ARM64::ARM64() : ARM64Common(LP64()) {
  cpuType = CPU_TYPE_ARM64;
  cpuSubtype = CPU_SUBTYPE_ARM64_ALL;

  stubSize = sizeof(stubCode);
  thunkSize = sizeof(thunkCode);

  objcStubsFastSize = sizeof(objcStubsFastCode);
  objcStubsFastAlignment = 32;
  objcStubsSmallSize = sizeof(objcStubsSmallCode);
  objcStubsSmallAlignment = 4;

  // Branch immediate is two's complement 26 bits, which is implicitly
  // multiplied by 4 (since all functions are 4-aligned): the branch range
  // is -4*(2**(26-1))..4*(2**(26-1) - 1).
  backwardBranchRange = 128 * 1024 * 1024;
  forwardBranchRange = backwardBranchRange - 4;

  modeDwarfEncoding = UNWIND_ARM64_MODE_DWARF;
  subtractorRelocType = ARM64_RELOC_SUBTRACTOR;
  unsignedRelocType = ARM64_RELOC_UNSIGNED;

  stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
  stubHelperEntrySize = sizeof(stubHelperEntryCode);

  relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()};
}
238 
namespace {
// Decoded fields of an adrp instruction.
struct Adrp {
  uint32_t destRegister;
  int64_t addend; // page delta in bytes (a multiple of 4096)
};

// Decoded fields of an add-immediate instruction.
struct Add {
  uint8_t destRegister;
  uint8_t srcRegister;
  uint32_t addend; // unshifted 12-bit immediate
};

// Mirrors the opc field of the sign/zero-extending load encodings.
enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 };

// Decoded fields of a load (immediate or, after relaxation, literal)
// instruction.
struct Ldr {
  uint8_t destRegister;
  uint8_t baseRegister;
  // log2 of the load's size in bytes (4 for a 128-bit SIMD&FP load).
  uint8_t p2Size;
  bool isFloat;
  ExtendType extendType;
  // Byte offset; parseLdr stores the encoded immediate scaled by the load
  // size.
  int64_t offset;
};
} // namespace
262 
263 static bool parseAdrp(uint32_t insn, Adrp &adrp) {
264   if ((insn & 0x9f000000) != 0x90000000)
265     return false;
266   adrp.destRegister = insn & 0x1f;
267   uint64_t immHi = (insn >> 5) & 0x7ffff;
268   uint64_t immLo = (insn >> 29) & 0x3;
269   adrp.addend = SignExtend64<21>(immLo | (immHi << 2)) * 4096;
270   return true;
271 }
272 
273 static bool parseAdd(uint32_t insn, Add &add) {
274   if ((insn & 0xffc00000) != 0x91000000)
275     return false;
276   add.destRegister = insn & 0x1f;
277   add.srcRegister = (insn >> 5) & 0x1f;
278   add.addend = (insn >> 10) & 0xfff;
279   return true;
280 }
281 
// Decodes `insn` into `ldr` if it is one of the supported unsigned-offset
// load instructions and returns true; returns false otherwise. ldr.offset is
// stored as a byte offset (encoded immediate scaled by the load size).
static bool parseLdr(uint32_t insn, Ldr &ldr) {
  ldr.destRegister = insn & 0x1f;
  ldr.baseRegister = (insn >> 5) & 0x1f;
  uint8_t size = insn >> 30;
  uint8_t opc = (insn >> 22) & 3;

  if ((insn & 0x3fc00000) == 0x39400000) {
    // LDR (immediate), LDRB (immediate), LDRH (immediate)
    ldr.p2Size = size;
    ldr.extendType = ZeroExtend;
    ldr.isFloat = false;
  } else if ((insn & 0x3f800000) == 0x39800000) {
    // LDRSB (immediate), LDRSH (immediate), LDRSW (immediate)
    ldr.p2Size = size;
    ldr.extendType = static_cast<ExtendType>(opc);
    ldr.isFloat = false;
  } else if ((insn & 0x3f400000) == 0x3d400000) {
    // LDR (immediate, SIMD&FP)
    ldr.extendType = ZeroExtend;
    ldr.isFloat = true;
    if (opc == 1)
      ldr.p2Size = size;
    else if (size == 0 && opc == 3)
      // 128-bit (q-register) load is encoded with size = 0, opc = 3.
      ldr.p2Size = 4;
    else
      return false;
  } else {
    return false;
  }
  ldr.offset = ((insn >> 10) & 0xfff) << ldr.p2Size;
  return true;
}
314 
315 static bool isValidAdrOffset(int32_t delta) { return isInt<21>(delta); }
316 
317 static void writeAdr(void *loc, uint32_t dest, int32_t delta) {
318   assert(isValidAdrOffset(delta));
319   uint32_t opcode = 0x10000000;
320   uint32_t immHi = (delta & 0x001ffffc) << 3;
321   uint32_t immLo = (delta & 0x00000003) << 29;
322   write32le(loc, opcode | immHi | immLo | dest);
323 }
324 
325 static void writeNop(void *loc) { write32le(loc, 0xd503201f); }
326 
// ldr (literal) encodes a signed 19-bit word offset, so the target must be
// 4-byte aligned and within +/- 1 MiB. Byte and half-word loads
// (p2Size <= 1) have no literal form.
static bool isLiteralLdrEligible(const Ldr &ldr) {
  return ldr.p2Size > 1 && isShiftedInt<19, 2>(ldr.offset);
}
330 
// Encodes an ldr (literal) at loc that loads from pc + ldr.offset, choosing
// the opcode from the load size, signedness, and register class.
static void writeLiteralLdr(void *loc, const Ldr &ldr) {
  assert(isLiteralLdrEligible(ldr));
  uint32_t imm19 = (ldr.offset / 4 & maskTrailingOnes<uint32_t>(19)) << 5;
  uint32_t opcode;
  switch (ldr.p2Size) {
  case 2:
    if (ldr.isFloat)
      opcode = 0x1c000000; // ldr s_n, literal
    else
      opcode = ldr.extendType == Sign64 ? 0x98000000 : 0x18000000;
    break;
  case 3:
    opcode = ldr.isFloat ? 0x5c000000 : 0x58000000; // ldr d_n / ldr x_n
    break;
  case 4:
    opcode = 0x9c000000; // ldr q_n, literal
    break;
  default:
    llvm_unreachable("Invalid literal ldr size");
  }
  write32le(loc, opcode | imm19 | ldr.destRegister);
}
353 
// True if ldr.offset can be encoded in the unsigned, size-scaled 12-bit
// immediate field of an ldr (immediate).
static bool isImmediateLdrEligible(const Ldr &ldr) {
  // Note: We deviate from ld64's behavior, which converts to immediate loads
  // only if ldr.offset < 4096, even though the offset is divided by the load's
  // size in the 12-bit immediate operand. Only the unsigned offset variant is
  // supported.

  uint32_t size = 1 << ldr.p2Size;
  return ldr.offset >= 0 && (ldr.offset % size) == 0 &&
         isUInt<12>(ldr.offset >> ldr.p2Size);
}
364 
// Encodes an ldr (immediate, unsigned offset) at loc from the decoded fields
// in `ldr`.
static void writeImmediateLdr(void *loc, const Ldr &ldr) {
  assert(isImmediateLdrEligible(ldr));
  uint32_t opcode = 0x39000000;
  if (ldr.isFloat) {
    // SIMD&FP loads set bit 26; they only have a zero-extending form.
    opcode |= 0x04000000;
    assert(ldr.extendType == ZeroExtend);
  }
  opcode |= ldr.destRegister;
  opcode |= ldr.baseRegister << 5;
  uint8_t size, opc;
  if (ldr.p2Size == 4) {
    // 128-bit loads use the special encoding size = 0, opc = 3.
    size = 0;
    opc = 3;
  } else {
    opc = ldr.extendType;
    size = ldr.p2Size;
  }
  uint32_t immBits = ldr.offset >> ldr.p2Size;
  write32le(loc, opcode | (immBits << 10) | (opc << 22) | (size << 30));
}
385 
386 // Transforms a pair of adrp+add instructions into an adr instruction if the
387 // target is within the +/- 1 MiB range allowed by the adr's 21 bit signed
388 // immediate offset.
389 //
390 //   adrp xN, _foo@PAGE
391 //   add  xM, xN, _foo@PAGEOFF
392 // ->
393 //   adr  xM, _foo
394 //   nop
395 static bool applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
396                          uint64_t offset1, uint64_t offset2) {
397   uint32_t ins1 = read32le(buf + offset1);
398   uint32_t ins2 = read32le(buf + offset2);
399   Adrp adrp;
400   Add add;
401   if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add))
402     return false;
403   if (adrp.destRegister != add.srcRegister)
404     return false;
405 
406   uint64_t addr1 = isec->getVA() + offset1;
407   uint64_t referent = pageBits(addr1) + adrp.addend + add.addend;
408   int64_t delta = referent - addr1;
409   if (!isValidAdrOffset(delta))
410     return false;
411 
412   writeAdr(buf + offset1, add.destRegister, delta);
413   writeNop(buf + offset2);
414   return true;
415 }
416 
417 // Transforms two adrp instructions into a single adrp if their referent
418 // addresses are located on the same 4096 byte page.
419 //
420 //   adrp xN, _foo@PAGE
421 //   adrp xN, _bar@PAGE
422 // ->
423 //   adrp xN, _foo@PAGE
424 //   nop
425 static void applyAdrpAdrp(uint8_t *buf, const ConcatInputSection *isec,
426                           uint64_t offset1, uint64_t offset2) {
427   uint32_t ins1 = read32le(buf + offset1);
428   uint32_t ins2 = read32le(buf + offset2);
429   Adrp adrp1, adrp2;
430   if (!parseAdrp(ins1, adrp1) || !parseAdrp(ins2, adrp2))
431     return;
432   if (adrp1.destRegister != adrp2.destRegister)
433     return;
434 
435   uint64_t page1 = pageBits(offset1 + isec->getVA()) + adrp1.addend;
436   uint64_t page2 = pageBits(offset2 + isec->getVA()) + adrp2.addend;
437   if (page1 != page2)
438     return;
439 
440   writeNop(buf + offset2);
441 }
442 
443 // Transforms a pair of adrp+ldr (immediate) instructions into an ldr (literal)
444 // load from a PC-relative address if it is 4-byte aligned and within +/- 1 MiB,
445 // as ldr can encode a signed 19-bit offset that gets multiplied by 4.
446 //
447 //   adrp xN, _foo@PAGE
448 //   ldr  xM, [xN, _foo@PAGEOFF]
449 // ->
450 //   nop
451 //   ldr  xM, _foo
static void applyAdrpLdr(uint8_t *buf, const ConcatInputSection *isec,
                         uint64_t offset1, uint64_t offset2) {
  uint32_t ins1 = read32le(buf + offset1);
  uint32_t ins2 = read32le(buf + offset2);
  Adrp adrp;
  Ldr ldr;
  if (!parseAdrp(ins1, adrp) || !parseLdr(ins2, ldr))
    return;
  // The adrp's destination must be the ldr's base register.
  if (adrp.destRegister != ldr.baseRegister)
    return;

  uint64_t addr1 = isec->getVA() + offset1;
  uint64_t addr2 = isec->getVA() + offset2;
  // Rebase the load so it is pc-relative to the ldr instruction itself.
  uint64_t referent = pageBits(addr1) + adrp.addend + ldr.offset;
  ldr.offset = referent - addr2;
  if (!isLiteralLdrEligible(ldr))
    return;

  writeNop(buf + offset1);
  writeLiteralLdr(buf + offset2, ldr);
}
473 
474 // GOT loads are emitted by the compiler as a pair of adrp and ldr instructions,
475 // but they may be changed to adrp+add by relaxGotLoad(). This hint performs
476 // the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed.
477 static void applyAdrpLdrGot(uint8_t *buf, const ConcatInputSection *isec,
478                             uint64_t offset1, uint64_t offset2) {
479   uint32_t ins2 = read32le(buf + offset2);
480   Add add;
481   Ldr ldr;
482   if (parseAdd(ins2, add))
483     applyAdrpAdd(buf, isec, offset1, offset2);
484   else if (parseLdr(ins2, ldr))
485     applyAdrpLdr(buf, isec, offset1, offset2);
486 }
487 
488 // Optimizes an adrp+add+ldr sequence used for loading from a local symbol's
489 // address by loading directly if it's close enough, or to an adrp(p)+ldr
490 // sequence if it's not.
491 //
492 //   adrp x0, _foo@PAGE
493 //   add  x1, x0, _foo@PAGEOFF
494 //   ldr  x2, [x1, #off]
static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
                            uint64_t offset1, uint64_t offset2,
                            uint64_t offset3) {
  uint32_t ins1 = read32le(buf + offset1);
  uint32_t ins2 = read32le(buf + offset2);
  uint32_t ins3 = read32le(buf + offset3);
  Adrp adrp;
  Add add;
  Ldr ldr;
  if (!parseAdrp(ins1, adrp) || !parseAdd(ins2, add) || !parseLdr(ins3, ldr))
    return;
  // The three instructions must chain: adrp -> add -> ldr.
  if (adrp.destRegister != add.srcRegister)
    return;
  if (add.destRegister != ldr.baseRegister)
    return;

  // Load from the target address directly.
  //   nop
  //   nop
  //   ldr x2, [_foo + #off]
  uint64_t addr1 = isec->getVA() + offset1;
  uint64_t addr3 = isec->getVA() + offset3;
  uint64_t referent = pageBits(addr1) + adrp.addend + add.addend;
  Ldr literalLdr = ldr;
  literalLdr.offset += referent - addr3;
  if (isLiteralLdrEligible(literalLdr)) {
    writeNop(buf + offset1);
    writeNop(buf + offset2);
    writeLiteralLdr(buf + offset3, literalLdr);
    return;
  }

  // Target too far for a literal load; at least try to relax the address
  // computation (adrp+add) into a single adr.
  if (applyAdrpAdd(buf, isec, offset1, offset2))
    return;

  // Move the target's page offset into the ldr's immediate offset.
  //   adrp x0, _foo@PAGE
  //   nop
  //   ldr x2, [x0, _foo@PAGEOFF + #off]
  Ldr immediateLdr = ldr;
  immediateLdr.baseRegister = adrp.destRegister;
  immediateLdr.offset += add.addend;
  if (isImmediateLdrEligible(immediateLdr)) {
    writeNop(buf + offset2);
    writeImmediateLdr(buf + offset3, immediateLdr);
    return;
  }
}
543 
544 // Relaxes a GOT-indirect load.
545 // If the referenced symbol is external and its GOT entry is within +/- 1 MiB,
546 // the GOT entry can be loaded with a single literal ldr instruction.
547 // If the referenced symbol is local and thus has been relaxed to adrp+add+ldr,
548 // we perform the AdrpAddLdr transformation.
static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
                               uint64_t offset1, uint64_t offset2,
                               uint64_t offset3) {
  uint32_t ins2 = read32le(buf + offset2);
  Add add;
  Ldr ldr2;

  if (parseAdd(ins2, add)) {
    // The second instruction was relaxed to an add, so this is really an
    // adrp+add+ldr sequence for a local symbol.
    applyAdrpAddLdr(buf, isec, offset1, offset2, offset3);
  } else if (parseLdr(ins2, ldr2)) {
    // adrp x1, _foo@GOTPAGE
    // ldr  x2, [x1, _foo@GOTPAGEOFF]
    // ldr  x3, [x2, #off]
    uint32_t ins3 = read32le(buf + offset3);
    Ldr ldr3;
    if (!parseLdr(ins3, ldr3))
      return;
    // The final load must consume the pointer produced by the GOT load.
    if (ldr3.baseRegister != ldr2.destRegister)
      return;
    // Loads from the GOT must be pointer sized.
    if (ldr2.p2Size != 3 || ldr2.isFloat)
      return;
    applyAdrpLdr(buf, isec, offset1, offset2);
  }
}
574 
575 template <typename Callback>
576 static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
577   std::array<uint64_t, 3> args;
578 
579   auto readNext = [&]() -> uint64_t {
580     unsigned int n = 0;
581     uint64_t value = decodeULEB128(data.data(), &n, data.end());
582     data = data.drop_front(n);
583     return value;
584   };
585 
586   while (!data.empty()) {
587     uint64_t type = readNext();
588     if (type == 0)
589       break;
590 
591     uint64_t argCount = readNext();
592     for (unsigned i = 0; i < argCount; ++i) {
593       uint64_t arg = readNext();
594       if (i < 3)
595         args[i] = arg;
596     }
597     // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
598     if (argCount > 3)
599       continue;
600     callback(type, ArrayRef(args.data(), argCount));
601   }
602 }
603 
604 // On RISC architectures like arm64, materializing a memory address generally
605 // takes multiple instructions. If the referenced symbol is located close enough
606 // in memory, fewer instructions are needed.
607 //
608 // Linker optimization hints record where addresses are computed. After
609 // addresses have been assigned, if possible, we change them to a shorter
610 // sequence of instructions. The size of the binary is not modified; the
611 // eliminated instructions are replaced with NOPs. This still leads to faster
612 // code as the CPU can skip over NOPs quickly.
613 //
614 // LOHs are specified by the LC_LINKER_OPTIMIZATION_HINTS load command, which
615 // points to a sequence of ULEB128-encoded numbers. Each entry specifies a
616 // transformation kind, and 2 or 3 addresses where the instructions are located.
void ARM64::applyOptimizationHints(uint8_t *outBuf, const ObjFile &obj) const {
  ArrayRef<uint8_t> data = obj.getOptimizationHints();
  if (data.empty())
    return;

  // Cached lookup state: the section containing the most recent hint address,
  // its start address, and its location in the output buffer.
  const ConcatInputSection *section = nullptr;
  uint64_t sectionAddr = 0;
  uint8_t *buf = nullptr;

  // Locates the ConcatInputSection containing `addr` (an address within the
  // object file), updating the cached state above. Returns false if the
  // address cannot be mapped to a live output section.
  auto findSection = [&](uint64_t addr) {
    // Fast path: same section as the previous hint.
    if (section && addr >= sectionAddr &&
        addr < sectionAddr + section->getSize())
      return true;

    if (obj.sections.empty())
      return false;
    // Binary-search for the last section starting at or before addr...
    auto secIt = std::prev(llvm::upper_bound(
        obj.sections, addr,
        [](uint64_t off, const Section *sec) { return off < sec->addr; }));
    const Section *sec = *secIt;

    if (sec->subsections.empty())
      return false;
    // ...then for the last subsection starting at or before it.
    auto subsecIt = std::prev(llvm::upper_bound(
        sec->subsections, addr - sec->addr,
        [](uint64_t off, Subsection subsec) { return off < subsec.offset; }));
    const Subsection &subsec = *subsecIt;
    const ConcatInputSection *isec =
        dyn_cast_or_null<ConcatInputSection>(subsec.isec);
    if (!isec || isec->shouldOmitFromOutput())
      return false;

    section = isec;
    sectionAddr = subsec.offset + sec->addr;
    buf = outBuf + section->outSecOff + section->parent->fileOff;
    return true;
  };

  // Checks that a hint argument (an address) lies inside the section found by
  // findSection; all addresses of one hint must share a section.
  auto isValidOffset = [&](uint64_t offset) {
    if (offset < sectionAddr || offset >= sectionAddr + section->getSize()) {
      error(toString(&obj) +
            ": linker optimization hint spans multiple sections");
      return false;
    }
    return true;
  };

  bool hasAdrpAdrp = false;
  forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) {
    // AdrpAdrp hints are deferred to a second pass; see below.
    if (kind == LOH_ARM64_ADRP_ADRP) {
      hasAdrpAdrp = true;
      return;
    }

    if (!findSection(args[0]))
      return;
    switch (kind) {
    case LOH_ARM64_ADRP_ADD:
      if (isValidOffset(args[1]))
        applyAdrpAdd(buf, section, args[0] - sectionAddr,
                     args[1] - sectionAddr);
      break;
    case LOH_ARM64_ADRP_LDR:
      if (isValidOffset(args[1]))
        applyAdrpLdr(buf, section, args[0] - sectionAddr,
                     args[1] - sectionAddr);
      break;
    case LOH_ARM64_ADRP_LDR_GOT:
      if (isValidOffset(args[1]))
        applyAdrpLdrGot(buf, section, args[0] - sectionAddr,
                        args[1] - sectionAddr);
      break;
    case LOH_ARM64_ADRP_ADD_LDR:
      if (isValidOffset(args[1]) && isValidOffset(args[2]))
        applyAdrpAddLdr(buf, section, args[0] - sectionAddr,
                        args[1] - sectionAddr, args[2] - sectionAddr);
      break;
    case LOH_ARM64_ADRP_LDR_GOT_LDR:
      if (isValidOffset(args[1]) && isValidOffset(args[2]))
        applyAdrpLdrGotLdr(buf, section, args[0] - sectionAddr,
                           args[1] - sectionAddr, args[2] - sectionAddr);
      break;
    case LOH_ARM64_ADRP_ADD_STR:
    case LOH_ARM64_ADRP_LDR_GOT_STR:
      // TODO: Implement these
      break;
    }
  });

  if (!hasAdrpAdrp)
    return;

  // AdrpAdrp optimization hints are performed in a second pass because they
  // might interfere with other transformations. For instance, consider the
  // following input:
  //
  //   adrp x0, _foo@PAGE
  //   add  x1, x0, _foo@PAGEOFF
  //   adrp x0, _bar@PAGE
  //   add  x2, x0, _bar@PAGEOFF
  //
  // If we perform the AdrpAdrp relaxation first, we get:
  //
  //   adrp x0, _foo@PAGE
  //   add  x1, x0, _foo@PAGEOFF
  //   nop
  //   add x2, x0, _bar@PAGEOFF
  //
  // If we then apply AdrpAdd to the first two instructions, the add will have a
  // garbage value in x0:
  //
  //   adr  x1, _foo
  //   nop
  //   nop
  //   add  x2, x0, _bar@PAGEOFF
  forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) {
    if (kind != LOH_ARM64_ADRP_ADRP)
      return;
    if (!findSection(args[0]))
      return;
    if (isValidOffset(args[1]))
      applyAdrpAdrp(buf, section, args[0] - sectionAddr, args[1] - sectionAddr);
  });
}
741 
742 TargetInfo *macho::createARM64TargetInfo() {
743   static ARM64 t;
744   return &t;
745 }
746