xref: /freebsd/contrib/llvm-project/lld/ELF/Arch/LoongArch.cpp (revision e1e636193db45630c7881246d25902e57c43d24e)
1 //===- LoongArch.cpp ------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "OutputSections.h"
11 #include "Symbols.h"
12 #include "SyntheticSections.h"
13 #include "Target.h"
14 #include "llvm/Support/LEB128.h"
15 
16 using namespace llvm;
17 using namespace llvm::object;
18 using namespace llvm::support::endian;
19 using namespace llvm::ELF;
20 using namespace lld;
21 using namespace lld::elf;
22 
namespace {
// LoongArch (LA32/LA64) target hooks for lld's ELF linker: e_flags merging,
// implicit addend extraction, PLT/GOT-PLT synthesis, relocation
// classification/application, and R_LARCH_ALIGN relaxation.
class LoongArch final : public TargetInfo {
public:
  LoongArch();
  uint32_t calcEFlags() const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  RelType getDynRel(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  // Iterative relaxation driver; finalizeRelax rewrites section contents once
  // section sizes have converged.
  bool relaxOnce(int pass) const override;
  void finalizeRelax(int passes) const override;
};
} // end anonymous namespace
44 
namespace {
// Raw instruction words (register/immediate fields zeroed) for the LoongArch
// instructions the linker synthesizes; combined with registers/immediates via
// insn() below.
enum Op {
  SUB_W = 0x00110000,
  SUB_D = 0x00118000,
  BREAK = 0x002a0000,    // used as the trap/fill instruction
  SRLI_W = 0x00448000,
  SRLI_D = 0x00450000,
  ADDI_W = 0x02800000,
  ADDI_D = 0x02c00000,
  ANDI = 0x03400000,     // andi $zero, $zero, 0 serves as nop
  PCADDU12I = 0x1c000000,
  LD_W = 0x28800000,
  LD_D = 0x28c00000,
  JIRL = 0x4c000000,
};

// ABI register numbers used in the synthesized PLT code.
enum Reg {
  R_ZERO = 0,
  R_RA = 1,
  R_TP = 2,
  R_T0 = 12,
  R_T1 = 13,
  R_T2 = 14,
  R_T3 = 15,
};
} // namespace
71 
72 // Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences
73 // like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i`
74 // produces a PC-relative intermediate value with the lowest 12 bits zeroed (the
75 // "page") for the next instruction to add in the "page offset". (`pcalau12i`
76 // stands for something like "PC ALigned Add Upper that starts from the 12th
77 // bit, Immediate".)
78 //
79 // Here a "page" is in fact just another way to refer to the 12-bit range
80 // allowed by the immediate field of the addi/ld/st instructions, and not
81 // related to the system or the kernel's actual page size. The sematics happens
82 // to match the AArch64 `adrp`, so the concept of "page" is borrowed here.
static uint64_t getLoongArchPage(uint64_t p) {
  // Clear the low 12 "page offset" bits, keeping the pcalau12i-style "page".
  return p - (p & 0xfff);
}
86 
// Extract the low 12 bits (the "page offset") of `val`.
static uint32_t lo12(uint32_t val) { return val % 0x1000u; }
88 
// Calculate the adjusted page delta between dest and PC, i.e. the value the
// pcalau12i-based sequence must materialize so that page + (sign-extended)
// low parts reconstruct `dest`.
uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) {
  // Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d
  // + lu52i.d`, they must be adjancent so that we can infer the PC of
  // `pcalau12i` when calculating the page delta for the other two instructions
  // (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit
  // complicated. Just use psABI recommended algorithm.
  uint64_t pcalau12i_pc;
  switch (type) {
  case R_LARCH_PCALA64_LO20:
  case R_LARCH_GOT64_PC_LO20:
  case R_LARCH_TLS_IE64_PC_LO20:
    // The lu32i.d carrying this reloc is 2 instructions after the pcalau12i.
    pcalau12i_pc = pc - 8;
    break;
  case R_LARCH_PCALA64_HI12:
  case R_LARCH_GOT64_PC_HI12:
  case R_LARCH_TLS_IE64_PC_HI12:
    // The lu52i.d carrying this reloc is 3 instructions after the pcalau12i.
    pcalau12i_pc = pc - 12;
    break;
  default:
    pcalau12i_pc = pc;
    break;
  }
  uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pcalau12i_pc);
  // Compensate for the sign extension the consuming instructions perform on
  // the 12-bit low part and the 32-bit intermediate value (the adjustments
  // prescribed by the psABI algorithm).
  if (dest & 0x800)
    result += 0x1000 - 0x1'0000'0000;
  if (result & 0x8000'0000)
    result += 0x1'0000'0000;
  return result;
}
119 
// High 20 bits of `val` rounded to compensate for the sign extension the
// paired 12-bit low part undergoes.
static uint32_t hi20(uint32_t val) {
  uint32_t rounded = val + 0x800;
  return rounded >> 12;
}
121 
// Assemble an instruction word from an opcode skeleton and register operands:
// rd occupies bits [4:0], rj bits [9:5] and rk bits [14:10].
static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) {
  uint32_t word = op;
  word |= d;
  word |= j << 5;
  word |= k << 10;
  return word;
}
125 
126 // Extract bits v[begin:end], where range is inclusive.
127 static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
128   return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;
129 }
130 
// Patch the split 21-bit immediate of a D5k16-format branch: the low 16 bits
// land in insn[25:10], the high 5 bits in insn[4:0].
static uint32_t setD5k16(uint32_t insn, uint32_t imm) {
  uint32_t lo = imm & 0xffff;
  uint32_t hi = (imm >> 16) & 0x1f;
  return (insn & 0xfc0003e0) | (lo << 10) | hi;
}
136 
// Patch the split 26-bit immediate of a D10k16-format branch: the low 16 bits
// land in insn[25:10], the high 10 bits in insn[9:0].
static uint32_t setD10k16(uint32_t insn, uint32_t imm) {
  uint32_t lo = imm & 0xffff;
  uint32_t hi = (imm >> 16) & 0x3ff;
  return (insn & 0xfc000000) | (lo << 10) | hi;
}
142 
// Patch the 20-bit immediate occupying insn[24:5] (lu12i.w/pcalau12i/lu32i.d
// style encodings).
static uint32_t setJ20(uint32_t insn, uint32_t imm) {
  uint32_t field = (imm & 0xfffff) << 5;
  return (insn & 0xfe00001f) | field;
}
146 
// Patch the 12-bit immediate occupying insn[21:10] (addi/ld/st/lu52i.d style
// encodings).
static uint32_t setK12(uint32_t insn, uint32_t imm) {
  uint32_t field = (imm & 0xfff) << 10;
  return (insn & 0xffc003ff) | field;
}
150 
// Patch the 16-bit immediate occupying insn[25:10] (b16-format branches and
// jirl).
static uint32_t setK16(uint32_t insn, uint32_t imm) {
  uint32_t field = (imm & 0xffff) << 10;
  return (insn & 0xfc0003ff) | field;
}
154 
155 static bool isJirl(uint32_t insn) {
156   return (insn & 0xfc000000) == JIRL;
157 }
158 
// Apply an in-place addition of `val` to the ULEB128-encoded value at `loc`,
// re-encoding in exactly the same number of bytes (used by
// R_LARCH_{ADD,SUB}_ULEB128; subtraction passes a negated val).
static void handleUleb128(uint8_t *loc, uint64_t val) {
  // A 64-bit value needs at most ceil(64/7) = 10 ULEB128 bytes.
  const uint32_t maxcount = 1 + 64 / 7;
  uint32_t count;
  const char *error = nullptr;
  uint64_t orig = decodeULEB128(loc, &count, nullptr, &error);
  if (count > maxcount || (count == maxcount && error))
    errorOrWarn(getErrorLocation(loc) + "extra space for uleb128");
  // Truncate the sum so it still fits in `count` encoded bytes.
  uint64_t mask = count < maxcount ? (1ULL << 7 * count) - 1 : -1ULL;
  encodeULEB128((orig + val) & mask, loc, count);
}
169 
LoongArch::LoongArch() {
  // The LoongArch ISA itself does not have a limit on page sizes. According to
  // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is
  // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to
  // "unlimited".
  // However, practically the maximum usable page size is constrained by the
  // kernel implementation, and 64KiB is the biggest non-huge page size
  // supported by Linux as of v6.4. The most widespread page size in use,
  // though, is 16KiB.
  defaultCommonPageSize = 16384;
  defaultMaxPageSize = 65536;
  write32le(trapInstr.data(), BREAK); // break 0

  // Dynamic relocation types common to LA32 and LA64.
  copyRel = R_LARCH_COPY;
  pltRel = R_LARCH_JUMP_SLOT;
  relativeRel = R_LARCH_RELATIVE;
  iRelativeRel = R_LARCH_IRELATIVE;

  // Word-size dependent relocation types.
  if (config->is64) {
    symbolicRel = R_LARCH_64;
    tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64;
    tlsOffsetRel = R_LARCH_TLS_DTPREL64;
    tlsGotRel = R_LARCH_TLS_TPREL64;
  } else {
    symbolicRel = R_LARCH_32;
    tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32;
    tlsOffsetRel = R_LARCH_TLS_DTPREL32;
    tlsGotRel = R_LARCH_TLS_TPREL32;
  }

  gotRel = symbolicRel;

  // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map
  gotPltHeaderEntriesNum = 2;

  // Sizes match the 8-instruction header and 4-instruction entries emitted by
  // writePltHeader/writePlt below.
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
}
209 
210 static uint32_t getEFlags(const InputFile *f) {
211   if (config->is64)
212     return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags;
213   return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
214 }
215 
216 static bool inputFileHasCode(const InputFile *f) {
217   for (const auto *sec : f->getSections())
218     if (sec && sec->flags & SHF_EXECINSTR)
219       return true;
220 
221   return false;
222 }
223 
224 uint32_t LoongArch::calcEFlags() const {
225   // If there are only binary input files (from -b binary), use a
226   // value of 0 for the ELF header flags.
227   if (ctx.objectFiles.empty())
228     return 0;
229 
230   uint32_t target = 0;
231   const InputFile *targetFile;
232   for (const InputFile *f : ctx.objectFiles) {
233     // Do not enforce ABI compatibility if the input file does not contain code.
234     // This is useful for allowing linkage with data-only object files produced
235     // with tools like objcopy, that have zero e_flags.
236     if (!inputFileHasCode(f))
237       continue;
238 
239     // Take the first non-zero e_flags as the reference.
240     uint32_t flags = getEFlags(f);
241     if (target == 0 && flags != 0) {
242       target = flags;
243       targetFile = f;
244     }
245 
246     if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) !=
247         (target & EF_LOONGARCH_ABI_MODIFIER_MASK))
248       error(toString(f) +
249             ": cannot link object files with different ABI from " +
250             toString(targetFile));
251 
252     // We cannot process psABI v1.x / object ABI v0 files (containing stack
253     // relocations), unlike ld.bfd.
254     //
255     // Instead of blindly accepting every v0 object and only failing at
256     // relocation processing time, just disallow interlink altogether. We
257     // don't expect significant usage of object ABI v0 in the wild (the old
258     // world may continue using object ABI v0 for a while, but as it's not
259     // binary-compatible with the upstream i.e. new-world ecosystem, it's not
260     // being considered here).
261     //
262     // There are briefly some new-world systems with object ABI v0 binaries too.
263     // It is because these systems were built before the new ABI was finalized.
264     // These are not supported either due to the extremely small number of them,
265     // and the few impacted users are advised to simply rebuild world or
266     // reinstall a recent system.
267     if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1)
268       error(toString(f) + ": unsupported object file ABI version");
269   }
270 
271   return target;
272 }
273 
// Read the addend stored at the relocated location (needed for e.g.
// --apply-dynamic-relocs and Android packed relocations). Only relocation
// types that may appear as dynamic relocations are supported.
int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  case R_LARCH_32:
  case R_LARCH_TLS_DTPMOD32:
  case R_LARCH_TLS_DTPREL32:
  case R_LARCH_TLS_TPREL32:
    return SignExtend64<32>(read32le(buf));
  case R_LARCH_64:
  case R_LARCH_TLS_DTPMOD64:
  case R_LARCH_TLS_DTPREL64:
  case R_LARCH_TLS_TPREL64:
    return read64le(buf);
  case R_LARCH_RELATIVE:
  case R_LARCH_IRELATIVE:
    // Word-sized for the target being linked.
    return config->is64 ? read64le(buf) : read32le(buf);
  case R_LARCH_NONE:
  case R_LARCH_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
    return 0;
  }
}
299 
// Initialize a .got.plt slot to the address of the PLT header, so the first
// call through the slot enters the lazy-binding resolver.
void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  if (config->is64)
    write64le(buf, in.plt->getVA());
  else
    write32le(buf, in.plt->getVA());
}
306 
// Initialize an .igot.plt slot (ifunc) with the resolver symbol's address;
// skipped when addends are emitted via dynamic relocations instead.
void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  if (config->writeAddends) {
    if (config->is64)
      write64le(buf, s.getVA());
    else
      write32le(buf, s.getVA());
  }
}
315 
void LoongArch::writePltHeader(uint8_t *buf) const {
  // The LoongArch PLT is currently structured just like that of RISCV.
  // Annoyingly, this means the PLT is still using `pcaddu12i` to perform
  // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`),
  // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that
  // is used everywhere else involving PC-relative operations in the LoongArch
  // ELF psABI v2.00.
  //
  // The `pcrel_{hi20,lo12}` operators are illustrative only and not really
  // supported by LoongArch assemblers.
  //
  //   pcaddu12i $t2, %pcrel_hi20(.got.plt)
  //   sub.[wd]  $t1, $t1, $t3
  //   ld.[wd]   $t3, $t2, %pcrel_lo12(.got.plt)  ; t3 = _dl_runtime_resolve
  //   addi.[wd] $t1, $t1, -pltHeaderSize-12      ; t1 = &.plt[i] - &.plt[0]
  //   addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt)
  //   srli.[wd] $t1, $t1, (is64?1:2)             ; t1 = &.got.plt[i] - &.got.plt[0]
  //   ld.[wd]   $t0, $t0, Wordsize               ; t0 = link_map
  //   jr        $t3
  // Distance from the PLT header (the pcaddu12i below) to .got.plt; only its
  // hi20/lo12 pieces are consumed, so 32 bits suffice.
  uint32_t offset = in.gotPlt->getVA() - in.plt->getVA();
  uint32_t sub = config->is64 ? SUB_D : SUB_W;
  uint32_t ld = config->is64 ? LD_D : LD_W;
  uint32_t addi = config->is64 ? ADDI_D : ADDI_W;
  uint32_t srli = config->is64 ? SRLI_D : SRLI_W;
  write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0));
  write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3));
  write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset)));
  write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12)));
  write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset)));
  write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2));
  write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize));
  write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0));
}
349 
// Write one 16-byte PLT entry that loads and jumps through the symbol's
// .got.plt slot.
void LoongArch::writePlt(uint8_t *buf, const Symbol &sym,
                     uint64_t pltEntryAddr) const {
  // See the comment in writePltHeader for reason why pcaddu12i is used instead
  // of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days.
  //
  //   pcaddu12i $t3, %pcrel_hi20(f@.got.plt)
  //   ld.[wd]   $t3, $t3, %pcrel_lo12(f@.got.plt)
  //   jirl      $t1, $t3, 0
  //   nop
  uint32_t offset = sym.getGotPltVA() - pltEntryAddr;
  write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0));
  write32le(buf + 4,
            insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset)));
  write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0));
  // The trailing nop is encoded as `andi $zero, $zero, 0`.
  write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0));
}
366 
367 RelType LoongArch::getDynRel(RelType type) const {
368   return type == target->symbolicRel ? type
369                                      : static_cast<RelType>(R_LARCH_NONE);
370 }
371 
// Classify a LoongArch relocation type into lld's target-independent RelExpr,
// which determines how the value to relocate is computed (absolute,
// PC-relative, GOT-relative, TLS, ...). `loc` is inspected for the
// R_LARCH_PCALA_LO12-on-JIRL special case described below.
RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
                              const uint8_t *loc) const {
  switch (type) {
  case R_LARCH_NONE:
  case R_LARCH_MARK_LA:
  case R_LARCH_MARK_PCREL:
    return R_NONE;
  case R_LARCH_32:
  case R_LARCH_64:
  case R_LARCH_ABS_HI20:
  case R_LARCH_ABS_LO12:
  case R_LARCH_ABS64_LO20:
  case R_LARCH_ABS64_HI12:
    return R_ABS;
  case R_LARCH_PCALA_LO12:
    // We could just R_ABS, but the JIRL instruction reuses the relocation type
    // for a different purpose. The questionable usage is part of glibc 2.37
    // libc_nonshared.a [1], which is linked into user programs, so we have to
    // work around it for a while, even if a new relocation type may be
    // introduced in the future [2].
    //
    // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a
    // [2]: https://github.com/loongson/la-abi-specs/pull/3
    return isJirl(read32le(loc)) ? R_PLT : R_ABS;
  case R_LARCH_TLS_DTPREL32:
  case R_LARCH_TLS_DTPREL64:
    return R_DTPREL;
  case R_LARCH_TLS_TPREL32:
  case R_LARCH_TLS_TPREL64:
  case R_LARCH_TLS_LE_HI20:
  case R_LARCH_TLS_LE_LO12:
  case R_LARCH_TLS_LE64_LO20:
  case R_LARCH_TLS_LE64_HI12:
    return R_TPREL;
  case R_LARCH_ADD6:
  case R_LARCH_ADD8:
  case R_LARCH_ADD16:
  case R_LARCH_ADD32:
  case R_LARCH_ADD64:
  case R_LARCH_ADD_ULEB128:
  case R_LARCH_SUB6:
  case R_LARCH_SUB8:
  case R_LARCH_SUB16:
  case R_LARCH_SUB32:
  case R_LARCH_SUB64:
  case R_LARCH_SUB_ULEB128:
    // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse
    // the RelExpr to avoid code duplication.
    return R_RISCV_ADD;
  case R_LARCH_32_PCREL:
  case R_LARCH_64_PCREL:
  case R_LARCH_PCREL20_S2:
    return R_PC;
  case R_LARCH_B16:
  case R_LARCH_B21:
  case R_LARCH_B26:
  case R_LARCH_CALL36:
    return R_PLT_PC;
  case R_LARCH_GOT_PC_HI20:
  case R_LARCH_GOT64_PC_LO20:
  case R_LARCH_GOT64_PC_HI12:
  case R_LARCH_TLS_IE_PC_HI20:
  case R_LARCH_TLS_IE64_PC_LO20:
  case R_LARCH_TLS_IE64_PC_HI12:
    return R_LOONGARCH_GOT_PAGE_PC;
  case R_LARCH_GOT_PC_LO12:
  case R_LARCH_TLS_IE_PC_LO12:
    return R_LOONGARCH_GOT;
  case R_LARCH_TLS_LD_PC_HI20:
  case R_LARCH_TLS_GD_PC_HI20:
    return R_LOONGARCH_TLSGD_PAGE_PC;
  case R_LARCH_PCALA_HI20:
    // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT
    // anyway so why waste time checking only to get everything relaxed back to
    // it?
    //
    // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want
    // both the HI20 and LO12 to potentially refer to the PLT. But in reality
    // the HI20 reloc appears earlier, and the relocs don't contain enough
    // information to let us properly resolve semantics per symbol.
    // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20
    // relocs, hence it is nearly impossible to 100% accurately determine each
    // HI20's "flavor" without taking big performance hits, in the presence of
    // edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far
    // apart that relationship is not certain anymore), and programmer mistakes
    // (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3).
    //
    // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark
    // every HI20 reloc referring to the same symbol differently; this is not
    // feasible with the current function signature of getRelExpr that doesn't
    // allow for such inter-pass state.
    //
    // So, unfortunately we have to again workaround this quirk the same way as
    // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only
    // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later
    // stage.
    return R_LOONGARCH_PLT_PAGE_PC;
  case R_LARCH_PCALA64_LO20:
  case R_LARCH_PCALA64_HI12:
    return R_LOONGARCH_PAGE_PC;
  case R_LARCH_GOT_HI20:
  case R_LARCH_GOT_LO12:
  case R_LARCH_GOT64_LO20:
  case R_LARCH_GOT64_HI12:
  case R_LARCH_TLS_IE_HI20:
  case R_LARCH_TLS_IE_LO12:
  case R_LARCH_TLS_IE64_LO20:
  case R_LARCH_TLS_IE64_HI12:
    return R_GOT;
  case R_LARCH_TLS_LD_HI20:
    return R_TLSLD_GOT;
  case R_LARCH_TLS_GD_HI20:
    return R_TLSGD_GOT;
  case R_LARCH_RELAX:
    return config->relax ? R_RELAX_HINT : R_NONE;
  case R_LARCH_ALIGN:
    return R_RELAX_HINT;

  // Other known relocs that are explicitly unimplemented:
  //
  // - psABI v1 relocs that need a stateful stack machine to work, and not
  //   required when implementing psABI v2;
  // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the
  //   two GNU vtable-related relocs).
  //
  // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}
504 
505 bool LoongArch::usesOnlyLowPageBits(RelType type) const {
506   switch (type) {
507   default:
508     return false;
509   case R_LARCH_PCALA_LO12:
510   case R_LARCH_GOT_LO12:
511   case R_LARCH_GOT_PC_LO12:
512   case R_LARCH_TLS_IE_PC_LO12:
513     return true;
514   }
515 }
516 
// Patch the location `loc` for relocation `rel` with the already-computed
// value `val`. Immediate placement follows the LoongArch instruction formats;
// see the set*() helpers above for the field layouts.
void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  switch (rel.type) {
  case R_LARCH_32_PCREL:
    checkInt(loc, val, 32, rel);
    [[fallthrough]];
  case R_LARCH_32:
  case R_LARCH_TLS_DTPREL32:
    write32le(loc, val);
    return;
  case R_LARCH_64:
  case R_LARCH_TLS_DTPREL64:
  case R_LARCH_64_PCREL:
    write64le(loc, val);
    return;

  // 20-bit PC-relative, shifted right by 2 (4-byte units).
  case R_LARCH_PCREL20_S2:
    checkInt(loc, val, 22, rel);
    checkAlignment(loc, val, 4, rel);
    write32le(loc, setJ20(read32le(loc), val >> 2));
    return;

  // Conditional branches: 16/21/26-bit offsets in 4-byte units.
  case R_LARCH_B16:
    checkInt(loc, val, 18, rel);
    checkAlignment(loc, val, 4, rel);
    write32le(loc, setK16(read32le(loc), val >> 2));
    return;

  case R_LARCH_B21:
    checkInt(loc, val, 23, rel);
    checkAlignment(loc, val, 4, rel);
    write32le(loc, setD5k16(read32le(loc), val >> 2));
    return;

  case R_LARCH_B26:
    checkInt(loc, val, 28, rel);
    checkAlignment(loc, val, 4, rel);
    write32le(loc, setD10k16(read32le(loc), val >> 2));
    return;

  case R_LARCH_CALL36: {
    // This relocation is designed for adjancent pcaddu18i+jirl pairs that
    // are patched in one time. Because of sign extension of these insns'
    // immediate fields, the relocation range is [-128G - 0x20000, +128G -
    // 0x20000) (of course must be 4-byte aligned).
    if (((int64_t)val + 0x20000) != llvm::SignExtend64(val + 0x20000, 38))
      reportRangeError(loc, rel, Twine(val), llvm::minIntN(38) - 0x20000,
                       llvm::maxIntN(38) - 0x20000);
    checkAlignment(loc, val, 4, rel);
    // Since jirl performs sign extension on the offset immediate, adds (1<<17)
    // to original val to get the correct hi20.
    uint32_t hi20 = extractBits(val + (1 << 17), 37, 18);
    // Despite the name, the lower part is actually 18 bits with 4-byte aligned.
    uint32_t lo16 = extractBits(val, 17, 2);
    write32le(loc, setJ20(read32le(loc), hi20));
    write32le(loc + 4, setK16(read32le(loc + 4), lo16));
    return;
  }

  // Relocs intended for `addi`, `ld` or `st`.
  case R_LARCH_PCALA_LO12:
    // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12
    // on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes
    // removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly
    // its immediate slot width is different too (16, not 12).
    // In this case, process like an R_LARCH_B16, but without overflow checking
    // and only taking the value's lowest 12 bits.
    if (isJirl(read32le(loc))) {
      checkAlignment(loc, val, 4, rel);
      val = SignExtend64<12>(val);
      write32le(loc, setK16(read32le(loc), val >> 2));
      return;
    }
    [[fallthrough]];
  case R_LARCH_ABS_LO12:
  case R_LARCH_GOT_PC_LO12:
  case R_LARCH_GOT_LO12:
  case R_LARCH_TLS_LE_LO12:
  case R_LARCH_TLS_IE_PC_LO12:
  case R_LARCH_TLS_IE_LO12:
    write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0)));
    return;

  // Relocs intended for `lu12i.w` or `pcalau12i`.
  case R_LARCH_ABS_HI20:
  case R_LARCH_PCALA_HI20:
  case R_LARCH_GOT_PC_HI20:
  case R_LARCH_GOT_HI20:
  case R_LARCH_TLS_LE_HI20:
  case R_LARCH_TLS_IE_PC_HI20:
  case R_LARCH_TLS_IE_HI20:
  case R_LARCH_TLS_LD_PC_HI20:
  case R_LARCH_TLS_LD_HI20:
  case R_LARCH_TLS_GD_PC_HI20:
  case R_LARCH_TLS_GD_HI20:
    write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12)));
    return;

  // Relocs intended for `lu32i.d`.
  case R_LARCH_ABS64_LO20:
  case R_LARCH_PCALA64_LO20:
  case R_LARCH_GOT64_PC_LO20:
  case R_LARCH_GOT64_LO20:
  case R_LARCH_TLS_LE64_LO20:
  case R_LARCH_TLS_IE64_PC_LO20:
  case R_LARCH_TLS_IE64_LO20:
    write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32)));
    return;

  // Relocs intended for `lu52i.d`.
  case R_LARCH_ABS64_HI12:
  case R_LARCH_PCALA64_HI12:
  case R_LARCH_GOT64_PC_HI12:
  case R_LARCH_GOT64_HI12:
  case R_LARCH_TLS_LE64_HI12:
  case R_LARCH_TLS_IE64_PC_HI12:
  case R_LARCH_TLS_IE64_HI12:
    write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52)));
    return;

  // In-place add/sub relocations; applied in pairs to compute label
  // differences (e.g. for debug info and jump tables).
  case R_LARCH_ADD6:
    // Only the low 6 bits take part; the top 2 bits are preserved.
    *loc = (*loc & 0xc0) | ((*loc + val) & 0x3f);
    return;
  case R_LARCH_ADD8:
    *loc += val;
    return;
  case R_LARCH_ADD16:
    write16le(loc, read16le(loc) + val);
    return;
  case R_LARCH_ADD32:
    write32le(loc, read32le(loc) + val);
    return;
  case R_LARCH_ADD64:
    write64le(loc, read64le(loc) + val);
    return;
  case R_LARCH_ADD_ULEB128:
    handleUleb128(loc, val);
    return;
  case R_LARCH_SUB6:
    *loc = (*loc & 0xc0) | ((*loc - val) & 0x3f);
    return;
  case R_LARCH_SUB8:
    *loc -= val;
    return;
  case R_LARCH_SUB16:
    write16le(loc, read16le(loc) - val);
    return;
  case R_LARCH_SUB32:
    write32le(loc, read32le(loc) - val);
    return;
  case R_LARCH_SUB64:
    write64le(loc, read64le(loc) - val);
    return;
  case R_LARCH_SUB_ULEB128:
    // Subtraction is addition of the negated value, modulo 2^64.
    handleUleb128(loc, -val);
    return;

  case R_LARCH_MARK_LA:
  case R_LARCH_MARK_PCREL:
    // no-op
    return;

  case R_LARCH_RELAX:
    return; // Ignored (for now)

  default:
    llvm_unreachable("unknown relocation");
  }
}
686 
// One relaxation sweep over `sec`: currently only R_LARCH_ALIGN is handled,
// computing how many alignment padding bytes can be dropped before each
// relocation. Cumulative byte deltas are recorded in sec.relaxAux and symbol
// anchors (values/sizes) are updated. Returns true if any delta changed
// compared with the previous pass.
static bool relax(InputSection &sec) {
  const uint64_t secAddr = sec.getVA();
  const MutableArrayRef<Relocation> relocs = sec.relocs();
  auto &aux = *sec.relaxAux;
  bool changed = false;
  ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
  // Running total of bytes removed before the current location.
  uint64_t delta = 0;

  std::fill_n(aux.relocTypes.get(), relocs.size(), R_LARCH_NONE);
  aux.writes.clear();
  for (auto [i, r] : llvm::enumerate(relocs)) {
    // Post-relaxation address of the relocated location.
    const uint64_t loc = secAddr + r.offset - delta;
    uint32_t &cur = aux.relocDeltas[i], remove = 0;
    switch (r.type) {
    case R_LARCH_ALIGN: {
      // With a defined symbol the addend packs log2(align)+1 semantics
      // differently than the symbol-less form: for an undefined symbol the
      // addend is the raw alignment, otherwise its low 8 bits hold the
      // log2(align) and the upper bits the max padding allowed.
      const uint64_t addend =
          r.sym->isUndefined() ? Log2_64(r.addend) + 1 : r.addend;
      const uint64_t allBytes = (1 << (addend & 0xff)) - 4;
      const uint64_t align = 1 << (addend & 0xff);
      const uint64_t maxBytes = addend >> 8;
      const uint64_t off = loc & (align - 1);
      const uint64_t curBytes = off == 0 ? 0 : align - off;
      // All bytes beyond the alignment boundary should be removed.
      // If emit bytes more than max bytes to emit, remove all.
      if (maxBytes != 0 && curBytes > maxBytes)
        remove = allBytes;
      else
        remove = allBytes - curBytes;
      // If we can't satisfy this alignment, we've found a bad input.
      if (LLVM_UNLIKELY(static_cast<int32_t>(remove) < 0)) {
        errorOrWarn(getErrorLocation((const uint8_t *)loc) +
                    "insufficient padding bytes for " + lld::toString(r.type) +
                    ": " + Twine(allBytes) + " bytes available for " +
                    "requested alignment of " + Twine(align) + " bytes");
        remove = 0;
      }
      break;
    }
    }

    // For all anchors whose offsets are <= r.offset, they are preceded by
    // the previous relocation whose `relocDeltas` value equals `delta`.
    // Decrease their st_value and update their st_size.
    for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) {
      if (sa[0].end)
        sa[0].d->size = sa[0].offset - delta - sa[0].d->value;
      else
        sa[0].d->value = sa[0].offset - delta;
    }
    delta += remove;
    if (delta != cur) {
      cur = delta;
      changed = true;
    }
  }

  // Adjust anchors located after the last relocation.
  for (const SymbolAnchor &a : sa) {
    if (a.end)
      a.d->size = a.offset - delta - a.d->value;
    else
      a.d->value = a.offset - delta;
  }
  // Inform assignAddresses that the size has changed.
  if (!isUInt<32>(delta))
    fatal("section size decrease is too large: " + Twine(delta));
  sec.bytesDropped = delta;
  return changed;
}
755 
756 // When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
757 // the absence of a linker script. For call and load/store R_LARCH_RELAX, code
758 // shrinkage may reduce displacement and make more relocations eligible for
759 // relaxation. Code shrinkage may increase displacement to a call/load/store
760 // target at a higher fixed address, invalidating an earlier relaxation. Any
761 // change in section sizes can have cascading effect and require another
762 // relaxation pass.
// Run one relaxation pass over all executable input sections. Returns true if
// any section changed, requesting another pass from the driver.
bool LoongArch::relaxOnce(int pass) const {
  // Relaxation is meaningless for relocatable (-r) output.
  if (config->relocatable)
    return false;

  if (pass == 0)
    initSymbolAnchors();

  SmallVector<InputSection *, 0> storage;
  bool changed = false;
  for (OutputSection *osec : outputSections) {
    // Only code sections can carry relaxable relocations.
    if (!(osec->flags & SHF_EXECINSTR))
      continue;
    for (InputSection *sec : getInputSections(*osec, storage))
      changed |= relax(*sec);
  }
  return changed;
}
780 
// After relaxation has converged, materialize the result: allocate shrunk
// section contents with the dropped bytes removed, and shift relocation
// offsets by the accumulated deltas.
void LoongArch::finalizeRelax(int passes) const {
  log("relaxation passes: " + Twine(passes));
  SmallVector<InputSection *, 0> storage;
  for (OutputSection *osec : outputSections) {
    if (!(osec->flags & SHF_EXECINSTR))
      continue;
    for (InputSection *sec : getInputSections(*osec, storage)) {
      RelaxAux &aux = *sec->relaxAux;
      // Sections without recorded deltas were never relaxed.
      if (!aux.relocDeltas)
        continue;

      MutableArrayRef<Relocation> rels = sec->relocs();
      ArrayRef<uint8_t> old = sec->content();
      // Final size: original size minus the total bytes dropped (the last
      // entry of relocDeltas is the cumulative delta).
      size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
      uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize);
      uint64_t offset = 0;
      int64_t delta = 0;
      sec->content_ = p;
      sec->size = newSize;
      sec->bytesDropped = 0;

      // Update section content: remove NOPs for R_LARCH_ALIGN and rewrite
      // instructions for relaxed relocations.
      for (size_t i = 0, e = rels.size(); i != e; ++i) {
        uint32_t remove = aux.relocDeltas[i] - delta;
        delta = aux.relocDeltas[i];
        if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE)
          continue;

        // Copy from last location to the current relocated location.
        const Relocation &r = rels[i];
        uint64_t size = r.offset - offset;
        memcpy(p, old.data() + offset, size);
        p += size;
        // Skip the `remove` bytes being dropped at this relocation.
        offset = r.offset + remove;
      }
      // Tail: everything after the last modified relocation.
      memcpy(p, old.data() + offset, old.size() - offset);

      // Subtract the previous relocDeltas value from the relocation offset.
      // For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease
      // their r_offset by the same delta.
      delta = 0;
      for (size_t i = 0, e = rels.size(); i != e;) {
        uint64_t cur = rels[i].offset;
        do {
          rels[i].offset -= delta;
          if (aux.relocTypes[i] != R_LARCH_NONE)
            rels[i].type = aux.relocTypes[i];
        } while (++i != e && rels[i].offset == cur);
        delta = aux.relocDeltas[i - 1];
      }
    }
  }
}
835 
// Return the lazily-constructed singleton LoongArch target descriptor.
TargetInfo *elf::getLoongArchTargetInfo() {
  static LoongArch target;
  return &target;
}
840