xref: /freebsd/contrib/llvm-project/lld/ELF/Relocations.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- Relocations.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_RELOCATIONS_H
10 #define LLD_ELF_RELOCATIONS_H
11 
12 #include "lld/Common/LLVM.h"
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Object/ELFTypes.h"
16 #include <vector>
17 
18 namespace lld::elf {
19 struct Ctx;
20 class Defined;
21 class Symbol;
22 class InputSection;
23 class InputSectionBase;
24 class OutputSection;
25 class RelocationBaseSection;
26 class SectionBase;
27 
// A relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL, stored as a raw
// 32-bit value. Conversions to and from uint32_t are deliberately implicit so
// that a RelType can be used wherever a plain integer type code is expected.
struct RelType {
  uint32_t v = 0;

  /*implicit*/ constexpr RelType(uint32_t value = 0) : v(value) {}

  /*implicit*/ operator uint32_t() const { return this->v; }
};
34 
35 using JumpModType = uint32_t;
36 
37 // List of target-independent relocation types. Relocations read
38 // from files are converted to these types so that the main code
39 // doesn't have to know about architecture-specific details.
40 enum RelExpr {
  // Enumerators carry no explicit values, so each one's numeric value is its
  // position in this list; adding or reordering entries renumbers the rest.
41   R_ABS,
42   R_ADDEND,
43   R_DTPREL,
44   R_GOT,
45   R_GOT_OFF,
46   R_GOT_PC,
47   R_GOTONLY_PC,
48   R_GOTPLTONLY_PC,
49   R_GOTPLT,
50   R_GOTPLTREL,
51   R_GOTREL,
52   R_GOTPLT_GOTREL,
53   R_GOTPLT_PC,
54   R_NONE,
55   R_PC,
56   R_PLT,
57   R_PLT_PC,
58   R_PLT_GOTPLT,
59   R_PLT_GOTREL,
60   R_RELAX_HINT,
61   R_RELAX_GOT_PC,
62   R_RELAX_GOT_PC_NOPIC,
63   R_RELAX_TLS_GD_TO_IE,
64   R_RELAX_TLS_GD_TO_IE_ABS,
65   R_RELAX_TLS_GD_TO_IE_GOT_OFF,
66   R_RELAX_TLS_GD_TO_IE_GOTPLT,
67   R_RELAX_TLS_GD_TO_LE,
68   R_RELAX_TLS_GD_TO_LE_NEG,
69   R_RELAX_TLS_IE_TO_LE,
70   R_RELAX_TLS_LD_TO_LE,
71   R_RELAX_TLS_LD_TO_LE_ABS,
72   R_SIZE,
73   R_TPREL,
74   R_TPREL_NEG,
75   R_TLSDESC,
76   R_TLSDESC_CALL,
77   R_TLSDESC_PC,
78   R_TLSDESC_GOTPLT,
79   R_TLSGD_GOT,
80   R_TLSGD_GOTPLT,
81   R_TLSGD_PC,
82   R_TLSIE_HINT,
83   R_TLSLD_GOT,
84   R_TLSLD_GOTPLT,
85   R_TLSLD_GOT_OFF,
86   R_TLSLD_HINT,
87   R_TLSLD_PC,
88 
89   // The following are abstract relocation types used for only one target.
90   //
91   // Even though RelExpr is intended to be a target-neutral representation
92   // of a relocation type, there are some relocations whose semantics are
93   // unique to a target. Such relocations are marked with RE_<TARGET_NAME>.
94   RE_AARCH64_GOT_PAGE_PC,
95   RE_AARCH64_AUTH_GOT_PAGE_PC,
96   RE_AARCH64_GOT_PAGE,
97   RE_AARCH64_AUTH_GOT,
98   RE_AARCH64_AUTH_GOT_PC,
99   RE_AARCH64_PAGE_PC,
100   RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC,
101   RE_AARCH64_TLSDESC_PAGE,
102   RE_AARCH64_AUTH_TLSDESC_PAGE,
103   RE_AARCH64_AUTH_TLSDESC,
104   RE_AARCH64_AUTH,
105   RE_ARM_PCA,
106   RE_ARM_SBREL,
107   RE_MIPS_GOTREL,
108   RE_MIPS_GOT_GP,
109   RE_MIPS_GOT_GP_PC,
110   RE_MIPS_GOT_LOCAL_PAGE,
111   RE_MIPS_GOT_OFF,
112   RE_MIPS_GOT_OFF32,
113   RE_MIPS_TLSGD,
114   RE_MIPS_TLSLD,
115   RE_PPC32_PLTREL,
116   RE_PPC64_CALL,
117   RE_PPC64_CALL_PLT,
118   RE_PPC64_RELAX_TOC,
119   RE_PPC64_TOCBASE,
120   RE_PPC64_RELAX_GOT_PC,
121   RE_RISCV_ADD,
122   RE_RISCV_LEB128,
123   RE_RISCV_PC_INDIRECT,
124   // Same as R_PC but with page-aligned semantics.
125   RE_LOONGARCH_PAGE_PC,
126   // Same as R_PLT_PC but with page-aligned semantics.
127   RE_LOONGARCH_PLT_PAGE_PC,
128   // In addition to having page-aligned semantics, LoongArch GOT relocs are
129   // also reused for TLS, making the semantics differ from other architectures.
130   RE_LOONGARCH_GOT,
131   RE_LOONGARCH_GOT_PAGE_PC,
132   RE_LOONGARCH_TLSGD_PAGE_PC,
133   RE_LOONGARCH_TLSDESC_PAGE_PC,
134   RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC,
135 };
136 
137 // Architecture-neutral representation of a relocation. This is a plain
// aggregate with no default member initializers, so every field must be set
// by whoever creates the object.
138 struct Relocation {
139   RelExpr expr; // Target-independent expression; one of the RelExpr values.
140   RelType type; // Relocation type value; RelType wraps a uint32_t.
141   uint64_t offset; // NOTE(review): presumably relative to the containing section - confirm.
142   int64_t addend; // Signed addend.
143   Symbol *sym; // NOTE(review): presumably the referenced symbol - confirm whether it may be null.
144 };
145 
146 // Manipulate jump instructions with these modifiers. These are used to relax
147 // jump instruction opcodes at basic block boundaries and are particularly
148 // useful when basic block sections are enabled.
// This is a plain aggregate with no default member initializers.
149 struct JumpInstrMod {
150   uint64_t offset; // NOTE(review): presumably where the modified jump lives - confirm against users.
151   JumpModType original; // JumpModType is an alias for uint32_t.
152   unsigned size; // NOTE(review): unit (bytes?) is not visible in this header - confirm.
153 };
154 
155 // This function writes undefined symbol diagnostics to an internal buffer.
156 // Call reportUndefinedSymbols() after calling scanRelocations() to emit
157 // the diagnostics.
158 template <class ELFT> void scanRelocations(Ctx &ctx);
159 template <class ELFT> void checkNoCrossRefs(Ctx &ctx);
160 void reportUndefinedSymbols(Ctx &);
161 void postScanRelocations(Ctx &ctx);
162 void addGotEntry(Ctx &ctx, Symbol &sym);
163 
164 void hexagonTLSSymbolUpdate(Ctx &ctx);
165 bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections);
166 
167 class ThunkSection;
168 class Thunk;
169 class InputSectionDescription;
170 
// Creates Thunks for OutputSections over successive createThunks() passes and
// keeps the bookkeeping (the maps and lists below) for the Thunks it created.
171 class ThunkCreator {
172 public:
173   // Thunk may be an incomplete type here. Avoid inline ctor/dtor.
174   ThunkCreator(Ctx &ctx);
175   ~ThunkCreator();
176   // Return true if Thunks have been added to OutputSections.
177   bool createThunks(uint32_t pass, ArrayRef<OutputSection *> outputSections);
178 
179 private:
180   void mergeThunks(ArrayRef<OutputSection *> outputSections);
181 
182   ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec,
183                                InputSectionDescription *isd,
184                                const Relocation &rel, uint64_t src);
185 
186   ThunkSection *getISThunkSec(InputSection *isec);
187 
188   void createInitialThunkSections(ArrayRef<OutputSection *> outputSections);
189 
190   std::pair<Thunk *, bool> getThunk(InputSection *isec, Relocation &rel,
191                                     uint64_t src);
192 
193   std::pair<Thunk *, bool> getSyntheticLandingPad(Defined &d, int64_t a);
194 
195   ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
196                                 uint64_t off);
197 
198   bool normalizeExistingThunk(Relocation &rel, uint64_t src);
199 
200   bool addSyntheticLandingPads();
201 
202   Ctx &ctx;
203 
204   // Record all the available Thunks for a (Symbol, addend) pair, where Symbol
205   // is represented as a (section, offset) pair. There may be multiple
206   // relocations sharing the same (section, offset + addend) pair. We may revert
207   // a relocation back to its original non-Thunk target, and restore the
208   // original addend, so we cannot fold offset + addend. A nested pair is used
209   // because DenseMapInfo is not specialized for std::tuple.
210   llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
211                  SmallVector<std::unique_ptr<Thunk>, 0>>
212       thunkedSymbolsBySectionAndAddend;
213   llvm::DenseMap<std::pair<Symbol *, int64_t>,
214                  SmallVector<std::unique_ptr<Thunk>, 0>>
215       thunkedSymbols;
216 
217   // Map from a Thunk's symbol definition to the Thunk itself. We can use this
218   // to find the Thunk from a relocation to the Thunk's symbol definition.
219   llvm::DenseMap<Symbol *, Thunk *> thunks;
220 
221   // Track InputSections that have an inline ThunkSection placed in front.
222   // An inline ThunkSection may have control fall through to the section below,
223   // so we need to make sure that there is only one of them.
224   // The Mips LA25 Thunk is an example of an inline ThunkSection, as is
225   // the AArch64BTLandingPadThunk.
226   llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections;
227 
228   // Record landing pads, generated for a section + offset destination.
229   // Landing pads are alternative entry points for destinations that need
230   // to be reached via thunks that use indirect branches. A destination
231   // needs at most one landing pad as that can be reused by all callers.
232   llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
233                  std::unique_ptr<Thunk>>
234       landingPadsBySectionAndAddend;
235 
236   // All the non-landing-pad Thunks that have been created, in order of creation.
237   std::vector<Thunk *> allThunks;
238 
239   // The number of completed passes of createThunks. This permits us
240   // to do one-time initialization on pass 0 and put a limit on the
241   // number of times it can be called to prevent infinite loops.
242   uint32_t pass = 0;
243 };
244 
// Decode LEB128 without error checking. Only used by performance critical code
// like RelocsCrel.
//
// Bytes are consumed from p until one with the continuation bit (0x80) clear
// has been read; p is left pointing just past that byte. `leb` is the
// threshold at or above which 128 is subtracted from a byte before it is
// shifted into place: 128 merely strips the continuation bit (unsigned
// decoding), while 64 additionally makes bit 6 of the final byte act as the
// sign bit (signed decoding).
inline uint64_t readLEB128(const uint8_t *&p, uint64_t leb) {
  uint64_t value = 0;
  for (uint64_t bitPos = 0;; bitPos += 7) {
    const uint64_t cur = *p++;
    // Unsigned wrap-around is intentional: for a final byte >= leb the
    // subtraction produces a value whose high bits are all set, which acts as
    // the sign extension once shifted.
    const uint64_t payload = cur >= leb ? cur - 128 : cur;
    value |= payload << bitPos;
    if (cur < 128)
      return value;
  }
}

// Unsigned variant: only the continuation bit is removed from each byte.
inline uint64_t readULEB128(const uint8_t *&p) {
  constexpr uint64_t continuationThreshold = 128;
  return readLEB128(p, continuationThreshold);
}

// Signed variant: the final byte is sign-extended from bit 6.
inline int64_t readSLEB128(const uint8_t *&p) {
  constexpr uint64_t signThreshold = 64;
  return readLEB128(p, signThreshold);
}
258 
259 // This class implements a CREL iterator that does not allocate extra memory.
260 template <bool is64> struct RelocsCrel {
261   using uint = std::conditional_t<is64, uint64_t, uint32_t>;
262   struct const_iterator {
263     using iterator_category = std::forward_iterator_tag;
264     using value_type = llvm::object::Elf_Crel_Impl<is64>;
265     using difference_type = ptrdiff_t;
266     using pointer = value_type *;
267     using reference = const value_type &;
268     uint32_t count;
269     uint8_t flagBits, shift;
270     const uint8_t *p;
271     llvm::object::Elf_Crel_Impl<is64> crel{};
const_iteratorRelocsCrel::const_iterator272     const_iterator(size_t hdr, const uint8_t *p)
273         : count(hdr / 8), flagBits(hdr & 4 ? 3 : 2), shift(hdr % 4), p(p) {
274       if (count)
275         step();
276     }
stepRelocsCrel::const_iterator277     void step() {
278       // See object::decodeCrel.
279       const uint8_t b = *p++;
280       crel.r_offset += b >> flagBits << shift;
281       if (b >= 0x80)
282         crel.r_offset +=
283             ((readULEB128(p) << (7 - flagBits)) - (0x80 >> flagBits)) << shift;
284       if (b & 1)
285         crel.r_symidx += readSLEB128(p);
286       if (b & 2)
287         crel.r_type += readSLEB128(p);
288       if (b & 4 && flagBits == 3)
289         crel.r_addend += static_cast<uint>(readSLEB128(p));
290     }
291     llvm::object::Elf_Crel_Impl<is64> operator*() const { return crel; };
292     const llvm::object::Elf_Crel_Impl<is64> *operator->() const {
293       return &crel;
294     }
295     // For llvm::enumerate.
296     bool operator==(const const_iterator &r) const { return count == r.count; }
297     bool operator!=(const const_iterator &r) const { return count != r.count; }
298     const_iterator &operator++() {
299       if (--count)
300         step();
301       return *this;
302     }
303     // For RelocationScanner::scanOne.
304     void operator+=(size_t n) {
305       for (; n; --n)
306         operator++();
307     }
308   };
309 
310   size_t hdr = 0;
311   const uint8_t *p = nullptr;
312 
313   constexpr RelocsCrel() = default;
RelocsCrelRelocsCrel314   RelocsCrel(const uint8_t *p) : hdr(readULEB128(p)) { this->p = p; }
sizeRelocsCrel315   size_t size() const { return hdr / 8; }
beginRelocsCrel316   const_iterator begin() const { return {hdr, p}; }
endRelocsCrel317   const_iterator end() const { return {0, nullptr}; }
318 };
319 
320 template <class RelTy> struct Relocs : ArrayRef<RelTy> {
321   Relocs() = default;
RelocsRelocs322   Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {}
323 };
324 
325 template <bool is64>
326 struct Relocs<llvm::object::Elf_Crel_Impl<is64>> : RelocsCrel<is64> {
327   using RelocsCrel<is64>::RelocsCrel;
328 };
329 
// Return the addend of a relocation record as an int64_t, so that any sign
// extension happens as early as possible.
//
// ELFT::Rel records have no r_addend field; the addend is reported as 0.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rel & /*rel*/) {
  return int64_t{0};
}

// ELFT::Rela records carry the addend in r_addend.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rela &rel) {
  const int64_t addend = rel.r_addend;
  return addend;
}

// ELFT::Crel records carry the addend in r_addend.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Crel &rel) {
  const int64_t addend = rel.r_addend;
  return addend;
}
344 
345 template <typename RelTy>
346 inline Relocs<RelTy> sortRels(Relocs<RelTy> rels,
347                               SmallVector<RelTy, 0> &storage) {
348   auto cmp = [](const RelTy &a, const RelTy &b) {
349     return a.r_offset < b.r_offset;
350   };
351   if (!llvm::is_sorted(rels, cmp)) {
352     storage.assign(rels.begin(), rels.end());
353     llvm::stable_sort(storage, cmp);
354     rels = Relocs<RelTy>(storage);
355   }
356   return rels;
357 }
358 
359 template <bool is64>
360 inline Relocs<llvm::object::Elf_Crel_Impl<is64>>
361 sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels,
362          SmallVector<llvm::object::Elf_Crel_Impl<is64>, 0> &storage) {
363   return {};
364 }
365 
366 RelocationBaseSection &getIRelativeSection(Ctx &ctx);
367 
368 // Returns true if expr refers to a GOT entry. Note that this function returns
369 // false for TLS variables even though they need a GOT, because TLS variables
370 // use the GOT differently than regular variables.
371 bool needsGot(RelExpr expr);
372 } // namespace lld::elf
373 
374 #endif
375