xref: /freebsd/contrib/llvm-project/lld/ELF/Relocations.h (revision 5036d9652a5701d00e9e40ea942c278e9f77d33d)
1 //===- Relocations.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_RELOCATIONS_H
10 #define LLD_ELF_RELOCATIONS_H
11 
12 #include "lld/Common/LLVM.h"
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Object/ELFTypes.h"
16 #include <vector>
17 
18 namespace lld::elf {
// Forward declarations of types that are only used by pointer or reference in
// this header.
class Symbol;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;

// Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
using RelType = uint32_t;
// Type of the value stored in JumpInstrMod::original.
using JumpModType = uint32_t;
28 
// List of target-independent relocation types. Relocations read
// from files are converted to these types so that the main code
// doesn't have to know about architecture-specific details.
//
// This is an unscoped enum without explicit values, so each enumerator's
// numeric value is determined by its position in the list below.
enum RelExpr {
  R_ABS,
  R_ADDEND,
  R_DTPREL,
  R_GOT,
  R_GOT_OFF,
  R_GOT_PC,
  R_GOTONLY_PC,
  R_GOTPLTONLY_PC,
  R_GOTPLT,
  R_GOTPLTREL,
  R_GOTREL,
  R_GOTPLT_GOTREL,
  R_GOTPLT_PC,
  R_NONE,
  R_PC,
  R_PLT,
  R_PLT_PC,
  R_PLT_GOTPLT,
  R_PLT_GOTREL,
  R_RELAX_HINT,
  R_RELAX_GOT_PC,
  R_RELAX_GOT_PC_NOPIC,
  R_RELAX_TLS_GD_TO_IE,
  R_RELAX_TLS_GD_TO_IE_ABS,
  R_RELAX_TLS_GD_TO_IE_GOT_OFF,
  R_RELAX_TLS_GD_TO_IE_GOTPLT,
  R_RELAX_TLS_GD_TO_LE,
  R_RELAX_TLS_GD_TO_LE_NEG,
  R_RELAX_TLS_IE_TO_LE,
  R_RELAX_TLS_LD_TO_LE,
  R_RELAX_TLS_LD_TO_LE_ABS,
  R_SIZE,
  R_TPREL,
  R_TPREL_NEG,
  R_TLSDESC,
  R_TLSDESC_CALL,
  R_TLSDESC_PC,
  R_TLSDESC_GOTPLT,
  R_TLSGD_GOT,
  R_TLSGD_GOTPLT,
  R_TLSGD_PC,
  R_TLSIE_HINT,
  R_TLSLD_GOT,
  R_TLSLD_GOTPLT,
  R_TLSLD_GOT_OFF,
  R_TLSLD_HINT,
  R_TLSLD_PC,

  // The following are abstract relocation types used for only one target.
  //
  // Even though RelExpr is intended to be a target-neutral representation
  // of a relocation type, there are some relocations whose semantics are
  // unique to a target. Such relocations are marked with R_<TARGET_NAME>.
  R_AARCH64_GOT_PAGE_PC,
  R_AARCH64_GOT_PAGE,
  R_AARCH64_PAGE_PC,
  R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC,
  R_AARCH64_TLSDESC_PAGE,
  R_AARCH64_AUTH,
  R_ARM_PCA,
  R_ARM_SBREL,
  R_MIPS_GOTREL,
  R_MIPS_GOT_GP,
  R_MIPS_GOT_GP_PC,
  R_MIPS_GOT_LOCAL_PAGE,
  R_MIPS_GOT_OFF,
  R_MIPS_GOT_OFF32,
  R_MIPS_TLSGD,
  R_MIPS_TLSLD,
  R_PPC32_PLTREL,
  R_PPC64_CALL,
  R_PPC64_CALL_PLT,
  R_PPC64_RELAX_TOC,
  R_PPC64_TOCBASE,
  R_PPC64_RELAX_GOT_PC,
  R_RISCV_ADD,
  R_RISCV_LEB128,
  R_RISCV_PC_INDIRECT,
  // Same as R_PC but with page-aligned semantics.
  R_LOONGARCH_PAGE_PC,
  // Same as R_PLT_PC but with page-aligned semantics.
  R_LOONGARCH_PLT_PAGE_PC,
  // In addition to having page-aligned semantics, LoongArch GOT relocs are
  // also reused for TLS, making the semantics differ from other architectures.
  R_LOONGARCH_GOT,
  R_LOONGARCH_GOT_PAGE_PC,
  R_LOONGARCH_TLSGD_PAGE_PC,
  R_LOONGARCH_TLSDESC_PAGE_PC,
};
122 
// Architecture-neutral representation of a relocation.
struct Relocation {
  // Target-independent classification of the relocation (see RelExpr).
  RelExpr expr;
  // Relocation type, such as R_X86_64_PC32 (see RelType).
  RelType type;
  // NOTE(review): presumably the offset of the relocated location within its
  // containing section -- confirm against the users of this struct.
  uint64_t offset;
  // Signed addend associated with the relocation.
  int64_t addend;
  // Symbol the relocation refers to. The pointer is not owned by this struct.
  Symbol *sym;
};
131 
// Manipulate jump instructions with these modifiers.  These are used to relax
// jump instruction opcodes at basic block boundaries and are particularly
// useful when basic block sections are enabled.
struct JumpInstrMod {
  // NOTE(review): presumably the offset of the jump instruction being
  // modified -- confirm against the code that fills this in.
  uint64_t offset;
  // Modifier value of type JumpModType.
  JumpModType original;
  // NOTE(review): presumably a size in bytes -- confirm the unit at the use
  // sites.
  unsigned size;
};
140 
// This function writes undefined symbol diagnostics to an internal buffer.
// Call reportUndefinedSymbols() after calling scanRelocations() to emit
// the diagnostics.
template <class ELFT> void scanRelocations();
// Declared here only; parameterized on the ELF type like scanRelocations().
template <class ELFT> void checkNoCrossRefs();
// Emits the undefined symbol diagnostics buffered by scanRelocations().
void reportUndefinedSymbols();
// NOTE(review): by its name this is meant to run after scanRelocations();
// confirm the required ordering against the definition.
void postScanRelocations();
// NOTE(review): presumably adds a GOT entry for `sym` -- confirm against the
// definition.
void addGotEntry(Symbol &sym);

// Hexagon-specific TLS helpers operating on the given output sections. Their
// exact behavior is defined out of line.
void hexagonTLSSymbolUpdate(ArrayRef<OutputSection *> outputSections);
bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections);
152 
// Forward declarations of types used by pointer in ThunkCreator below.
class ThunkSection;
class Thunk;
class InputSectionDescription;
156 
// Creates Thunks and ThunkSections for a set of OutputSections and keeps the
// bookkeeping needed to look up already-created Thunks. The only public entry
// point is createThunks(); all member functions are defined out of line.
class ThunkCreator {
public:
  // Return true if Thunks have been added to OutputSections.
  // `pass` is supplied by the caller; see the `pass` member below for how the
  // pass count is used.
  bool createThunks(uint32_t pass, ArrayRef<OutputSection *> outputSections);

private:
  void mergeThunks(ArrayRef<OutputSection *> outputSections);

  ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec,
                               InputSectionDescription *isd,
                               const Relocation &rel, uint64_t src);

  ThunkSection *getISThunkSec(InputSection *isec);

  void createInitialThunkSections(ArrayRef<OutputSection *> outputSections);

  // Returns a Thunk together with a bool flag.
  // NOTE(review): the flag presumably tells whether the Thunk was newly
  // created -- confirm against the definition.
  std::pair<Thunk *, bool> getThunk(InputSection *isec, Relocation &rel,
                                    uint64_t src);

  ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
                                uint64_t off);

  bool normalizeExistingThunk(Relocation &rel, uint64_t src);

  // Record all the available Thunks for a (Symbol, addend) pair, where Symbol
  // is represented as a (section, offset) pair. There may be multiple
  // relocations sharing the same (section, offset + addend) pair. We may revert
  // a relocation back to its original non-Thunk target, and restore the
  // original addend, so we cannot fold offset + addend. A nested pair is used
  // because DenseMapInfo is not specialized for std::tuple.
  llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
                 std::vector<Thunk *>>
      thunkedSymbolsBySectionAndAddend;
  // Same kind of record, keyed directly by (Symbol *, addend).
  llvm::DenseMap<std::pair<Symbol *, int64_t>, std::vector<Thunk *>>
      thunkedSymbols;

  // Find a Thunk from the Thunk's symbol definition. We can use this to find
  // the Thunk from a relocation to the Thunk's symbol definition.
  llvm::DenseMap<Symbol *, Thunk *> thunks;

  // Track InputSections that have an inline ThunkSection placed in front.
  // An inline ThunkSection may have control fall through to the section below,
  // so we need to make sure that there is only one of them.
  // The Mips LA25 Thunk is an example of an inline ThunkSection.
  llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections;

  // The number of completed passes of createThunks. This permits us
  // to do one-time initialization on Pass 0 and put a limit on the
  // number of times it can be called to prevent infinite loops.
  uint32_t pass = 0;
};
208 
// Decode LEB128 without error checking. Only used by performance critical code
// like RelocsCrel.
//
// `p` is advanced past the bytes that were consumed. `leb` selects the
// flavour: 128 decodes an unsigned value, 64 decodes a signed value. Every
// byte >= `leb` has 128 subtracted before it is merged into the result. For
// continuation bytes this strips the continuation bit. For the final byte of
// a signed value with bit 6 set, the unsigned wrap-around acts as sign
// extension.
inline uint64_t readLEB128(const uint8_t *&p, uint64_t leb) {
  uint64_t result = 0;
  for (uint64_t bitPos = 0;; bitPos += 7) {
    const uint64_t cur = *p++;
    const uint64_t payload = cur >= leb ? cur - 128 : cur;
    result |= payload << bitPos;
    // A byte without the continuation bit terminates the encoding.
    if (cur < 128)
      return result;
  }
}

// Decode an unsigned LEB128 value at `p`, advancing `p` past it.
inline uint64_t readULEB128(const uint8_t *&p) {
  return readLEB128(p, /*leb=*/128);
}

// Decode a signed LEB128 value at `p`, advancing `p` past it.
inline int64_t readSLEB128(const uint8_t *&p) {
  return readLEB128(p, /*leb=*/64);
}
222 
// This class implements a CREL iterator that does not allocate extra memory.
//
// A RelocsCrel is a (header, pointer) pair: `hdr` is the ULEB128 header read
// from the start of the data and `p` points at the first encoded relocation
// after the header. Relocations are decoded lazily, one at a time, by
// const_iterator.
template <bool is64> struct RelocsCrel {
  using uint = std::conditional_t<is64, uint64_t, uint32_t>;
  struct const_iterator {
    using iterator_category = std::forward_iterator_tag;
    using value_type = llvm::object::Elf_Crel_Impl<is64>;
    using difference_type = ptrdiff_t;
    using pointer = value_type *;
    using reference = const value_type &;
    // Number of relocations that have not been iterated past yet, including
    // the one currently held in `crel`; 0 denotes the end iterator.
    uint32_t count;
    // Decoding parameters derived from the header (see the constructor).
    uint8_t flagBits, shift;
    // Read cursor into the encoded data.
    const uint8_t *p;
    // The current decoded relocation. Fields are accumulated from deltas, so
    // this also carries the state needed to decode the next relocation.
    llvm::object::Elf_Crel_Impl<is64> crel{};
    // Header layout as used here: hdr / 8 is the relocation count, bit 2
    // selects 3 flag bits (otherwise 2), and hdr % 4 is the offset shift.
    // The first relocation is decoded eagerly if there is one.
    const_iterator(size_t hdr, const uint8_t *p)
        : count(hdr / 8), flagBits(hdr & 4 ? 3 : 2), shift(hdr % 4), p(p) {
      if (count)
        step();
    }
    // Decode the next relocation at `p` into `crel`, advancing `p`.
    void step() {
      // See object::decodeCrel.
      const uint8_t b = *p++;
      // The bits of the leading byte above the flag bits are the low part of
      // the offset delta.
      crel.r_offset += b >> flagBits << shift;
      // If bit 7 is set, the rest of the offset delta follows as ULEB128. The
      // subtraction cancels the contribution of bit 7 itself, which was
      // already added by the statement above.
      if (b >= 0x80)
        crel.r_offset +=
            ((readULEB128(p) << (7 - flagBits)) - (0x80 >> flagBits)) << shift;
      // Flag bit 0: a signed symbol index delta follows.
      if (b & 1)
        crel.r_symidx += readSLEB128(p);
      // Flag bit 1: a signed type delta follows.
      if (b & 2)
        crel.r_type += readSLEB128(p);
      // Flag bit 2 is only a flag when there are 3 flag bits: a signed addend
      // delta follows.
      if (b & 4 && flagBits == 3)
        crel.r_addend += static_cast<uint>(readSLEB128(p));
    }
    // Returns a copy of the current relocation.
    llvm::object::Elf_Crel_Impl<is64> operator*() const { return crel; };
    const llvm::object::Elf_Crel_Impl<is64> *operator->() const {
      return &crel;
    }
    // For llvm::enumerate.
    // Note that only the remaining count is compared, not the read position.
    bool operator==(const const_iterator &r) const { return count == r.count; }
    bool operator!=(const const_iterator &r) const { return count != r.count; }
    // Advance to the next relocation; nothing is decoded once the count
    // reaches 0. Must not be called on an iterator whose count is already 0,
    // because the unsigned count would wrap around.
    const_iterator &operator++() {
      if (--count)
        step();
      return *this;
    }
    // For RelocationScanner::scanOne.
    // Advances by n relocations, decoding each one in turn.
    void operator+=(size_t n) {
      for (; n; --n)
        operator++();
    }
  };

  // Raw header value; 0 for a default-constructed (empty) object.
  size_t hdr = 0;
  // Points just past the header, at the first encoded relocation.
  const uint8_t *p = nullptr;

  constexpr RelocsCrel() = default;
  // Reads the ULEB128 header at `p`. readULEB128 advances the parameter `p`,
  // and the advanced value is then stored in the member.
  RelocsCrel(const uint8_t *p) : hdr(readULEB128(p)) { this->p = p; }
  // Number of relocations according to the header.
  size_t size() const { return hdr / 8; }
  const_iterator begin() const { return {hdr, p}; }
  // The end iterator has count 0, which is all that operator== compares.
  const_iterator end() const { return {0, nullptr}; }
};
283 
// A range of relocations of type RelTy. The primary template is a thin
// wrapper around ArrayRef<RelTy> that can be default-constructed or built
// from an existing ArrayRef.
template <class RelTy> struct Relocs : ArrayRef<RelTy> {
  Relocs() = default;
  Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {}
};
288 
// Specialization for CREL: instead of an ArrayRef, the range is a
// RelocsCrel<is64>, whose constructors are inherited unchanged.
template <bool is64>
struct Relocs<llvm::object::Elf_Crel_Impl<is64>> : RelocsCrel<is64> {
  using RelocsCrel<is64>::RelocsCrel;
};
293 
// Return an int64_t to make sure we get the sign extension out of the way as
// early as possible.
//
// There is one overload per relocation record kind; the caller selects the
// ELF type explicitly, e.g. getAddend<ELFT>(rel).

// Rel records carry no addend field, so the result is always 0.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rel &) {
  return 0;
}

// Rela records: widen the explicit r_addend field.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rela &rel) {
  const int64_t addend = rel.r_addend;
  return addend;
}

// Crel records: widen the explicit r_addend field.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Crel &rel) {
  const int64_t addend = rel.r_addend;
  return addend;
}
308 
309 template <typename RelTy>
310 inline Relocs<RelTy> sortRels(Relocs<RelTy> rels,
311                               SmallVector<RelTy, 0> &storage) {
312   auto cmp = [](const RelTy &a, const RelTy &b) {
313     return a.r_offset < b.r_offset;
314   };
315   if (!llvm::is_sorted(rels, cmp)) {
316     storage.assign(rels.begin(), rels.end());
317     llvm::stable_sort(storage, cmp);
318     rels = Relocs<RelTy>(storage);
319   }
320   return rels;
321 }
322 
// Overload of sortRels for CREL relocations. It ignores both arguments and
// returns a default-constructed (empty) range; nothing is sorted or copied.
// NOTE(review): this looks like a stub that only exists so that generic code
// calling sortRels compiles for CREL; confirm that no caller relies on the
// result for CREL input.
template <bool is64>
inline Relocs<llvm::object::Elf_Crel_Impl<is64>>
sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels,
         SmallVector<llvm::object::Elf_Crel_Impl<is64>, 0> &storage) {
  return {};
}
329 
// Returns true if expr refers to a GOT entry. Note that this function returns
// false for TLS variables even though they need GOT, because TLS variables use
// GOT differently than regular variables.
bool needsGot(RelExpr expr);
334 } // namespace lld::elf
335 
336 #endif
337