10b57cec5SDimitry Andric //===- Relocations.h -------------------------------------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric #ifndef LLD_ELF_RELOCATIONS_H
100b57cec5SDimitry Andric #define LLD_ELF_RELOCATIONS_H
110b57cec5SDimitry Andric
120b57cec5SDimitry Andric #include "lld/Common/LLVM.h"
130b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
1404eeddc0SDimitry Andric #include "llvm/ADT/STLExtras.h"
15*52418fc2SDimitry Andric #include "llvm/Object/ELFTypes.h"
160b57cec5SDimitry Andric #include <vector>
170b57cec5SDimitry Andric
18bdd1243dSDimitry Andric namespace lld::elf {
190b57cec5SDimitry Andric class Symbol;
200b57cec5SDimitry Andric class InputSection;
210b57cec5SDimitry Andric class InputSectionBase;
220b57cec5SDimitry Andric class OutputSection;
230b57cec5SDimitry Andric class SectionBase;
240b57cec5SDimitry Andric
// Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL.
using RelType = uint32_t;
// Type of the value stored in JumpInstrMod::original.
// NOTE(review): presumably identifies a kind of jump instruction -- confirm
// against the code that fills in JumpInstrMod.
using JumpModType = uint32_t;
280b57cec5SDimitry Andric
// List of target-independent relocation types. Relocations read
// from files are converted to these types so that the main code
// doesn't have to know about architecture-specific details.
//
// None of the enumerators has an explicit value, so their numeric values
// follow declaration order; inserting or reordering entries renumbers
// everything declared after the change.
enum RelExpr {
  R_ABS,
  R_ADDEND,
  R_DTPREL,
  R_GOT,
  R_GOT_OFF,
  R_GOT_PC,
  R_GOTONLY_PC,
  R_GOTPLTONLY_PC,
  R_GOTPLT,
  R_GOTPLTREL,
  R_GOTREL,
  R_GOTPLT_GOTREL,
  R_GOTPLT_PC,
  R_NONE,
  R_PC,
  R_PLT,
  R_PLT_PC,
  R_PLT_GOTPLT,
  R_PLT_GOTREL,
  R_RELAX_HINT,
  R_RELAX_GOT_PC,
  R_RELAX_GOT_PC_NOPIC,
  R_RELAX_TLS_GD_TO_IE,
  R_RELAX_TLS_GD_TO_IE_ABS,
  R_RELAX_TLS_GD_TO_IE_GOT_OFF,
  R_RELAX_TLS_GD_TO_IE_GOTPLT,
  R_RELAX_TLS_GD_TO_LE,
  R_RELAX_TLS_GD_TO_LE_NEG,
  R_RELAX_TLS_IE_TO_LE,
  R_RELAX_TLS_LD_TO_LE,
  R_RELAX_TLS_LD_TO_LE_ABS,
  R_SIZE,
  R_TPREL,
  R_TPREL_NEG,
  R_TLSDESC,
  R_TLSDESC_CALL,
  R_TLSDESC_PC,
  R_TLSDESC_GOTPLT,
  R_TLSGD_GOT,
  R_TLSGD_GOTPLT,
  R_TLSGD_PC,
  R_TLSIE_HINT,
  R_TLSLD_GOT,
  R_TLSLD_GOTPLT,
  R_TLSLD_GOT_OFF,
  R_TLSLD_HINT,
  R_TLSLD_PC,

  // The following are abstract relocation types used by only one target.
  //
  // Even though RelExpr is intended to be a target-neutral representation
  // of a relocation type, there are some relocations whose semantics are
  // unique to a target. Such relocations are marked with R_<TARGET_NAME>.
  R_AARCH64_GOT_PAGE_PC,
  R_AARCH64_GOT_PAGE,
  R_AARCH64_PAGE_PC,
  R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC,
  R_AARCH64_TLSDESC_PAGE,
  R_AARCH64_AUTH,
  R_ARM_PCA,
  R_ARM_SBREL,
  R_MIPS_GOTREL,
  R_MIPS_GOT_GP,
  R_MIPS_GOT_GP_PC,
  R_MIPS_GOT_LOCAL_PAGE,
  R_MIPS_GOT_OFF,
  R_MIPS_GOT_OFF32,
  R_MIPS_TLSGD,
  R_MIPS_TLSLD,
  R_PPC32_PLTREL,
  R_PPC64_CALL,
  R_PPC64_CALL_PLT,
  R_PPC64_RELAX_TOC,
  R_PPC64_TOCBASE,
  R_PPC64_RELAX_GOT_PC,
  R_RISCV_ADD,
  R_RISCV_LEB128,
  R_RISCV_PC_INDIRECT,
  // Same as R_PC but with page-aligned semantics.
  R_LOONGARCH_PAGE_PC,
  // Same as R_PLT_PC but with page-aligned semantics.
  R_LOONGARCH_PLT_PAGE_PC,
  // In addition to having page-aligned semantics, LoongArch GOT relocs are
  // also reused for TLS, making the semantics differ from other architectures.
  R_LOONGARCH_GOT,
  R_LOONGARCH_GOT_PAGE_PC,
  R_LOONGARCH_TLSGD_PAGE_PC,
  R_LOONGARCH_TLSDESC_PAGE_PC,
};
1220b57cec5SDimitry Andric
// Architecture-neutral representation of a relocation.
struct Relocation {
  // Target-independent expression kind (see RelExpr).
  RelExpr expr;
  // Relocation type, such as R_X86_64_PC32 (see RelType).
  RelType type;
  // NOTE(review): presumably the position of the relocated location within
  // its containing section -- confirm against the code that fills this in.
  uint64_t offset;
  // Signed addend associated with the relocation.
  int64_t addend;
  // Symbol associated with the relocation.
  // NOTE(review): nullability is not visible from this header -- confirm.
  Symbol *sym;
};
1310b57cec5SDimitry Andric
// Manipulate jump instructions with these modifiers. These are used to relax
// jump instruction opcodes at basic block boundaries and are particularly
// useful when basic block sections are enabled.
struct JumpInstrMod {
  // NOTE(review): presumably the position of the jump instruction within its
  // section -- confirm.
  uint64_t offset;
  // JumpModType value recorded for the instruction.
  // NOTE(review): the name suggests it describes the unmodified jump --
  // confirm.
  JumpModType original;
  // NOTE(review): presumably a size in bytes related to the instruction --
  // confirm units and meaning.
  unsigned size;
};
1405ffd83dbSDimitry Andric
// scanRelocations() writes undefined symbol diagnostics to an internal buffer.
// Call reportUndefinedSymbols() after calling scanRelocations() to emit
// the diagnostics.
template <class ELFT> void scanRelocations();
// NOTE(review): defined elsewhere; from the name, checks for disallowed
// cross-section references -- confirm against the definition.
template <class ELFT> void checkNoCrossRefs();
// Emits the diagnostics buffered by scanRelocations().
void reportUndefinedSymbols();
// NOTE(review): defined elsewhere; presumably a follow-up step that runs once
// scanRelocations() has finished -- confirm.
void postScanRelocations();
// NOTE(review): defined elsewhere; from the name, adds a GOT entry for `sym`
// -- confirm.
void addGotEntry(Symbol &sym);

// Hexagon-specific helpers operating on the given output sections; both are
// defined elsewhere.
void hexagonTLSSymbolUpdate(ArrayRef<OutputSection *> outputSections);
bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections);
1525ffd83dbSDimitry Andric
1530b57cec5SDimitry Andric class ThunkSection;
1540b57cec5SDimitry Andric class Thunk;
155e8d8bef9SDimitry Andric class InputSectionDescription;
1560b57cec5SDimitry Andric
// Holds the bookkeeping needed to create Thunks over repeated calls to
// createThunks(): which Thunks already exist for a given target, which
// InputSections already have an inline ThunkSection, and how many passes
// have completed.
class ThunkCreator {
public:
  // Return true if Thunks have been added to OutputSections.
  bool createThunks(uint32_t pass, ArrayRef<OutputSection *> outputSections);

private:
  void mergeThunks(ArrayRef<OutputSection *> outputSections);

  ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec,
                               InputSectionDescription *isd,
                               const Relocation &rel, uint64_t src);

  ThunkSection *getISThunkSec(InputSection *isec);

  void createInitialThunkSections(ArrayRef<OutputSection *> outputSections);

  // NOTE(review): the bool in the returned pair presumably reports whether
  // the Thunk was newly created -- confirm against the definition.
  std::pair<Thunk *, bool> getThunk(InputSection *isec, Relocation &rel,
                                    uint64_t src);

  ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
                                uint64_t off);

  bool normalizeExistingThunk(Relocation &rel, uint64_t src);

  // Record all the available Thunks for a (Symbol, addend) pair, where Symbol
  // is represented as a (section, offset) pair. There may be multiple
  // relocations sharing the same (section, offset + addend) pair. We may revert
  // a relocation back to its original non-Thunk target, and restore the
  // original addend, so we cannot fold offset + addend. A nested pair is used
  // because DenseMapInfo is not specialized for std::tuple.
  llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
                 std::vector<Thunk *>>
      thunkedSymbolsBySectionAndAddend;
  // Same kind of record, keyed directly by (Symbol *, addend).
  llvm::DenseMap<std::pair<Symbol *, int64_t>, std::vector<Thunk *>>
      thunkedSymbols;

  // Find a Thunk from the Thunk's symbol definition. We can use this to find
  // the Thunk from a relocation to the Thunk's symbol definition.
  llvm::DenseMap<Symbol *, Thunk *> thunks;

  // Track InputSections that have an inline ThunkSection placed in front.
  // An inline ThunkSection may have control fall through to the section below,
  // so we need to make sure that there is only one of them.
  // The Mips LA25 Thunk is an example of an inline ThunkSection.
  llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections;

  // The number of completed passes of createThunks. This permits us
  // to do one-time initialization on pass 0 and to put a limit on the
  // number of times it can be called, to prevent infinite loops.
  uint32_t pass = 0;
};
2080b57cec5SDimitry Andric
// Unchecked LEB128 decoder, intended only for performance-critical code such
// as RelocsCrel. It reads bytes starting at `p` until one without the
// continuation bit (0x80) is seen, and leaves `p` just past that byte.
//
// `leb` selects the flavour: every byte whose value is >= `leb` has 128
// subtracted before being shifted into place. With leb == 128 that only
// strips the continuation bit (unsigned decoding). With leb == 64 it also
// turns a final byte with bit 6 set into a negative value, which
// sign-extends the result (signed decoding).
//
// There is no bounds checking and no guard against over-long encodings; the
// caller must supply well-formed input.
inline uint64_t readLEB128(const uint8_t *&p, uint64_t leb) {
  uint64_t value = 0;
  for (uint64_t bitPos = 0;; bitPos += 7) {
    const uint64_t cur = *p++;
    const uint64_t bias = cur >= leb ? 128 : 0;
    value |= (cur - bias) << bitPos;
    if (cur < 128)
      return value;
  }
}

// Decode an unsigned LEB128 value at `p`, advancing `p` past it.
inline uint64_t readULEB128(const uint8_t *&p) {
  constexpr uint64_t unsignedThreshold = 128;
  return readLEB128(p, unsignedThreshold);
}

// Decode a signed LEB128 value at `p`, advancing `p` past it.
inline int64_t readSLEB128(const uint8_t *&p) {
  constexpr uint64_t signedThreshold = 64;
  return readLEB128(p, signedThreshold);
}
222*52418fc2SDimitry Andric
223*52418fc2SDimitry Andric // This class implements a CREL iterator that does not allocate extra memory.
224*52418fc2SDimitry Andric template <bool is64> struct RelocsCrel {
225*52418fc2SDimitry Andric using uint = std::conditional_t<is64, uint64_t, uint32_t>;
226*52418fc2SDimitry Andric struct const_iterator {
227*52418fc2SDimitry Andric using iterator_category = std::forward_iterator_tag;
228*52418fc2SDimitry Andric using value_type = llvm::object::Elf_Crel_Impl<is64>;
229*52418fc2SDimitry Andric using difference_type = ptrdiff_t;
230*52418fc2SDimitry Andric using pointer = value_type *;
231*52418fc2SDimitry Andric using reference = const value_type &;
232*52418fc2SDimitry Andric uint32_t count;
233*52418fc2SDimitry Andric uint8_t flagBits, shift;
234*52418fc2SDimitry Andric const uint8_t *p;
235*52418fc2SDimitry Andric llvm::object::Elf_Crel_Impl<is64> crel{};
const_iteratorRelocsCrel::const_iterator236*52418fc2SDimitry Andric const_iterator(size_t hdr, const uint8_t *p)
237*52418fc2SDimitry Andric : count(hdr / 8), flagBits(hdr & 4 ? 3 : 2), shift(hdr % 4), p(p) {
238*52418fc2SDimitry Andric if (count)
239*52418fc2SDimitry Andric step();
240*52418fc2SDimitry Andric }
stepRelocsCrel::const_iterator241*52418fc2SDimitry Andric void step() {
242*52418fc2SDimitry Andric // See object::decodeCrel.
243*52418fc2SDimitry Andric const uint8_t b = *p++;
244*52418fc2SDimitry Andric crel.r_offset += b >> flagBits << shift;
245*52418fc2SDimitry Andric if (b >= 0x80)
246*52418fc2SDimitry Andric crel.r_offset +=
247*52418fc2SDimitry Andric ((readULEB128(p) << (7 - flagBits)) - (0x80 >> flagBits)) << shift;
248*52418fc2SDimitry Andric if (b & 1)
249*52418fc2SDimitry Andric crel.r_symidx += readSLEB128(p);
250*52418fc2SDimitry Andric if (b & 2)
251*52418fc2SDimitry Andric crel.r_type += readSLEB128(p);
252*52418fc2SDimitry Andric if (b & 4 && flagBits == 3)
253*52418fc2SDimitry Andric crel.r_addend += static_cast<uint>(readSLEB128(p));
254*52418fc2SDimitry Andric }
255*52418fc2SDimitry Andric llvm::object::Elf_Crel_Impl<is64> operator*() const { return crel; };
256*52418fc2SDimitry Andric const llvm::object::Elf_Crel_Impl<is64> *operator->() const {
257*52418fc2SDimitry Andric return &crel;
258*52418fc2SDimitry Andric }
259*52418fc2SDimitry Andric // For llvm::enumerate.
260*52418fc2SDimitry Andric bool operator==(const const_iterator &r) const { return count == r.count; }
261*52418fc2SDimitry Andric bool operator!=(const const_iterator &r) const { return count != r.count; }
262*52418fc2SDimitry Andric const_iterator &operator++() {
263*52418fc2SDimitry Andric if (--count)
264*52418fc2SDimitry Andric step();
265*52418fc2SDimitry Andric return *this;
266*52418fc2SDimitry Andric }
267*52418fc2SDimitry Andric // For RelocationScanner::scanOne.
268*52418fc2SDimitry Andric void operator+=(size_t n) {
269*52418fc2SDimitry Andric for (; n; --n)
270*52418fc2SDimitry Andric operator++();
271*52418fc2SDimitry Andric }
272*52418fc2SDimitry Andric };
273*52418fc2SDimitry Andric
274*52418fc2SDimitry Andric size_t hdr = 0;
275*52418fc2SDimitry Andric const uint8_t *p = nullptr;
276*52418fc2SDimitry Andric
277*52418fc2SDimitry Andric constexpr RelocsCrel() = default;
RelocsCrelRelocsCrel278*52418fc2SDimitry Andric RelocsCrel(const uint8_t *p) : hdr(readULEB128(p)) { this->p = p; }
sizeRelocsCrel279*52418fc2SDimitry Andric size_t size() const { return hdr / 8; }
beginRelocsCrel280*52418fc2SDimitry Andric const_iterator begin() const { return {hdr, p}; }
endRelocsCrel281*52418fc2SDimitry Andric const_iterator end() const { return {0, nullptr}; }
282*52418fc2SDimitry Andric };
283*52418fc2SDimitry Andric
// A view over a run of relocation records of type RelTy. For every RelTy
// other than llvm::object::Elf_Crel_Impl this is simply an ArrayRef<RelTy>.
template <class RelTy> struct Relocs : ArrayRef<RelTy> {
  Relocs() = default;
  Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {}
};

// Specialization for CREL records: instead of an ArrayRef, the relocations are
// exposed through RelocsCrel, whose constructors are inherited as-is.
template <bool is64>
struct Relocs<llvm::object::Elf_Crel_Impl<is64>> : RelocsCrel<is64> {
  using RelocsCrel<is64>::RelocsCrel;
};
293*52418fc2SDimitry Andric
// Addend accessors, one overload per relocation record type. Each returns
// int64_t so that sign extension is out of the way as early as possible.

// Rel overload: the record is not inspected and the result is always 0.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rel & /*rel*/) {
  return 0;
}

// Rela overload: returns the record's r_addend field.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Rela &rel) {
  const int64_t addend = rel.r_addend;
  return addend;
}

// Crel overload: returns the record's r_addend field.
template <class ELFT>
static inline int64_t getAddend(const typename ELFT::Crel &rel) {
  const int64_t addend = rel.r_addend;
  return addend;
}
308fe6060f1SDimitry Andric
309fe6060f1SDimitry Andric template <typename RelTy>
310*52418fc2SDimitry Andric inline Relocs<RelTy> sortRels(Relocs<RelTy> rels,
311*52418fc2SDimitry Andric SmallVector<RelTy, 0> &storage) {
312fe6060f1SDimitry Andric auto cmp = [](const RelTy &a, const RelTy &b) {
313fe6060f1SDimitry Andric return a.r_offset < b.r_offset;
314fe6060f1SDimitry Andric };
315fe6060f1SDimitry Andric if (!llvm::is_sorted(rels, cmp)) {
316fe6060f1SDimitry Andric storage.assign(rels.begin(), rels.end());
317fe6060f1SDimitry Andric llvm::stable_sort(storage, cmp);
318*52418fc2SDimitry Andric rels = Relocs<RelTy>(storage);
319fe6060f1SDimitry Andric }
320fe6060f1SDimitry Andric return rels;
321fe6060f1SDimitry Andric }
3225f757f3fSDimitry Andric
323*52418fc2SDimitry Andric template <bool is64>
324*52418fc2SDimitry Andric inline Relocs<llvm::object::Elf_Crel_Impl<is64>>
325*52418fc2SDimitry Andric sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels,
326*52418fc2SDimitry Andric SmallVector<llvm::object::Elf_Crel_Impl<is64>, 0> &storage) {
327*52418fc2SDimitry Andric return {};
328*52418fc2SDimitry Andric }
329*52418fc2SDimitry Andric
// Returns true if expr refers to a GOT entry. Note that this function returns
// false for TLS variables even though they need a GOT entry, because TLS
// variables use the GOT differently from regular variables.
bool needsGot(RelExpr expr);
334bdd1243dSDimitry Andric } // namespace lld::elf
3350b57cec5SDimitry Andric
3360b57cec5SDimitry Andric #endif
337