xref: /freebsd/contrib/llvm-project/lld/MachO/Symbols.h (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
15ffd83dbSDimitry Andric //===- Symbols.h ------------------------------------------------*- C++ -*-===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric 
95ffd83dbSDimitry Andric #ifndef LLD_MACHO_SYMBOLS_H
105ffd83dbSDimitry Andric #define LLD_MACHO_SYMBOLS_H
115ffd83dbSDimitry Andric 
#include "InputSection.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/MathExtras.h"

#include <type_traits>
185ffd83dbSDimitry Andric 
195ffd83dbSDimitry Andric namespace lld {
205ffd83dbSDimitry Andric namespace macho {
215ffd83dbSDimitry Andric 
// Types that this header only names through pointers, so forward
// declarations are sufficient here.
class ArchiveFile;
class DylibFile;
class InputSection;
class MachHeaderSection;
265ffd83dbSDimitry Andric 
275ffd83dbSDimitry Andric struct StringRefZ {
285ffd83dbSDimitry Andric   StringRefZ(const char *s) : data(s), size(-1) {}
295ffd83dbSDimitry Andric   StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
305ffd83dbSDimitry Andric 
315ffd83dbSDimitry Andric   const char *data;
325ffd83dbSDimitry Andric   const uint32_t size;
335ffd83dbSDimitry Andric };
345ffd83dbSDimitry Andric 
355ffd83dbSDimitry Andric class Symbol {
365ffd83dbSDimitry Andric public:
375ffd83dbSDimitry Andric   enum Kind {
385ffd83dbSDimitry Andric     DefinedKind,
395ffd83dbSDimitry Andric     UndefinedKind,
40*e8d8bef9SDimitry Andric     CommonKind,
415ffd83dbSDimitry Andric     DylibKind,
425ffd83dbSDimitry Andric     LazyKind,
43*e8d8bef9SDimitry Andric     DSOHandleKind,
445ffd83dbSDimitry Andric   };
455ffd83dbSDimitry Andric 
46*e8d8bef9SDimitry Andric   virtual ~Symbol() {}
47*e8d8bef9SDimitry Andric 
485ffd83dbSDimitry Andric   Kind kind() const { return static_cast<Kind>(symbolKind); }
495ffd83dbSDimitry Andric 
50*e8d8bef9SDimitry Andric   StringRef getName() const {
51*e8d8bef9SDimitry Andric     if (nameSize == (uint32_t)-1)
52*e8d8bef9SDimitry Andric       nameSize = strlen(nameData);
53*e8d8bef9SDimitry Andric     return {nameData, nameSize};
54*e8d8bef9SDimitry Andric   }
555ffd83dbSDimitry Andric 
56*e8d8bef9SDimitry Andric   virtual uint64_t getVA() const { return 0; }
575ffd83dbSDimitry Andric 
58*e8d8bef9SDimitry Andric   virtual uint64_t getFileOffset() const {
59*e8d8bef9SDimitry Andric     llvm_unreachable("attempt to get an offset from a non-defined symbol");
60*e8d8bef9SDimitry Andric   }
615ffd83dbSDimitry Andric 
62*e8d8bef9SDimitry Andric   virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
63*e8d8bef9SDimitry Andric 
64*e8d8bef9SDimitry Andric   // Only undefined or dylib symbols can be weak references. A weak reference
65*e8d8bef9SDimitry Andric   // need not be satisfied at runtime, e.g. due to the symbol not being
66*e8d8bef9SDimitry Andric   // available on a given target platform.
67*e8d8bef9SDimitry Andric   virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); }
68*e8d8bef9SDimitry Andric 
69*e8d8bef9SDimitry Andric   virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
70*e8d8bef9SDimitry Andric 
71*e8d8bef9SDimitry Andric   // Whether this symbol is in the GOT or TLVPointer sections.
72*e8d8bef9SDimitry Andric   bool isInGot() const { return gotIndex != UINT32_MAX; }
73*e8d8bef9SDimitry Andric 
74*e8d8bef9SDimitry Andric   // Whether this symbol is in the StubsSection.
75*e8d8bef9SDimitry Andric   bool isInStubs() const { return stubsIndex != UINT32_MAX; }
76*e8d8bef9SDimitry Andric 
77*e8d8bef9SDimitry Andric   // The index of this symbol in the GOT or the TLVPointer section, depending
78*e8d8bef9SDimitry Andric   // on whether it is a thread-local. A given symbol cannot be referenced by
79*e8d8bef9SDimitry Andric   // both these sections at once.
805ffd83dbSDimitry Andric   uint32_t gotIndex = UINT32_MAX;
815ffd83dbSDimitry Andric 
82*e8d8bef9SDimitry Andric   uint32_t stubsIndex = UINT32_MAX;
83*e8d8bef9SDimitry Andric 
84*e8d8bef9SDimitry Andric   uint32_t symtabIndex = UINT32_MAX;
85*e8d8bef9SDimitry Andric 
865ffd83dbSDimitry Andric protected:
87*e8d8bef9SDimitry Andric   Symbol(Kind k, StringRefZ name)
88*e8d8bef9SDimitry Andric       : symbolKind(k), nameData(name.data), nameSize(name.size) {}
895ffd83dbSDimitry Andric 
905ffd83dbSDimitry Andric   Kind symbolKind;
91*e8d8bef9SDimitry Andric   const char *nameData;
92*e8d8bef9SDimitry Andric   mutable uint32_t nameSize;
935ffd83dbSDimitry Andric };
945ffd83dbSDimitry Andric 
955ffd83dbSDimitry Andric class Defined : public Symbol {
965ffd83dbSDimitry Andric public:
97*e8d8bef9SDimitry Andric   Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef,
98*e8d8bef9SDimitry Andric           bool isExternal, bool isPrivateExtern)
99*e8d8bef9SDimitry Andric       : Symbol(DefinedKind, name), isec(isec), value(value),
100*e8d8bef9SDimitry Andric         overridesWeakDef(false), privateExtern(isPrivateExtern),
101*e8d8bef9SDimitry Andric         weakDef(isWeakDef), external(isExternal) {}
102*e8d8bef9SDimitry Andric 
103*e8d8bef9SDimitry Andric   bool isWeakDef() const override { return weakDef; }
104*e8d8bef9SDimitry Andric   bool isExternalWeakDef() const {
105*e8d8bef9SDimitry Andric     return isWeakDef() && isExternal() && !privateExtern;
106*e8d8bef9SDimitry Andric   }
107*e8d8bef9SDimitry Andric   bool isTlv() const override {
108*e8d8bef9SDimitry Andric     return !isAbsolute() && isThreadLocalVariables(isec->flags);
109*e8d8bef9SDimitry Andric   }
110*e8d8bef9SDimitry Andric 
111*e8d8bef9SDimitry Andric   bool isExternal() const { return external; }
112*e8d8bef9SDimitry Andric   bool isAbsolute() const { return isec == nullptr; }
113*e8d8bef9SDimitry Andric 
114*e8d8bef9SDimitry Andric   uint64_t getVA() const override;
115*e8d8bef9SDimitry Andric   uint64_t getFileOffset() const override;
116*e8d8bef9SDimitry Andric 
117*e8d8bef9SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
1185ffd83dbSDimitry Andric 
1195ffd83dbSDimitry Andric   InputSection *isec;
1205ffd83dbSDimitry Andric   uint32_t value;
1215ffd83dbSDimitry Andric 
122*e8d8bef9SDimitry Andric   bool overridesWeakDef : 1;
123*e8d8bef9SDimitry Andric   bool privateExtern : 1;
124*e8d8bef9SDimitry Andric 
125*e8d8bef9SDimitry Andric private:
126*e8d8bef9SDimitry Andric   const bool weakDef : 1;
127*e8d8bef9SDimitry Andric   const bool external : 1;
1285ffd83dbSDimitry Andric };
1295ffd83dbSDimitry Andric 
// This enum serves two purposes. As a property of a symbol, it records whether
// and how a dylib symbol is referenced. As a property of a DylibFile, it
// records the kind of referenced symbols the file contains. When the same file
// has both weak and strong references, the file counts as strongly-referenced.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
136*e8d8bef9SDimitry Andric 
1375ffd83dbSDimitry Andric class Undefined : public Symbol {
1385ffd83dbSDimitry Andric public:
139*e8d8bef9SDimitry Andric   Undefined(StringRefZ name, RefState refState)
140*e8d8bef9SDimitry Andric       : Symbol(UndefinedKind, name), refState(refState) {
141*e8d8bef9SDimitry Andric     assert(refState != RefState::Unreferenced);
142*e8d8bef9SDimitry Andric   }
143*e8d8bef9SDimitry Andric 
144*e8d8bef9SDimitry Andric   bool isWeakRef() const override { return refState == RefState::Weak; }
1455ffd83dbSDimitry Andric 
1465ffd83dbSDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
147*e8d8bef9SDimitry Andric 
148*e8d8bef9SDimitry Andric   RefState refState : 2;
149*e8d8bef9SDimitry Andric };
150*e8d8bef9SDimitry Andric 
151*e8d8bef9SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions without
152*e8d8bef9SDimitry Andric // initialization expressions (such as "int foo;") to header files. These are
153*e8d8bef9SDimitry Andric // called tentative definitions.
154*e8d8bef9SDimitry Andric //
155*e8d8bef9SDimitry Andric // Using tentative definitions is usually considered a bad practice; you should
156*e8d8bef9SDimitry Andric // write only declarations (such as "extern int foo;") to header files.
157*e8d8bef9SDimitry Andric // Nevertheless, the linker and the compiler have to do something to support
158*e8d8bef9SDimitry Andric // bad code by allowing duplicate definitions for this particular case.
159*e8d8bef9SDimitry Andric //
160*e8d8bef9SDimitry Andric // The compiler creates common symbols when it sees tentative definitions.
161*e8d8bef9SDimitry Andric // (You can suppress this behavior and let the compiler create a regular
162*e8d8bef9SDimitry Andric // defined symbol by passing -fno-common. -fno-common is the default in clang
163*e8d8bef9SDimitry Andric // as of LLVM 11.0.) When linking the final binary, if there are remaining
164*e8d8bef9SDimitry Andric // common symbols after name resolution is complete, the linker converts them
165*e8d8bef9SDimitry Andric // to regular defined symbols in a __common section.
166*e8d8bef9SDimitry Andric class CommonSymbol : public Symbol {
167*e8d8bef9SDimitry Andric public:
168*e8d8bef9SDimitry Andric   CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
169*e8d8bef9SDimitry Andric                bool isPrivateExtern)
170*e8d8bef9SDimitry Andric       : Symbol(CommonKind, name), file(file), size(size),
171*e8d8bef9SDimitry Andric         align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
172*e8d8bef9SDimitry Andric         privateExtern(isPrivateExtern) {
173*e8d8bef9SDimitry Andric     // TODO: cap maximum alignment
174*e8d8bef9SDimitry Andric   }
175*e8d8bef9SDimitry Andric 
176*e8d8bef9SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
177*e8d8bef9SDimitry Andric 
178*e8d8bef9SDimitry Andric   InputFile *const file;
179*e8d8bef9SDimitry Andric   const uint64_t size;
180*e8d8bef9SDimitry Andric   const uint32_t align;
181*e8d8bef9SDimitry Andric   const bool privateExtern;
1825ffd83dbSDimitry Andric };
1835ffd83dbSDimitry Andric 
1845ffd83dbSDimitry Andric class DylibSymbol : public Symbol {
1855ffd83dbSDimitry Andric public:
186*e8d8bef9SDimitry Andric   DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
187*e8d8bef9SDimitry Andric               RefState refState, bool isTlv)
188*e8d8bef9SDimitry Andric       : Symbol(DylibKind, name), file(file), refState(refState),
189*e8d8bef9SDimitry Andric         weakDef(isWeakDef), tlv(isTlv) {}
190*e8d8bef9SDimitry Andric 
191*e8d8bef9SDimitry Andric   bool isWeakDef() const override { return weakDef; }
192*e8d8bef9SDimitry Andric   bool isWeakRef() const override { return refState == RefState::Weak; }
193*e8d8bef9SDimitry Andric   bool isReferenced() const { return refState != RefState::Unreferenced; }
194*e8d8bef9SDimitry Andric   bool isTlv() const override { return tlv; }
195*e8d8bef9SDimitry Andric   bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
1965ffd83dbSDimitry Andric 
1975ffd83dbSDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
1985ffd83dbSDimitry Andric 
1995ffd83dbSDimitry Andric   DylibFile *file;
200*e8d8bef9SDimitry Andric   uint32_t stubsHelperIndex = UINT32_MAX;
2015ffd83dbSDimitry Andric   uint32_t lazyBindOffset = UINT32_MAX;
202*e8d8bef9SDimitry Andric 
203*e8d8bef9SDimitry Andric   RefState refState : 2;
204*e8d8bef9SDimitry Andric 
205*e8d8bef9SDimitry Andric private:
206*e8d8bef9SDimitry Andric   const bool weakDef : 1;
207*e8d8bef9SDimitry Andric   const bool tlv : 1;
2085ffd83dbSDimitry Andric };
2095ffd83dbSDimitry Andric 
2105ffd83dbSDimitry Andric class LazySymbol : public Symbol {
2115ffd83dbSDimitry Andric public:
2125ffd83dbSDimitry Andric   LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
2135ffd83dbSDimitry Andric       : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {}
2145ffd83dbSDimitry Andric 
2155ffd83dbSDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
2165ffd83dbSDimitry Andric 
2175ffd83dbSDimitry Andric   void fetchArchiveMember();
2185ffd83dbSDimitry Andric 
2195ffd83dbSDimitry Andric private:
2205ffd83dbSDimitry Andric   ArchiveFile *file;
2215ffd83dbSDimitry Andric   const llvm::object::Archive::Symbol sym;
2225ffd83dbSDimitry Andric };
2235ffd83dbSDimitry Andric 
224*e8d8bef9SDimitry Andric // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which
225*e8d8bef9SDimitry Andric // does e.g. cleanup of static global variables. The ABI document says that the
226*e8d8bef9SDimitry Andric // pointer can point to any address in one of the dylib's segments, but in
227*e8d8bef9SDimitry Andric // practice ld64 seems to set it to point to the header, so that's what's
228*e8d8bef9SDimitry Andric // implemented here.
229*e8d8bef9SDimitry Andric //
230*e8d8bef9SDimitry Andric // The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet
231*e8d8bef9SDimitry Andric // tested this on an ARM platform.
232*e8d8bef9SDimitry Andric //
233*e8d8bef9SDimitry Andric // DSOHandle effectively functions like a Defined symbol, but it doesn't belong
234*e8d8bef9SDimitry Andric // to an InputSection.
235*e8d8bef9SDimitry Andric class DSOHandle : public Symbol {
236*e8d8bef9SDimitry Andric public:
237*e8d8bef9SDimitry Andric   DSOHandle(const MachHeaderSection *header)
238*e8d8bef9SDimitry Andric       : Symbol(DSOHandleKind, name), header(header) {}
2395ffd83dbSDimitry Andric 
240*e8d8bef9SDimitry Andric   const MachHeaderSection *header;
241*e8d8bef9SDimitry Andric 
242*e8d8bef9SDimitry Andric   uint64_t getVA() const override;
243*e8d8bef9SDimitry Andric 
244*e8d8bef9SDimitry Andric   uint64_t getFileOffset() const override;
245*e8d8bef9SDimitry Andric 
246*e8d8bef9SDimitry Andric   bool isWeakDef() const override { return false; }
247*e8d8bef9SDimitry Andric 
248*e8d8bef9SDimitry Andric   bool isTlv() const override { return false; }
249*e8d8bef9SDimitry Andric 
250*e8d8bef9SDimitry Andric   static constexpr StringRef name = "___dso_handle";
251*e8d8bef9SDimitry Andric 
252*e8d8bef9SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; }
253*e8d8bef9SDimitry Andric };
2545ffd83dbSDimitry Andric 
// Raw storage that is large enough and suitably aligned for any of the
// concrete symbol classes listed below. Each member is a plain char array
// with the size and alignment of one symbol class; replaceSymbol() checks at
// compile time that the type it constructs fits in this union.
union SymbolUnion {
  alignas(Defined) char a[sizeof(Defined)];
  alignas(Undefined) char b[sizeof(Undefined)];
  alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
  alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
  alignas(LazySymbol) char e[sizeof(LazySymbol)];
  alignas(DSOHandle) char f[sizeof(DSOHandle)];
};
2635ffd83dbSDimitry Andric 
2645ffd83dbSDimitry Andric template <typename T, typename... ArgT>
265*e8d8bef9SDimitry Andric T *replaceSymbol(Symbol *s, ArgT &&... arg) {
2665ffd83dbSDimitry Andric   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
2675ffd83dbSDimitry Andric   static_assert(alignof(T) <= alignof(SymbolUnion),
2685ffd83dbSDimitry Andric                 "SymbolUnion not aligned enough");
2695ffd83dbSDimitry Andric   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
2705ffd83dbSDimitry Andric          "Not a Symbol");
2715ffd83dbSDimitry Andric 
272*e8d8bef9SDimitry Andric   return new (s) T(std::forward<ArgT>(arg)...);
2735ffd83dbSDimitry Andric }
2745ffd83dbSDimitry Andric 
2755ffd83dbSDimitry Andric } // namespace macho
2765ffd83dbSDimitry Andric 
2775ffd83dbSDimitry Andric std::string toString(const macho::Symbol &);
278*e8d8bef9SDimitry Andric std::string toMachOString(const llvm::object::Archive::Symbol &);
279*e8d8bef9SDimitry Andric 
2805ffd83dbSDimitry Andric } // namespace lld
2815ffd83dbSDimitry Andric 
2825ffd83dbSDimitry Andric #endif
283