xref: /freebsd/contrib/llvm-project/lld/MachO/Symbols.h (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
//===- Symbols.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric 
95ffd83dbSDimitry Andric #ifndef LLD_MACHO_SYMBOLS_H
105ffd83dbSDimitry Andric #define LLD_MACHO_SYMBOLS_H
115ffd83dbSDimitry Andric 
12*349cc55cSDimitry Andric #include "Config.h"
13fe6060f1SDimitry Andric #include "InputFiles.h"
145ffd83dbSDimitry Andric #include "Target.h"
155ffd83dbSDimitry Andric #include "lld/Common/ErrorHandler.h"
165ffd83dbSDimitry Andric #include "lld/Common/Strings.h"
175ffd83dbSDimitry Andric #include "llvm/Object/Archive.h"
18e8d8bef9SDimitry Andric #include "llvm/Support/MathExtras.h"
195ffd83dbSDimitry Andric 
205ffd83dbSDimitry Andric namespace lld {
215ffd83dbSDimitry Andric namespace macho {
225ffd83dbSDimitry Andric 
23e8d8bef9SDimitry Andric class MachHeaderSection;
245ffd83dbSDimitry Andric 
255ffd83dbSDimitry Andric struct StringRefZ {
265ffd83dbSDimitry Andric   StringRefZ(const char *s) : data(s), size(-1) {}
275ffd83dbSDimitry Andric   StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
285ffd83dbSDimitry Andric 
295ffd83dbSDimitry Andric   const char *data;
305ffd83dbSDimitry Andric   const uint32_t size;
315ffd83dbSDimitry Andric };
325ffd83dbSDimitry Andric 
335ffd83dbSDimitry Andric class Symbol {
345ffd83dbSDimitry Andric public:
355ffd83dbSDimitry Andric   enum Kind {
365ffd83dbSDimitry Andric     DefinedKind,
375ffd83dbSDimitry Andric     UndefinedKind,
38e8d8bef9SDimitry Andric     CommonKind,
395ffd83dbSDimitry Andric     DylibKind,
405ffd83dbSDimitry Andric     LazyKind,
415ffd83dbSDimitry Andric   };
425ffd83dbSDimitry Andric 
43e8d8bef9SDimitry Andric   virtual ~Symbol() {}
44e8d8bef9SDimitry Andric 
45fe6060f1SDimitry Andric   Kind kind() const { return symbolKind; }
465ffd83dbSDimitry Andric 
47e8d8bef9SDimitry Andric   StringRef getName() const {
48e8d8bef9SDimitry Andric     if (nameSize == (uint32_t)-1)
49e8d8bef9SDimitry Andric       nameSize = strlen(nameData);
50e8d8bef9SDimitry Andric     return {nameData, nameSize};
51e8d8bef9SDimitry Andric   }
525ffd83dbSDimitry Andric 
53*349cc55cSDimitry Andric   bool isLive() const { return used; }
545ffd83dbSDimitry Andric 
55fe6060f1SDimitry Andric   virtual uint64_t getVA() const { return 0; }
565ffd83dbSDimitry Andric 
57e8d8bef9SDimitry Andric   virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
58e8d8bef9SDimitry Andric 
59e8d8bef9SDimitry Andric   // Only undefined or dylib symbols can be weak references. A weak reference
60e8d8bef9SDimitry Andric   // need not be satisfied at runtime, e.g. due to the symbol not being
61e8d8bef9SDimitry Andric   // available on a given target platform.
62e8d8bef9SDimitry Andric   virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); }
63e8d8bef9SDimitry Andric 
64e8d8bef9SDimitry Andric   virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
65e8d8bef9SDimitry Andric 
66e8d8bef9SDimitry Andric   // Whether this symbol is in the GOT or TLVPointer sections.
67e8d8bef9SDimitry Andric   bool isInGot() const { return gotIndex != UINT32_MAX; }
68e8d8bef9SDimitry Andric 
69e8d8bef9SDimitry Andric   // Whether this symbol is in the StubsSection.
70e8d8bef9SDimitry Andric   bool isInStubs() const { return stubsIndex != UINT32_MAX; }
71e8d8bef9SDimitry Andric 
72fe6060f1SDimitry Andric   uint64_t getStubVA() const;
73fe6060f1SDimitry Andric   uint64_t getGotVA() const;
74fe6060f1SDimitry Andric   uint64_t getTlvVA() const;
75fe6060f1SDimitry Andric   uint64_t resolveBranchVA() const {
76fe6060f1SDimitry Andric     assert(isa<Defined>(this) || isa<DylibSymbol>(this));
77fe6060f1SDimitry Andric     return isInStubs() ? getStubVA() : getVA();
78fe6060f1SDimitry Andric   }
79fe6060f1SDimitry Andric   uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
80fe6060f1SDimitry Andric   uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
81fe6060f1SDimitry Andric 
82e8d8bef9SDimitry Andric   // The index of this symbol in the GOT or the TLVPointer section, depending
83e8d8bef9SDimitry Andric   // on whether it is a thread-local. A given symbol cannot be referenced by
84e8d8bef9SDimitry Andric   // both these sections at once.
855ffd83dbSDimitry Andric   uint32_t gotIndex = UINT32_MAX;
865ffd83dbSDimitry Andric 
87e8d8bef9SDimitry Andric   uint32_t stubsIndex = UINT32_MAX;
88e8d8bef9SDimitry Andric 
89e8d8bef9SDimitry Andric   uint32_t symtabIndex = UINT32_MAX;
90e8d8bef9SDimitry Andric 
91fe6060f1SDimitry Andric   InputFile *getFile() const { return file; }
92fe6060f1SDimitry Andric 
935ffd83dbSDimitry Andric protected:
94fe6060f1SDimitry Andric   Symbol(Kind k, StringRefZ name, InputFile *file)
95*349cc55cSDimitry Andric       : symbolKind(k), nameData(name.data), file(file), nameSize(name.size),
96fe6060f1SDimitry Andric         isUsedInRegularObj(!file || isa<ObjFile>(file)),
97fe6060f1SDimitry Andric         used(!config->deadStrip) {}
985ffd83dbSDimitry Andric 
995ffd83dbSDimitry Andric   Kind symbolKind;
100e8d8bef9SDimitry Andric   const char *nameData;
101fe6060f1SDimitry Andric   InputFile *file;
102*349cc55cSDimitry Andric   mutable uint32_t nameSize;
103fe6060f1SDimitry Andric 
104fe6060f1SDimitry Andric public:
105fe6060f1SDimitry Andric   // True if this symbol was referenced by a regular (non-bitcode) object.
106fe6060f1SDimitry Andric   bool isUsedInRegularObj : 1;
107fe6060f1SDimitry Andric 
108fe6060f1SDimitry Andric   // True if an undefined or dylib symbol is used from a live section.
109fe6060f1SDimitry Andric   bool used : 1;
1105ffd83dbSDimitry Andric };
1115ffd83dbSDimitry Andric 
1125ffd83dbSDimitry Andric class Defined : public Symbol {
1135ffd83dbSDimitry Andric public:
114fe6060f1SDimitry Andric   Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
115fe6060f1SDimitry Andric           uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
116*349cc55cSDimitry Andric           bool isThumb, bool isReferencedDynamically, bool noDeadStrip,
117*349cc55cSDimitry Andric           bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false);
118e8d8bef9SDimitry Andric 
119e8d8bef9SDimitry Andric   bool isWeakDef() const override { return weakDef; }
120e8d8bef9SDimitry Andric   bool isExternalWeakDef() const {
121e8d8bef9SDimitry Andric     return isWeakDef() && isExternal() && !privateExtern;
122e8d8bef9SDimitry Andric   }
123*349cc55cSDimitry Andric   bool isTlv() const override;
124e8d8bef9SDimitry Andric 
125e8d8bef9SDimitry Andric   bool isExternal() const { return external; }
126e8d8bef9SDimitry Andric   bool isAbsolute() const { return isec == nullptr; }
127e8d8bef9SDimitry Andric 
128e8d8bef9SDimitry Andric   uint64_t getVA() const override;
129e8d8bef9SDimitry Andric 
130*349cc55cSDimitry Andric   // Ensure this symbol's pointers to InputSections point to their canonical
131*349cc55cSDimitry Andric   // copies.
132*349cc55cSDimitry Andric   void canonicalize();
133*349cc55cSDimitry Andric 
134e8d8bef9SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
1355ffd83dbSDimitry Andric 
136*349cc55cSDimitry Andric   // Place the bitfields first so that they can get placed in the tail padding
137*349cc55cSDimitry Andric   // of the parent class, on platforms which support it.
138e8d8bef9SDimitry Andric   bool overridesWeakDef : 1;
139fe6060f1SDimitry Andric   // Whether this symbol should appear in the output binary's export trie.
140e8d8bef9SDimitry Andric   bool privateExtern : 1;
141fe6060f1SDimitry Andric   // Whether this symbol should appear in the output symbol table.
142fe6060f1SDimitry Andric   bool includeInSymtab : 1;
143fe6060f1SDimitry Andric   // Only relevant when compiling for Thumb-supporting arm32 archs.
144fe6060f1SDimitry Andric   bool thumb : 1;
145fe6060f1SDimitry Andric   // Symbols marked referencedDynamically won't be removed from the output's
146fe6060f1SDimitry Andric   // symbol table by tools like strip. In theory, this could be set on arbitrary
147fe6060f1SDimitry Andric   // symbols in input object files. In practice, it's used solely for the
148fe6060f1SDimitry Andric   // synthetic __mh_execute_header symbol.
149fe6060f1SDimitry Andric   // This is information for the static linker, and it's also written to the
150fe6060f1SDimitry Andric   // output file's symbol table for tools running later (such as `strip`).
151fe6060f1SDimitry Andric   bool referencedDynamically : 1;
152fe6060f1SDimitry Andric   // Set on symbols that should not be removed by dead code stripping.
153fe6060f1SDimitry Andric   // Set for example on `__attribute__((used))` globals, or on some Objective-C
154fe6060f1SDimitry Andric   // metadata. This is information only for the static linker and not written
155fe6060f1SDimitry Andric   // to the output.
156fe6060f1SDimitry Andric   bool noDeadStrip : 1;
157e8d8bef9SDimitry Andric 
158*349cc55cSDimitry Andric   bool weakDefCanBeHidden : 1;
159*349cc55cSDimitry Andric 
160e8d8bef9SDimitry Andric private:
161e8d8bef9SDimitry Andric   const bool weakDef : 1;
162e8d8bef9SDimitry Andric   const bool external : 1;
163*349cc55cSDimitry Andric 
164*349cc55cSDimitry Andric public:
165*349cc55cSDimitry Andric   InputSection *isec;
166*349cc55cSDimitry Andric   // Contains the offset from the containing subsection. Note that this is
167*349cc55cSDimitry Andric   // different from nlist::n_value, which is the absolute address of the symbol.
168*349cc55cSDimitry Andric   uint64_t value;
169*349cc55cSDimitry Andric   // size is only calculated for regular (non-bitcode) symbols.
170*349cc55cSDimitry Andric   uint64_t size;
171*349cc55cSDimitry Andric   ConcatInputSection *unwindEntry = nullptr;
1725ffd83dbSDimitry Andric };
1735ffd83dbSDimitry Andric 
// This enum does double-duty: as a symbol property, it indicates whether & how
// a dylib symbol is referenced. As a DylibFile property, it indicates the kind
// of referenced symbols contained within the file. If there are both weak
// and strong references to the same file, we will count the file as
// strongly-referenced.
//
// The enumerators are ordered by strength (Unreferenced < Weak < Strong) so
// that states can be compared and merged with std::max. The values fit in two
// bits, which is the width of the bit-fields that store them.
enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 };
180e8d8bef9SDimitry Andric 
1815ffd83dbSDimitry Andric class Undefined : public Symbol {
1825ffd83dbSDimitry Andric public:
183fe6060f1SDimitry Andric   Undefined(StringRefZ name, InputFile *file, RefState refState)
184fe6060f1SDimitry Andric       : Symbol(UndefinedKind, name, file), refState(refState) {
185e8d8bef9SDimitry Andric     assert(refState != RefState::Unreferenced);
186e8d8bef9SDimitry Andric   }
187e8d8bef9SDimitry Andric 
188e8d8bef9SDimitry Andric   bool isWeakRef() const override { return refState == RefState::Weak; }
1895ffd83dbSDimitry Andric 
1905ffd83dbSDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
191e8d8bef9SDimitry Andric 
192e8d8bef9SDimitry Andric   RefState refState : 2;
193e8d8bef9SDimitry Andric };
194e8d8bef9SDimitry Andric 
195e8d8bef9SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions without
196e8d8bef9SDimitry Andric // initialization expressions (such as "int foo;") to header files. These are
197e8d8bef9SDimitry Andric // called tentative definitions.
198e8d8bef9SDimitry Andric //
199e8d8bef9SDimitry Andric // Using tentative definitions is usually considered a bad practice; you should
200e8d8bef9SDimitry Andric // write only declarations (such as "extern int foo;") to header files.
201e8d8bef9SDimitry Andric // Nevertheless, the linker and the compiler have to do something to support
202e8d8bef9SDimitry Andric // bad code by allowing duplicate definitions for this particular case.
203e8d8bef9SDimitry Andric //
204e8d8bef9SDimitry Andric // The compiler creates common symbols when it sees tentative definitions.
205e8d8bef9SDimitry Andric // (You can suppress this behavior and let the compiler create a regular
206e8d8bef9SDimitry Andric // defined symbol by passing -fno-common. -fno-common is the default in clang
207e8d8bef9SDimitry Andric // as of LLVM 11.0.) When linking the final binary, if there are remaining
208e8d8bef9SDimitry Andric // common symbols after name resolution is complete, the linker converts them
209e8d8bef9SDimitry Andric // to regular defined symbols in a __common section.
210e8d8bef9SDimitry Andric class CommonSymbol : public Symbol {
211e8d8bef9SDimitry Andric public:
212e8d8bef9SDimitry Andric   CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
213e8d8bef9SDimitry Andric                bool isPrivateExtern)
214fe6060f1SDimitry Andric       : Symbol(CommonKind, name, file), size(size),
215e8d8bef9SDimitry Andric         align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
216e8d8bef9SDimitry Andric         privateExtern(isPrivateExtern) {
217e8d8bef9SDimitry Andric     // TODO: cap maximum alignment
218e8d8bef9SDimitry Andric   }
219e8d8bef9SDimitry Andric 
220e8d8bef9SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
221e8d8bef9SDimitry Andric 
222e8d8bef9SDimitry Andric   const uint64_t size;
223e8d8bef9SDimitry Andric   const uint32_t align;
224e8d8bef9SDimitry Andric   const bool privateExtern;
2255ffd83dbSDimitry Andric };
2265ffd83dbSDimitry Andric 
2275ffd83dbSDimitry Andric class DylibSymbol : public Symbol {
2285ffd83dbSDimitry Andric public:
229e8d8bef9SDimitry Andric   DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
230e8d8bef9SDimitry Andric               RefState refState, bool isTlv)
231fe6060f1SDimitry Andric       : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef),
232fe6060f1SDimitry Andric         tlv(isTlv) {
233fe6060f1SDimitry Andric     if (file && refState > RefState::Unreferenced)
234fe6060f1SDimitry Andric       file->numReferencedSymbols++;
235fe6060f1SDimitry Andric   }
236e8d8bef9SDimitry Andric 
237fe6060f1SDimitry Andric   uint64_t getVA() const override;
238e8d8bef9SDimitry Andric   bool isWeakDef() const override { return weakDef; }
239e8d8bef9SDimitry Andric   bool isWeakRef() const override { return refState == RefState::Weak; }
240e8d8bef9SDimitry Andric   bool isReferenced() const { return refState != RefState::Unreferenced; }
241e8d8bef9SDimitry Andric   bool isTlv() const override { return tlv; }
242fe6060f1SDimitry Andric   bool isDynamicLookup() const { return file == nullptr; }
243e8d8bef9SDimitry Andric   bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
2445ffd83dbSDimitry Andric 
245fe6060f1SDimitry Andric   DylibFile *getFile() const {
246fe6060f1SDimitry Andric     assert(!isDynamicLookup());
247fe6060f1SDimitry Andric     return cast<DylibFile>(file);
248fe6060f1SDimitry Andric   }
249fe6060f1SDimitry Andric 
2505ffd83dbSDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
2515ffd83dbSDimitry Andric 
252e8d8bef9SDimitry Andric   uint32_t stubsHelperIndex = UINT32_MAX;
2535ffd83dbSDimitry Andric   uint32_t lazyBindOffset = UINT32_MAX;
254e8d8bef9SDimitry Andric 
255fe6060f1SDimitry Andric   RefState getRefState() const { return refState; }
256fe6060f1SDimitry Andric 
257fe6060f1SDimitry Andric   void reference(RefState newState) {
258fe6060f1SDimitry Andric     assert(newState > RefState::Unreferenced);
259fe6060f1SDimitry Andric     if (refState == RefState::Unreferenced && file)
260fe6060f1SDimitry Andric       getFile()->numReferencedSymbols++;
261fe6060f1SDimitry Andric     refState = std::max(refState, newState);
262fe6060f1SDimitry Andric   }
263fe6060f1SDimitry Andric 
264fe6060f1SDimitry Andric   void unreference() {
265fe6060f1SDimitry Andric     // dynamic_lookup symbols have no file.
266fe6060f1SDimitry Andric     if (refState > RefState::Unreferenced && file) {
267fe6060f1SDimitry Andric       assert(getFile()->numReferencedSymbols > 0);
268fe6060f1SDimitry Andric       getFile()->numReferencedSymbols--;
269fe6060f1SDimitry Andric     }
270fe6060f1SDimitry Andric   }
271e8d8bef9SDimitry Andric 
272e8d8bef9SDimitry Andric private:
273fe6060f1SDimitry Andric   RefState refState : 2;
274e8d8bef9SDimitry Andric   const bool weakDef : 1;
275e8d8bef9SDimitry Andric   const bool tlv : 1;
2765ffd83dbSDimitry Andric };
2775ffd83dbSDimitry Andric 
2785ffd83dbSDimitry Andric class LazySymbol : public Symbol {
2795ffd83dbSDimitry Andric public:
2805ffd83dbSDimitry Andric   LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
281fe6060f1SDimitry Andric       : Symbol(LazyKind, sym.getName(), file), sym(sym) {}
282fe6060f1SDimitry Andric 
283fe6060f1SDimitry Andric   ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
284fe6060f1SDimitry Andric   void fetchArchiveMember();
2855ffd83dbSDimitry Andric 
2865ffd83dbSDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
2875ffd83dbSDimitry Andric 
2885ffd83dbSDimitry Andric private:
2895ffd83dbSDimitry Andric   const llvm::object::Archive::Symbol sym;
2905ffd83dbSDimitry Andric };
2915ffd83dbSDimitry Andric 
2925ffd83dbSDimitry Andric union SymbolUnion {
2935ffd83dbSDimitry Andric   alignas(Defined) char a[sizeof(Defined)];
2945ffd83dbSDimitry Andric   alignas(Undefined) char b[sizeof(Undefined)];
295e8d8bef9SDimitry Andric   alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
296e8d8bef9SDimitry Andric   alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
297e8d8bef9SDimitry Andric   alignas(LazySymbol) char e[sizeof(LazySymbol)];
2985ffd83dbSDimitry Andric };
2995ffd83dbSDimitry Andric 
3005ffd83dbSDimitry Andric template <typename T, typename... ArgT>
301e8d8bef9SDimitry Andric T *replaceSymbol(Symbol *s, ArgT &&...arg) {
3025ffd83dbSDimitry Andric   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
3035ffd83dbSDimitry Andric   static_assert(alignof(T) <= alignof(SymbolUnion),
3045ffd83dbSDimitry Andric                 "SymbolUnion not aligned enough");
3055ffd83dbSDimitry Andric   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
3065ffd83dbSDimitry Andric          "Not a Symbol");
3075ffd83dbSDimitry Andric 
308fe6060f1SDimitry Andric   bool isUsedInRegularObj = s->isUsedInRegularObj;
309fe6060f1SDimitry Andric   bool used = s->used;
310fe6060f1SDimitry Andric   T *sym = new (s) T(std::forward<ArgT>(arg)...);
311fe6060f1SDimitry Andric   sym->isUsedInRegularObj |= isUsedInRegularObj;
312fe6060f1SDimitry Andric   sym->used |= used;
313fe6060f1SDimitry Andric   return sym;
3145ffd83dbSDimitry Andric }
3155ffd83dbSDimitry Andric 
3165ffd83dbSDimitry Andric } // namespace macho
3175ffd83dbSDimitry Andric 
3185ffd83dbSDimitry Andric std::string toString(const macho::Symbol &);
319e8d8bef9SDimitry Andric std::string toMachOString(const llvm::object::Archive::Symbol &);
320e8d8bef9SDimitry Andric 
3215ffd83dbSDimitry Andric } // namespace lld
3225ffd83dbSDimitry Andric 
3235ffd83dbSDimitry Andric #endif
324