xref: /freebsd/contrib/llvm-project/lld/MachO/Symbols.h (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_SYMBOLS_H
10 #define LLD_MACHO_SYMBOLS_H
11 
#include "Config.h"
#include "InputFiles.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/MathExtras.h"
#include <type_traits>
19 
20 namespace lld {
21 namespace macho {
22 
23 class MachHeaderSection;
24 
25 struct StringRefZ {
26   StringRefZ(const char *s) : data(s), size(-1) {}
27   StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
28 
29   const char *data;
30   const uint32_t size;
31 };
32 
33 class Symbol {
34 public:
35   enum Kind {
36     DefinedKind,
37     UndefinedKind,
38     CommonKind,
39     DylibKind,
40     LazyKind,
41   };
42 
43   virtual ~Symbol() {}
44 
45   Kind kind() const { return symbolKind; }
46 
47   StringRef getName() const {
48     if (nameSize == (uint32_t)-1)
49       nameSize = strlen(nameData);
50     return {nameData, nameSize};
51   }
52 
53   bool isLive() const { return used; }
54 
55   virtual uint64_t getVA() const { return 0; }
56 
57   virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
58 
59   // Only undefined or dylib symbols can be weak references. A weak reference
60   // need not be satisfied at runtime, e.g. due to the symbol not being
61   // available on a given target platform.
62   virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); }
63 
64   virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
65 
66   // Whether this symbol is in the GOT or TLVPointer sections.
67   bool isInGot() const { return gotIndex != UINT32_MAX; }
68 
69   // Whether this symbol is in the StubsSection.
70   bool isInStubs() const { return stubsIndex != UINT32_MAX; }
71 
72   uint64_t getStubVA() const;
73   uint64_t getGotVA() const;
74   uint64_t getTlvVA() const;
75   uint64_t resolveBranchVA() const {
76     assert(isa<Defined>(this) || isa<DylibSymbol>(this));
77     return isInStubs() ? getStubVA() : getVA();
78   }
79   uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
80   uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
81 
82   // The index of this symbol in the GOT or the TLVPointer section, depending
83   // on whether it is a thread-local. A given symbol cannot be referenced by
84   // both these sections at once.
85   uint32_t gotIndex = UINT32_MAX;
86 
87   uint32_t stubsIndex = UINT32_MAX;
88 
89   uint32_t symtabIndex = UINT32_MAX;
90 
91   InputFile *getFile() const { return file; }
92 
93 protected:
94   Symbol(Kind k, StringRefZ name, InputFile *file)
95       : symbolKind(k), nameData(name.data), file(file), nameSize(name.size),
96         isUsedInRegularObj(!file || isa<ObjFile>(file)),
97         used(!config->deadStrip) {}
98 
99   Kind symbolKind;
100   const char *nameData;
101   InputFile *file;
102   mutable uint32_t nameSize;
103 
104 public:
105   // True if this symbol was referenced by a regular (non-bitcode) object.
106   bool isUsedInRegularObj : 1;
107 
108   // True if an undefined or dylib symbol is used from a live section.
109   bool used : 1;
110 };
111 
112 class Defined : public Symbol {
113 public:
114   Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
115           uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
116           bool isThumb, bool isReferencedDynamically, bool noDeadStrip,
117           bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false);
118 
119   bool isWeakDef() const override { return weakDef; }
120   bool isExternalWeakDef() const {
121     return isWeakDef() && isExternal() && !privateExtern;
122   }
123   bool isTlv() const override;
124 
125   bool isExternal() const { return external; }
126   bool isAbsolute() const { return isec == nullptr; }
127 
128   uint64_t getVA() const override;
129 
130   // Ensure this symbol's pointers to InputSections point to their canonical
131   // copies.
132   void canonicalize();
133 
134   static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
135 
136   // Place the bitfields first so that they can get placed in the tail padding
137   // of the parent class, on platforms which support it.
138   bool overridesWeakDef : 1;
139   // Whether this symbol should appear in the output binary's export trie.
140   bool privateExtern : 1;
141   // Whether this symbol should appear in the output symbol table.
142   bool includeInSymtab : 1;
143   // Only relevant when compiling for Thumb-supporting arm32 archs.
144   bool thumb : 1;
145   // Symbols marked referencedDynamically won't be removed from the output's
146   // symbol table by tools like strip. In theory, this could be set on arbitrary
147   // symbols in input object files. In practice, it's used solely for the
148   // synthetic __mh_execute_header symbol.
149   // This is information for the static linker, and it's also written to the
150   // output file's symbol table for tools running later (such as `strip`).
151   bool referencedDynamically : 1;
152   // Set on symbols that should not be removed by dead code stripping.
153   // Set for example on `__attribute__((used))` globals, or on some Objective-C
154   // metadata. This is information only for the static linker and not written
155   // to the output.
156   bool noDeadStrip : 1;
157 
158   bool weakDefCanBeHidden : 1;
159 
160 private:
161   const bool weakDef : 1;
162   const bool external : 1;
163 
164 public:
165   InputSection *isec;
166   // Contains the offset from the containing subsection. Note that this is
167   // different from nlist::n_value, which is the absolute address of the symbol.
168   uint64_t value;
169   // size is only calculated for regular (non-bitcode) symbols.
170   uint64_t size;
171   ConcatInputSection *unwindEntry = nullptr;
172 };
173 
// RefState serves two purposes. On a symbol it records whether, and how
// strongly, a dylib symbol is referenced. On a DylibFile it records the kind
// of referenced symbols the file contains. A file that has both weak and
// strong references is counted as strongly-referenced. The enumerators are
// ordered by increasing strength.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
180 
181 class Undefined : public Symbol {
182 public:
183   Undefined(StringRefZ name, InputFile *file, RefState refState)
184       : Symbol(UndefinedKind, name, file), refState(refState) {
185     assert(refState != RefState::Unreferenced);
186   }
187 
188   bool isWeakRef() const override { return refState == RefState::Weak; }
189 
190   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
191 
192   RefState refState : 2;
193 };
194 
195 // On Unix, it is traditionally allowed to write variable definitions without
196 // initialization expressions (such as "int foo;") to header files. These are
197 // called tentative definitions.
198 //
199 // Using tentative definitions is usually considered a bad practice; you should
200 // write only declarations (such as "extern int foo;") to header files.
201 // Nevertheless, the linker and the compiler have to do something to support
202 // bad code by allowing duplicate definitions for this particular case.
203 //
204 // The compiler creates common symbols when it sees tentative definitions.
205 // (You can suppress this behavior and let the compiler create a regular
206 // defined symbol by passing -fno-common. -fno-common is the default in clang
207 // as of LLVM 11.0.) When linking the final binary, if there are remaining
208 // common symbols after name resolution is complete, the linker converts them
209 // to regular defined symbols in a __common section.
210 class CommonSymbol : public Symbol {
211 public:
212   CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
213                bool isPrivateExtern)
214       : Symbol(CommonKind, name, file), size(size),
215         align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
216         privateExtern(isPrivateExtern) {
217     // TODO: cap maximum alignment
218   }
219 
220   static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
221 
222   const uint64_t size;
223   const uint32_t align;
224   const bool privateExtern;
225 };
226 
227 class DylibSymbol : public Symbol {
228 public:
229   DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
230               RefState refState, bool isTlv)
231       : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef),
232         tlv(isTlv) {
233     if (file && refState > RefState::Unreferenced)
234       file->numReferencedSymbols++;
235   }
236 
237   uint64_t getVA() const override;
238   bool isWeakDef() const override { return weakDef; }
239 
240   // Symbols from weak libraries/frameworks are also weakly-referenced.
241   bool isWeakRef() const override {
242     return refState == RefState::Weak ||
243            (file && getFile()->umbrella->forceWeakImport);
244   }
245   bool isReferenced() const { return refState != RefState::Unreferenced; }
246   bool isTlv() const override { return tlv; }
247   bool isDynamicLookup() const { return file == nullptr; }
248   bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
249 
250   DylibFile *getFile() const {
251     assert(!isDynamicLookup());
252     return cast<DylibFile>(file);
253   }
254 
255   static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
256 
257   uint32_t stubsHelperIndex = UINT32_MAX;
258   uint32_t lazyBindOffset = UINT32_MAX;
259 
260   RefState getRefState() const { return refState; }
261 
262   void reference(RefState newState) {
263     assert(newState > RefState::Unreferenced);
264     if (refState == RefState::Unreferenced && file)
265       getFile()->numReferencedSymbols++;
266     refState = std::max(refState, newState);
267   }
268 
269   void unreference() {
270     // dynamic_lookup symbols have no file.
271     if (refState > RefState::Unreferenced && file) {
272       assert(getFile()->numReferencedSymbols > 0);
273       getFile()->numReferencedSymbols--;
274     }
275   }
276 
277 private:
278   RefState refState : 2;
279   const bool weakDef : 1;
280   const bool tlv : 1;
281 };
282 
283 class LazySymbol : public Symbol {
284 public:
285   LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
286       : Symbol(LazyKind, sym.getName(), file), sym(sym) {}
287 
288   ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
289   void fetchArchiveMember();
290 
291   static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
292 
293 private:
294   const llvm::object::Archive::Symbol sym;
295 };
296 
297 union SymbolUnion {
298   alignas(Defined) char a[sizeof(Defined)];
299   alignas(Undefined) char b[sizeof(Undefined)];
300   alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
301   alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
302   alignas(LazySymbol) char e[sizeof(LazySymbol)];
303 };
304 
305 template <typename T, typename... ArgT>
306 T *replaceSymbol(Symbol *s, ArgT &&...arg) {
307   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
308   static_assert(alignof(T) <= alignof(SymbolUnion),
309                 "SymbolUnion not aligned enough");
310   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
311          "Not a Symbol");
312 
313   bool isUsedInRegularObj = s->isUsedInRegularObj;
314   bool used = s->used;
315   T *sym = new (s) T(std::forward<ArgT>(arg)...);
316   sym->isUsedInRegularObj |= isUsedInRegularObj;
317   sym->used |= used;
318   return sym;
319 }
320 
321 } // namespace macho
322 
323 std::string toString(const macho::Symbol &);
324 std::string toMachOString(const llvm::object::Archive::Symbol &);
325 
326 } // namespace lld
327 
328 #endif
329