xref: /freebsd/contrib/llvm-project/lld/MachO/Symbols.h (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_SYMBOLS_H
10 #define LLD_MACHO_SYMBOLS_H
11 
#include "Config.h"
#include "InputFiles.h"
#include "Target.h"

#include "llvm/Object/Archive.h"
#include "llvm/Support/MathExtras.h"

#include <type_traits>
18 
19 namespace lld {
20 namespace macho {
21 
22 class MachHeaderSection;
23 
24 struct StringRefZ {
25   StringRefZ(const char *s) : data(s), size(-1) {}
26   StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
27 
28   const char *data;
29   const uint32_t size;
30 };
31 
32 class Symbol {
33 public:
34   enum Kind {
35     DefinedKind,
36     UndefinedKind,
37     CommonKind,
38     DylibKind,
39     LazyArchiveKind,
40     LazyObjectKind,
41     AliasKind,
42   };
43 
44   virtual ~Symbol() {}
45 
46   Kind kind() const { return symbolKind; }
47 
48   StringRef getName() const {
49     if (nameSize == (uint32_t)-1)
50       nameSize = strlen(nameData);
51     return {nameData, nameSize};
52   }
53 
54   bool isLive() const { return used; }
55   bool isLazy() const {
56     return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
57   }
58 
59   virtual uint64_t getVA() const { return 0; }
60 
61   virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
62 
63   // Only undefined or dylib symbols can be weak references. A weak reference
64   // need not be satisfied at runtime, e.g. due to the symbol not being
65   // available on a given target platform.
66   virtual bool isWeakRef() const { return false; }
67 
68   virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
69 
70   // Whether this symbol is in the GOT or TLVPointer sections.
71   bool isInGot() const { return gotIndex != UINT32_MAX; }
72 
73   // Whether this symbol is in the StubsSection.
74   bool isInStubs() const { return stubsIndex != UINT32_MAX; }
75 
76   uint64_t getStubVA() const;
77   uint64_t getLazyPtrVA() const;
78   uint64_t getGotVA() const;
79   uint64_t getTlvVA() const;
80   uint64_t resolveBranchVA() const {
81     assert(isa<Defined>(this) || isa<DylibSymbol>(this));
82     return isInStubs() ? getStubVA() : getVA();
83   }
84   uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
85   uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
86 
87   // The index of this symbol in the GOT or the TLVPointer section, depending
88   // on whether it is a thread-local. A given symbol cannot be referenced by
89   // both these sections at once.
90   uint32_t gotIndex = UINT32_MAX;
91   uint32_t lazyBindOffset = UINT32_MAX;
92   uint32_t stubsHelperIndex = UINT32_MAX;
93   uint32_t stubsIndex = UINT32_MAX;
94   uint32_t symtabIndex = UINT32_MAX;
95 
96   InputFile *getFile() const { return file; }
97 
98 protected:
99   Symbol(Kind k, StringRefZ name, InputFile *file)
100       : symbolKind(k), nameData(name.data), file(file), nameSize(name.size),
101         isUsedInRegularObj(!file || isa<ObjFile>(file)),
102         used(!config->deadStrip) {}
103 
104   Kind symbolKind;
105   const char *nameData;
106   InputFile *file;
107   mutable uint32_t nameSize;
108 
109 public:
110   // True if this symbol was referenced by a regular (non-bitcode) object.
111   bool isUsedInRegularObj : 1;
112 
113   // True if this symbol is used from a live section.
114   bool used : 1;
115 };
116 
117 class Defined : public Symbol {
118 public:
119   Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
120           uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
121           bool includeInSymtab, bool isThumb, bool isReferencedDynamically,
122           bool noDeadStrip, bool canOverrideWeakDef = false,
123           bool isWeakDefCanBeHidden = false, bool interposable = false);
124 
125   bool isWeakDef() const override { return weakDef; }
126   bool isExternalWeakDef() const {
127     return isWeakDef() && isExternal() && !privateExtern;
128   }
129   bool isTlv() const override;
130 
131   bool isExternal() const { return external; }
132   bool isAbsolute() const { return isec == nullptr; }
133 
134   uint64_t getVA() const override;
135 
136   // Returns the object file that this symbol was defined in. This value differs
137   // from `getFile()` if the symbol originated from a bitcode file.
138   ObjFile *getObjectFile() const;
139 
140   std::string getSourceLocation();
141 
142   // Ensure this symbol's pointers to InputSections point to their canonical
143   // copies.
144   void canonicalize();
145 
146   static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
147 
148   // Place the bitfields first so that they can get placed in the tail padding
149   // of the parent class, on platforms which support it.
150   bool overridesWeakDef : 1;
151   // Whether this symbol should appear in the output binary's export trie.
152   bool privateExtern : 1;
153   // Whether this symbol should appear in the output symbol table.
154   bool includeInSymtab : 1;
155   // Whether this symbol was folded into a different symbol during ICF.
156   bool wasIdenticalCodeFolded : 1;
157   // Only relevant when compiling for Thumb-supporting arm32 archs.
158   bool thumb : 1;
159   // Symbols marked referencedDynamically won't be removed from the output's
160   // symbol table by tools like strip. In theory, this could be set on arbitrary
161   // symbols in input object files. In practice, it's used solely for the
162   // synthetic __mh_execute_header symbol.
163   // This is information for the static linker, and it's also written to the
164   // output file's symbol table for tools running later (such as `strip`).
165   bool referencedDynamically : 1;
166   // Set on symbols that should not be removed by dead code stripping.
167   // Set for example on `__attribute__((used))` globals, or on some Objective-C
168   // metadata. This is information only for the static linker and not written
169   // to the output.
170   bool noDeadStrip : 1;
171   // Whether references to this symbol can be interposed at runtime to point to
172   // a different symbol definition (with the same name). For example, if both
173   // dylib A and B define an interposable symbol _foo, and we load A before B at
174   // runtime, then all references to _foo within dylib B will point to the
175   // definition in dylib A.
176   //
177   // Only extern symbols may be interposable.
178   bool interposable : 1;
179 
180   bool weakDefCanBeHidden : 1;
181 
182 private:
183   const bool weakDef : 1;
184   const bool external : 1;
185 
186 public:
187   InputSection *isec;
188   // Contains the offset from the containing subsection. Note that this is
189   // different from nlist::n_value, which is the absolute address of the symbol.
190   uint64_t value;
191   // size is only calculated for regular (non-bitcode) symbols.
192   uint64_t size;
193   // This can be a subsection of either __compact_unwind or __eh_frame.
194   ConcatInputSection *unwindEntry = nullptr;
195 };
196 
// Serves two purposes. On a symbol it records whether, and how strongly, a
// dylib symbol is referenced. On a DylibFile it records what kind of
// referenced symbols the file contains; a file that is referenced both weakly
// and strongly counts as strongly-referenced. The enumerators are ordered by
// increasing strength.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
203 
204 class Undefined : public Symbol {
205 public:
206   Undefined(StringRefZ name, InputFile *file, RefState refState,
207             bool wasBitcodeSymbol)
208       : Symbol(UndefinedKind, name, file), refState(refState),
209         wasBitcodeSymbol(wasBitcodeSymbol) {
210     assert(refState != RefState::Unreferenced);
211   }
212 
213   bool isWeakRef() const override { return refState == RefState::Weak; }
214 
215   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
216 
217   RefState refState : 2;
218   bool wasBitcodeSymbol;
219 };
220 
221 // On Unix, it is traditionally allowed to write variable definitions without
222 // initialization expressions (such as "int foo;") to header files. These are
223 // called tentative definitions.
224 //
225 // Using tentative definitions is usually considered a bad practice; you should
226 // write only declarations (such as "extern int foo;") to header files.
227 // Nevertheless, the linker and the compiler have to do something to support
228 // bad code by allowing duplicate definitions for this particular case.
229 //
230 // The compiler creates common symbols when it sees tentative definitions.
231 // (You can suppress this behavior and let the compiler create a regular
232 // defined symbol by passing -fno-common. -fno-common is the default in clang
233 // as of LLVM 11.0.) When linking the final binary, if there are remaining
234 // common symbols after name resolution is complete, the linker converts them
235 // to regular defined symbols in a __common section.
236 class CommonSymbol : public Symbol {
237 public:
238   CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
239                bool isPrivateExtern)
240       : Symbol(CommonKind, name, file), size(size),
241         align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
242         privateExtern(isPrivateExtern) {
243     // TODO: cap maximum alignment
244   }
245 
246   static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
247 
248   const uint64_t size;
249   const uint32_t align;
250   const bool privateExtern;
251 };
252 
253 class DylibSymbol : public Symbol {
254 public:
255   DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
256               RefState refState, bool isTlv)
257       : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef),
258         tlv(isTlv) {
259     if (file && refState > RefState::Unreferenced)
260       file->numReferencedSymbols++;
261   }
262 
263   uint64_t getVA() const override;
264   bool isWeakDef() const override { return weakDef; }
265 
266   // Symbols from weak libraries/frameworks are also weakly-referenced.
267   bool isWeakRef() const override {
268     return refState == RefState::Weak ||
269            (file && getFile()->umbrella->forceWeakImport);
270   }
271   bool isReferenced() const { return refState != RefState::Unreferenced; }
272   bool isTlv() const override { return tlv; }
273   bool isDynamicLookup() const { return file == nullptr; }
274   bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
275 
276   DylibFile *getFile() const {
277     assert(!isDynamicLookup());
278     return cast<DylibFile>(file);
279   }
280 
281   static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
282 
283   RefState getRefState() const { return refState; }
284 
285   void reference(RefState newState) {
286     assert(newState > RefState::Unreferenced);
287     if (refState == RefState::Unreferenced && file)
288       getFile()->numReferencedSymbols++;
289     refState = std::max(refState, newState);
290   }
291 
292   void unreference() {
293     // dynamic_lookup symbols have no file.
294     if (refState > RefState::Unreferenced && file) {
295       assert(getFile()->numReferencedSymbols > 0);
296       getFile()->numReferencedSymbols--;
297     }
298   }
299 
300 private:
301   RefState refState : 2;
302   const bool weakDef : 1;
303   const bool tlv : 1;
304 };
305 
306 class LazyArchive : public Symbol {
307 public:
308   LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
309       : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {}
310 
311   ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
312   void fetchArchiveMember();
313 
314   static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
315 
316 private:
317   const llvm::object::Archive::Symbol sym;
318 };
319 
320 // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and
321 // --end-lib.
322 class LazyObject : public Symbol {
323 public:
324   LazyObject(InputFile &file, StringRef name)
325       : Symbol(LazyObjectKind, name, &file) {
326     isUsedInRegularObj = false;
327   }
328 
329   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
330 };
331 
332 // Represents N_INDR symbols. Note that if we are given valid, linkable inputs,
333 // then all AliasSymbol instances will be converted into one of the other Symbol
334 // types after `createAliases()` runs.
335 class AliasSymbol final : public Symbol {
336 public:
337   AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName,
338               bool isPrivateExtern)
339       : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern),
340         aliasedName(aliasedName) {}
341 
342   StringRef getAliasedName() const { return aliasedName; }
343 
344   static bool classof(const Symbol *s) { return s->kind() == AliasKind; }
345 
346   const bool privateExtern;
347 
348 private:
349   StringRef aliasedName;
350 };
351 
352 union SymbolUnion {
353   alignas(Defined) char a[sizeof(Defined)];
354   alignas(Undefined) char b[sizeof(Undefined)];
355   alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
356   alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
357   alignas(LazyArchive) char e[sizeof(LazyArchive)];
358   alignas(LazyObject) char f[sizeof(LazyObject)];
359   alignas(AliasSymbol) char g[sizeof(AliasSymbol)];
360 };
361 
362 template <typename T, typename... ArgT>
363 T *replaceSymbol(Symbol *s, ArgT &&...arg) {
364   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
365   static_assert(alignof(T) <= alignof(SymbolUnion),
366                 "SymbolUnion not aligned enough");
367   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
368          "Not a Symbol");
369 
370   bool isUsedInRegularObj = s->isUsedInRegularObj;
371   bool used = s->used;
372   T *sym = new (s) T(std::forward<ArgT>(arg)...);
373   sym->isUsedInRegularObj |= isUsedInRegularObj;
374   sym->used |= used;
375   return sym;
376 }
377 
378 // Can a symbol's address only be resolved at runtime?
379 inline bool needsBinding(const Symbol *sym) {
380   if (isa<DylibSymbol>(sym))
381     return true;
382   if (const auto *defined = dyn_cast<Defined>(sym))
383     return defined->isExternalWeakDef() || defined->interposable;
384   return false;
385 }
386 } // namespace macho
387 
388 std::string toString(const macho::Symbol &);
389 std::string toMachOString(const llvm::object::Archive::Symbol &);
390 
391 } // namespace lld
392 
393 #endif
394