xref: /freebsd/contrib/llvm-project/lld/MachO/Symbols.h (revision 7c20397b724a55001c2054fa133a768e9d06eb1c)
1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_SYMBOLS_H
10 #define LLD_MACHO_SYMBOLS_H
11 
12 #include "Config.h"
13 #include "InputFiles.h"
14 #include "Target.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "lld/Common/Strings.h"
17 #include "llvm/Object/Archive.h"
18 #include "llvm/Support/MathExtras.h"
19 
20 namespace lld {
21 namespace macho {
22 
23 class MachHeaderSection;
24 
25 struct StringRefZ {
26   StringRefZ(const char *s) : data(s), size(-1) {}
27   StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
28 
29   const char *data;
30   const uint32_t size;
31 };
32 
33 class Symbol {
34 public:
35   enum Kind {
36     DefinedKind,
37     UndefinedKind,
38     CommonKind,
39     DylibKind,
40     LazyArchiveKind,
41     LazyObjectKind,
42   };
43 
44   virtual ~Symbol() {}
45 
46   Kind kind() const { return symbolKind; }
47 
48   StringRef getName() const {
49     if (nameSize == (uint32_t)-1)
50       nameSize = strlen(nameData);
51     return {nameData, nameSize};
52   }
53 
54   bool isLive() const { return used; }
55   bool isLazy() const {
56     return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
57   }
58 
59   virtual uint64_t getVA() const { return 0; }
60 
61   virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
62 
63   // Only undefined or dylib symbols can be weak references. A weak reference
64   // need not be satisfied at runtime, e.g. due to the symbol not being
65   // available on a given target platform.
66   virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); }
67 
68   virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
69 
70   // Whether this symbol is in the GOT or TLVPointer sections.
71   bool isInGot() const { return gotIndex != UINT32_MAX; }
72 
73   // Whether this symbol is in the StubsSection.
74   bool isInStubs() const { return stubsIndex != UINT32_MAX; }
75 
76   uint64_t getStubVA() const;
77   uint64_t getGotVA() const;
78   uint64_t getTlvVA() const;
79   uint64_t resolveBranchVA() const {
80     assert(isa<Defined>(this) || isa<DylibSymbol>(this));
81     return isInStubs() ? getStubVA() : getVA();
82   }
83   uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
84   uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
85 
86   // The index of this symbol in the GOT or the TLVPointer section, depending
87   // on whether it is a thread-local. A given symbol cannot be referenced by
88   // both these sections at once.
89   uint32_t gotIndex = UINT32_MAX;
90 
91   uint32_t stubsIndex = UINT32_MAX;
92 
93   uint32_t symtabIndex = UINT32_MAX;
94 
95   InputFile *getFile() const { return file; }
96 
97 protected:
98   Symbol(Kind k, StringRefZ name, InputFile *file)
99       : symbolKind(k), nameData(name.data), file(file), nameSize(name.size),
100         isUsedInRegularObj(!file || isa<ObjFile>(file)),
101         used(!config->deadStrip) {}
102 
103   Kind symbolKind;
104   const char *nameData;
105   InputFile *file;
106   mutable uint32_t nameSize;
107 
108 public:
109   // True if this symbol was referenced by a regular (non-bitcode) object.
110   bool isUsedInRegularObj : 1;
111 
112   // True if an undefined or dylib symbol is used from a live section.
113   bool used : 1;
114 };
115 
116 class Defined : public Symbol {
117 public:
118   Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
119           uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
120           bool isThumb, bool isReferencedDynamically, bool noDeadStrip,
121           bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false);
122 
123   bool isWeakDef() const override { return weakDef; }
124   bool isExternalWeakDef() const {
125     return isWeakDef() && isExternal() && !privateExtern;
126   }
127   bool isTlv() const override;
128 
129   bool isExternal() const { return external; }
130   bool isAbsolute() const { return isec == nullptr; }
131 
132   uint64_t getVA() const override;
133 
134   // Ensure this symbol's pointers to InputSections point to their canonical
135   // copies.
136   void canonicalize();
137 
138   static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
139 
140   // Place the bitfields first so that they can get placed in the tail padding
141   // of the parent class, on platforms which support it.
142   bool overridesWeakDef : 1;
143   // Whether this symbol should appear in the output binary's export trie.
144   bool privateExtern : 1;
145   // Whether this symbol should appear in the output symbol table.
146   bool includeInSymtab : 1;
147   // Only relevant when compiling for Thumb-supporting arm32 archs.
148   bool thumb : 1;
149   // Symbols marked referencedDynamically won't be removed from the output's
150   // symbol table by tools like strip. In theory, this could be set on arbitrary
151   // symbols in input object files. In practice, it's used solely for the
152   // synthetic __mh_execute_header symbol.
153   // This is information for the static linker, and it's also written to the
154   // output file's symbol table for tools running later (such as `strip`).
155   bool referencedDynamically : 1;
156   // Set on symbols that should not be removed by dead code stripping.
157   // Set for example on `__attribute__((used))` globals, or on some Objective-C
158   // metadata. This is information only for the static linker and not written
159   // to the output.
160   bool noDeadStrip : 1;
161 
162   bool weakDefCanBeHidden : 1;
163 
164 private:
165   const bool weakDef : 1;
166   const bool external : 1;
167 
168 public:
169   InputSection *isec;
170   // Contains the offset from the containing subsection. Note that this is
171   // different from nlist::n_value, which is the absolute address of the symbol.
172   uint64_t value;
173   // size is only calculated for regular (non-bitcode) symbols.
174   uint64_t size;
175   ConcatInputSection *unwindEntry = nullptr;
176 };
177 
178 // This enum does double-duty: as a symbol property, it indicates whether & how
179 // a dylib symbol is referenced. As a DylibFile property, it indicates the kind
180 // of referenced symbols contained within the file. If there are both weak
181 // and strong references to the same file, we will count the file as
182 // strongly-referenced.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
184 
185 class Undefined : public Symbol {
186 public:
187   Undefined(StringRefZ name, InputFile *file, RefState refState)
188       : Symbol(UndefinedKind, name, file), refState(refState) {
189     assert(refState != RefState::Unreferenced);
190   }
191 
192   bool isWeakRef() const override { return refState == RefState::Weak; }
193 
194   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
195 
196   RefState refState : 2;
197 };
198 
199 // On Unix, it is traditionally allowed to write variable definitions without
200 // initialization expressions (such as "int foo;") to header files. These are
201 // called tentative definitions.
202 //
203 // Using tentative definitions is usually considered a bad practice; you should
204 // write only declarations (such as "extern int foo;") to header files.
205 // Nevertheless, the linker and the compiler have to do something to support
206 // bad code by allowing duplicate definitions for this particular case.
207 //
208 // The compiler creates common symbols when it sees tentative definitions.
209 // (You can suppress this behavior and let the compiler create a regular
210 // defined symbol by passing -fno-common. -fno-common is the default in clang
211 // as of LLVM 11.0.) When linking the final binary, if there are remaining
212 // common symbols after name resolution is complete, the linker converts them
213 // to regular defined symbols in a __common section.
214 class CommonSymbol : public Symbol {
215 public:
216   CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
217                bool isPrivateExtern)
218       : Symbol(CommonKind, name, file), size(size),
219         align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
220         privateExtern(isPrivateExtern) {
221     // TODO: cap maximum alignment
222   }
223 
224   static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
225 
226   const uint64_t size;
227   const uint32_t align;
228   const bool privateExtern;
229 };
230 
231 class DylibSymbol : public Symbol {
232 public:
233   DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
234               RefState refState, bool isTlv)
235       : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef),
236         tlv(isTlv) {
237     if (file && refState > RefState::Unreferenced)
238       file->numReferencedSymbols++;
239   }
240 
241   uint64_t getVA() const override;
242   bool isWeakDef() const override { return weakDef; }
243 
244   // Symbols from weak libraries/frameworks are also weakly-referenced.
245   bool isWeakRef() const override {
246     return refState == RefState::Weak ||
247            (file && getFile()->umbrella->forceWeakImport);
248   }
249   bool isReferenced() const { return refState != RefState::Unreferenced; }
250   bool isTlv() const override { return tlv; }
251   bool isDynamicLookup() const { return file == nullptr; }
252   bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
253 
254   DylibFile *getFile() const {
255     assert(!isDynamicLookup());
256     return cast<DylibFile>(file);
257   }
258 
259   static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
260 
261   uint32_t stubsHelperIndex = UINT32_MAX;
262   uint32_t lazyBindOffset = UINT32_MAX;
263 
264   RefState getRefState() const { return refState; }
265 
266   void reference(RefState newState) {
267     assert(newState > RefState::Unreferenced);
268     if (refState == RefState::Unreferenced && file)
269       getFile()->numReferencedSymbols++;
270     refState = std::max(refState, newState);
271   }
272 
273   void unreference() {
274     // dynamic_lookup symbols have no file.
275     if (refState > RefState::Unreferenced && file) {
276       assert(getFile()->numReferencedSymbols > 0);
277       getFile()->numReferencedSymbols--;
278     }
279   }
280 
281 private:
282   RefState refState : 2;
283   const bool weakDef : 1;
284   const bool tlv : 1;
285 };
286 
287 class LazyArchive : public Symbol {
288 public:
289   LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
290       : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {}
291 
292   ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
293   void fetchArchiveMember();
294 
295   static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
296 
297 private:
298   const llvm::object::Archive::Symbol sym;
299 };
300 
301 // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and
302 // --end-lib.
303 class LazyObject : public Symbol {
304 public:
305   LazyObject(InputFile &file, StringRef name)
306       : Symbol(LazyObjectKind, name, &file) {
307     isUsedInRegularObj = false;
308   }
309 
310   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
311 };
312 
313 union SymbolUnion {
314   alignas(Defined) char a[sizeof(Defined)];
315   alignas(Undefined) char b[sizeof(Undefined)];
316   alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
317   alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
318   alignas(LazyArchive) char e[sizeof(LazyArchive)];
319   alignas(LazyObject) char f[sizeof(LazyObject)];
320 };
321 
322 template <typename T, typename... ArgT>
323 T *replaceSymbol(Symbol *s, ArgT &&...arg) {
324   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
325   static_assert(alignof(T) <= alignof(SymbolUnion),
326                 "SymbolUnion not aligned enough");
327   assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
328          "Not a Symbol");
329 
330   bool isUsedInRegularObj = s->isUsedInRegularObj;
331   bool used = s->used;
332   T *sym = new (s) T(std::forward<ArgT>(arg)...);
333   sym->isUsedInRegularObj |= isUsedInRegularObj;
334   sym->used |= used;
335   return sym;
336 }
337 
338 } // namespace macho
339 
340 std::string toString(const macho::Symbol &);
341 std::string toMachOString(const llvm::object::Archive::Symbol &);
342 
343 } // namespace lld
344 
345 #endif
346