1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_SYMBOLS_H 10 #define LLD_MACHO_SYMBOLS_H 11 12 #include "InputSection.h" 13 #include "Target.h" 14 #include "lld/Common/ErrorHandler.h" 15 #include "lld/Common/Strings.h" 16 #include "llvm/Object/Archive.h" 17 #include "llvm/Support/MathExtras.h" 18 19 namespace lld { 20 namespace macho { 21 22 class InputSection; 23 class MachHeaderSection; 24 class DylibFile; 25 class ArchiveFile; 26 27 struct StringRefZ { 28 StringRefZ(const char *s) : data(s), size(-1) {} 29 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 30 31 const char *data; 32 const uint32_t size; 33 }; 34 35 class Symbol { 36 public: 37 enum Kind { 38 DefinedKind, 39 UndefinedKind, 40 CommonKind, 41 DylibKind, 42 LazyKind, 43 DSOHandleKind, 44 }; 45 46 virtual ~Symbol() {} 47 48 Kind kind() const { return static_cast<Kind>(symbolKind); } 49 50 StringRef getName() const { 51 if (nameSize == (uint32_t)-1) 52 nameSize = strlen(nameData); 53 return {nameData, nameSize}; 54 } 55 56 virtual uint64_t getVA() const { return 0; } 57 58 virtual uint64_t getFileOffset() const { 59 llvm_unreachable("attempt to get an offset from a non-defined symbol"); 60 } 61 62 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } 63 64 // Only undefined or dylib symbols can be weak references. A weak reference 65 // need not be satisfied at runtime, e.g. due to the symbol not being 66 // available on a given target platform. 67 virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } 68 69 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } 70 71 // Whether this symbol is in the GOT or TLVPointer sections. 72 bool isInGot() const { return gotIndex != UINT32_MAX; } 73 74 // Whether this symbol is in the StubsSection. 75 bool isInStubs() const { return stubsIndex != UINT32_MAX; } 76 77 // The index of this symbol in the GOT or the TLVPointer section, depending 78 // on whether it is a thread-local. A given symbol cannot be referenced by 79 // both these sections at once. 80 uint32_t gotIndex = UINT32_MAX; 81 82 uint32_t stubsIndex = UINT32_MAX; 83 84 uint32_t symtabIndex = UINT32_MAX; 85 86 protected: 87 Symbol(Kind k, StringRefZ name) 88 : symbolKind(k), nameData(name.data), nameSize(name.size) {} 89 90 Kind symbolKind; 91 const char *nameData; 92 mutable uint32_t nameSize; 93 }; 94 95 class Defined : public Symbol { 96 public: 97 Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef, 98 bool isExternal, bool isPrivateExtern) 99 : Symbol(DefinedKind, name), isec(isec), value(value), 100 overridesWeakDef(false), privateExtern(isPrivateExtern), 101 weakDef(isWeakDef), external(isExternal) {} 102 103 bool isWeakDef() const override { return weakDef; } 104 bool isExternalWeakDef() const { 105 return isWeakDef() && isExternal() && !privateExtern; 106 } 107 bool isTlv() const override { 108 return !isAbsolute() && isThreadLocalVariables(isec->flags); 109 } 110 111 bool isExternal() const { return external; } 112 bool isAbsolute() const { return isec == nullptr; } 113 114 uint64_t getVA() const override; 115 uint64_t getFileOffset() const override; 116 117 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 118 119 InputSection *isec; 120 uint32_t value; 121 122 bool overridesWeakDef : 1; 123 bool privateExtern : 1; 124 125 private: 126 const bool weakDef : 1; 127 const bool external : 1; 128 }; 129 130 // This enum does double-duty: as a symbol property, it indicates whether & how 131 // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 132 // of referenced symbols contained within the file. If there are both weak 133 // and strong references to the same file, we will count the file as 134 // strongly-referenced. 135 enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 136 137 class Undefined : public Symbol { 138 public: 139 Undefined(StringRefZ name, RefState refState) 140 : Symbol(UndefinedKind, name), refState(refState) { 141 assert(refState != RefState::Unreferenced); 142 } 143 144 bool isWeakRef() const override { return refState == RefState::Weak; } 145 146 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 147 148 RefState refState : 2; 149 }; 150 151 // On Unix, it is traditionally allowed to write variable definitions without 152 // initialization expressions (such as "int foo;") to header files. These are 153 // called tentative definitions. 154 // 155 // Using tentative definitions is usually considered a bad practice; you should 156 // write only declarations (such as "extern int foo;") to header files. 157 // Nevertheless, the linker and the compiler have to do something to support 158 // bad code by allowing duplicate definitions for this particular case. 159 // 160 // The compiler creates common symbols when it sees tentative definitions. 161 // (You can suppress this behavior and let the compiler create a regular 162 // defined symbol by passing -fno-common. -fno-common is the default in clang 163 // as of LLVM 11.0.) When linking the final binary, if there are remaining 164 // common symbols after name resolution is complete, the linker converts them 165 // to regular defined symbols in a __common section. 166 class CommonSymbol : public Symbol { 167 public: 168 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 169 bool isPrivateExtern) 170 : Symbol(CommonKind, name), file(file), size(size), 171 align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 172 privateExtern(isPrivateExtern) { 173 // TODO: cap maximum alignment 174 } 175 176 static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 177 178 InputFile *const file; 179 const uint64_t size; 180 const uint32_t align; 181 const bool privateExtern; 182 }; 183 184 class DylibSymbol : public Symbol { 185 public: 186 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 187 RefState refState, bool isTlv) 188 : Symbol(DylibKind, name), file(file), refState(refState), 189 weakDef(isWeakDef), tlv(isTlv) {} 190 191 bool isWeakDef() const override { return weakDef; } 192 bool isWeakRef() const override { return refState == RefState::Weak; } 193 bool isReferenced() const { return refState != RefState::Unreferenced; } 194 bool isTlv() const override { return tlv; } 195 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 196 197 static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 198 199 DylibFile *file; 200 uint32_t stubsHelperIndex = UINT32_MAX; 201 uint32_t lazyBindOffset = UINT32_MAX; 202 203 RefState refState : 2; 204 205 private: 206 const bool weakDef : 1; 207 const bool tlv : 1; 208 }; 209 210 class LazySymbol : public Symbol { 211 public: 212 LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 213 : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {} 214 215 static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 216 217 void fetchArchiveMember(); 218 219 private: 220 ArchiveFile *file; 221 const llvm::object::Archive::Symbol sym; 222 }; 223 224 // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which 225 // does e.g. cleanup of static global variables. The ABI document says that the 226 // pointer can point to any address in one of the dylib's segments, but in 227 // practice ld64 seems to set it to point to the header, so that's what's 228 // implemented here. 229 // 230 // The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet 231 // tested this on an ARM platform. 232 // 233 // DSOHandle effectively functions like a Defined symbol, but it doesn't belong 234 // to an InputSection. 235 class DSOHandle : public Symbol { 236 public: 237 DSOHandle(const MachHeaderSection *header) 238 : Symbol(DSOHandleKind, name), header(header) {} 239 240 const MachHeaderSection *header; 241 242 uint64_t getVA() const override; 243 244 uint64_t getFileOffset() const override; 245 246 bool isWeakDef() const override { return false; } 247 248 bool isTlv() const override { return false; } 249 250 static constexpr StringRef name = "___dso_handle"; 251 252 static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; } 253 }; 254 255 union SymbolUnion { 256 alignas(Defined) char a[sizeof(Defined)]; 257 alignas(Undefined) char b[sizeof(Undefined)]; 258 alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 259 alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 260 alignas(LazySymbol) char e[sizeof(LazySymbol)]; 261 alignas(DSOHandle) char f[sizeof(DSOHandle)]; 262 }; 263 264 template <typename T, typename... ArgT> 265 T *replaceSymbol(Symbol *s, ArgT &&... arg) { 266 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 267 static_assert(alignof(T) <= alignof(SymbolUnion), 268 "SymbolUnion not aligned enough"); 269 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 270 "Not a Symbol"); 271 272 return new (s) T(std::forward<ArgT>(arg)...); 273 } 274 275 } // namespace macho 276 277 std::string toString(const macho::Symbol &); 278 std::string toMachOString(const llvm::object::Archive::Symbol &); 279 280 } // namespace lld 281 282 #endif 283