15ffd83dbSDimitry Andric //===- Symbols.h ------------------------------------------------*- C++ -*-===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric 95ffd83dbSDimitry Andric #ifndef LLD_MACHO_SYMBOLS_H 105ffd83dbSDimitry Andric #define LLD_MACHO_SYMBOLS_H 115ffd83dbSDimitry Andric 125ffd83dbSDimitry Andric #include "InputSection.h" 135ffd83dbSDimitry Andric #include "Target.h" 145ffd83dbSDimitry Andric #include "lld/Common/ErrorHandler.h" 155ffd83dbSDimitry Andric #include "lld/Common/Strings.h" 165ffd83dbSDimitry Andric #include "llvm/Object/Archive.h" 17*e8d8bef9SDimitry Andric #include "llvm/Support/MathExtras.h" 185ffd83dbSDimitry Andric 195ffd83dbSDimitry Andric namespace lld { 205ffd83dbSDimitry Andric namespace macho { 215ffd83dbSDimitry Andric 225ffd83dbSDimitry Andric class InputSection; 23*e8d8bef9SDimitry Andric class MachHeaderSection; 245ffd83dbSDimitry Andric class DylibFile; 255ffd83dbSDimitry Andric class ArchiveFile; 265ffd83dbSDimitry Andric 275ffd83dbSDimitry Andric struct StringRefZ { 285ffd83dbSDimitry Andric StringRefZ(const char *s) : data(s), size(-1) {} 295ffd83dbSDimitry Andric StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 305ffd83dbSDimitry Andric 315ffd83dbSDimitry Andric const char *data; 325ffd83dbSDimitry Andric const uint32_t size; 335ffd83dbSDimitry Andric }; 345ffd83dbSDimitry Andric 355ffd83dbSDimitry Andric class Symbol { 365ffd83dbSDimitry Andric public: 375ffd83dbSDimitry Andric enum Kind { 385ffd83dbSDimitry Andric DefinedKind, 395ffd83dbSDimitry Andric UndefinedKind, 40*e8d8bef9SDimitry Andric CommonKind, 415ffd83dbSDimitry Andric DylibKind, 425ffd83dbSDimitry Andric LazyKind, 43*e8d8bef9SDimitry Andric DSOHandleKind, 445ffd83dbSDimitry Andric }; 455ffd83dbSDimitry Andric 46*e8d8bef9SDimitry Andric virtual ~Symbol() {} 47*e8d8bef9SDimitry Andric 485ffd83dbSDimitry Andric Kind kind() const { return static_cast<Kind>(symbolKind); } 495ffd83dbSDimitry Andric 50*e8d8bef9SDimitry Andric StringRef getName() const { 51*e8d8bef9SDimitry Andric if (nameSize == (uint32_t)-1) 52*e8d8bef9SDimitry Andric nameSize = strlen(nameData); 53*e8d8bef9SDimitry Andric return {nameData, nameSize}; 54*e8d8bef9SDimitry Andric } 555ffd83dbSDimitry Andric 56*e8d8bef9SDimitry Andric virtual uint64_t getVA() const { return 0; } 575ffd83dbSDimitry Andric 58*e8d8bef9SDimitry Andric virtual uint64_t getFileOffset() const { 59*e8d8bef9SDimitry Andric llvm_unreachable("attempt to get an offset from a non-defined symbol"); 60*e8d8bef9SDimitry Andric } 615ffd83dbSDimitry Andric 62*e8d8bef9SDimitry Andric virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } 63*e8d8bef9SDimitry Andric 64*e8d8bef9SDimitry Andric // Only undefined or dylib symbols can be weak references. A weak reference 65*e8d8bef9SDimitry Andric // need not be satisfied at runtime, e.g. due to the symbol not being 66*e8d8bef9SDimitry Andric // available on a given target platform. 67*e8d8bef9SDimitry Andric virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } 68*e8d8bef9SDimitry Andric 69*e8d8bef9SDimitry Andric virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } 70*e8d8bef9SDimitry Andric 71*e8d8bef9SDimitry Andric // Whether this symbol is in the GOT or TLVPointer sections. 72*e8d8bef9SDimitry Andric bool isInGot() const { return gotIndex != UINT32_MAX; } 73*e8d8bef9SDimitry Andric 74*e8d8bef9SDimitry Andric // Whether this symbol is in the StubsSection. 75*e8d8bef9SDimitry Andric bool isInStubs() const { return stubsIndex != UINT32_MAX; } 76*e8d8bef9SDimitry Andric 77*e8d8bef9SDimitry Andric // The index of this symbol in the GOT or the TLVPointer section, depending 78*e8d8bef9SDimitry Andric // on whether it is a thread-local. A given symbol cannot be referenced by 79*e8d8bef9SDimitry Andric // both these sections at once. 805ffd83dbSDimitry Andric uint32_t gotIndex = UINT32_MAX; 815ffd83dbSDimitry Andric 82*e8d8bef9SDimitry Andric uint32_t stubsIndex = UINT32_MAX; 83*e8d8bef9SDimitry Andric 84*e8d8bef9SDimitry Andric uint32_t symtabIndex = UINT32_MAX; 85*e8d8bef9SDimitry Andric 865ffd83dbSDimitry Andric protected: 87*e8d8bef9SDimitry Andric Symbol(Kind k, StringRefZ name) 88*e8d8bef9SDimitry Andric : symbolKind(k), nameData(name.data), nameSize(name.size) {} 895ffd83dbSDimitry Andric 905ffd83dbSDimitry Andric Kind symbolKind; 91*e8d8bef9SDimitry Andric const char *nameData; 92*e8d8bef9SDimitry Andric mutable uint32_t nameSize; 935ffd83dbSDimitry Andric }; 945ffd83dbSDimitry Andric 955ffd83dbSDimitry Andric class Defined : public Symbol { 965ffd83dbSDimitry Andric public: 97*e8d8bef9SDimitry Andric Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef, 98*e8d8bef9SDimitry Andric bool isExternal, bool isPrivateExtern) 99*e8d8bef9SDimitry Andric : Symbol(DefinedKind, name), isec(isec), value(value), 100*e8d8bef9SDimitry Andric overridesWeakDef(false), privateExtern(isPrivateExtern), 101*e8d8bef9SDimitry Andric weakDef(isWeakDef), external(isExternal) {} 102*e8d8bef9SDimitry Andric 103*e8d8bef9SDimitry Andric bool isWeakDef() const override { return weakDef; } 104*e8d8bef9SDimitry Andric bool isExternalWeakDef() const { 105*e8d8bef9SDimitry Andric return isWeakDef() && isExternal() && !privateExtern; 106*e8d8bef9SDimitry Andric } 107*e8d8bef9SDimitry Andric bool isTlv() const override { 108*e8d8bef9SDimitry Andric return !isAbsolute() && isThreadLocalVariables(isec->flags); 109*e8d8bef9SDimitry Andric } 110*e8d8bef9SDimitry Andric 111*e8d8bef9SDimitry Andric bool isExternal() const { return external; } 112*e8d8bef9SDimitry Andric bool isAbsolute() const { return isec == nullptr; } 113*e8d8bef9SDimitry Andric 114*e8d8bef9SDimitry Andric uint64_t getVA() const override; 115*e8d8bef9SDimitry Andric uint64_t getFileOffset() const override; 116*e8d8bef9SDimitry Andric 117*e8d8bef9SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 1185ffd83dbSDimitry Andric 1195ffd83dbSDimitry Andric InputSection *isec; 1205ffd83dbSDimitry Andric uint32_t value; 1215ffd83dbSDimitry Andric 122*e8d8bef9SDimitry Andric bool overridesWeakDef : 1; 123*e8d8bef9SDimitry Andric bool privateExtern : 1; 124*e8d8bef9SDimitry Andric 125*e8d8bef9SDimitry Andric private: 126*e8d8bef9SDimitry Andric const bool weakDef : 1; 127*e8d8bef9SDimitry Andric const bool external : 1; 1285ffd83dbSDimitry Andric }; 1295ffd83dbSDimitry Andric 130*e8d8bef9SDimitry Andric // This enum does double-duty: as a symbol property, it indicates whether & how 131*e8d8bef9SDimitry Andric // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 132*e8d8bef9SDimitry Andric // of referenced symbols contained within the file. If there are both weak 133*e8d8bef9SDimitry Andric // and strong references to the same file, we will count the file as 134*e8d8bef9SDimitry Andric // strongly-referenced. 135*e8d8bef9SDimitry Andric enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 136*e8d8bef9SDimitry Andric 1375ffd83dbSDimitry Andric class Undefined : public Symbol { 1385ffd83dbSDimitry Andric public: 139*e8d8bef9SDimitry Andric Undefined(StringRefZ name, RefState refState) 140*e8d8bef9SDimitry Andric : Symbol(UndefinedKind, name), refState(refState) { 141*e8d8bef9SDimitry Andric assert(refState != RefState::Unreferenced); 142*e8d8bef9SDimitry Andric } 143*e8d8bef9SDimitry Andric 144*e8d8bef9SDimitry Andric bool isWeakRef() const override { return refState == RefState::Weak; } 1455ffd83dbSDimitry Andric 1465ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 147*e8d8bef9SDimitry Andric 148*e8d8bef9SDimitry Andric RefState refState : 2; 149*e8d8bef9SDimitry Andric }; 150*e8d8bef9SDimitry Andric 151*e8d8bef9SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions without 152*e8d8bef9SDimitry Andric // initialization expressions (such as "int foo;") to header files. These are 153*e8d8bef9SDimitry Andric // called tentative definitions. 154*e8d8bef9SDimitry Andric // 155*e8d8bef9SDimitry Andric // Using tentative definitions is usually considered a bad practice; you should 156*e8d8bef9SDimitry Andric // write only declarations (such as "extern int foo;") to header files. 157*e8d8bef9SDimitry Andric // Nevertheless, the linker and the compiler have to do something to support 158*e8d8bef9SDimitry Andric // bad code by allowing duplicate definitions for this particular case. 159*e8d8bef9SDimitry Andric // 160*e8d8bef9SDimitry Andric // The compiler creates common symbols when it sees tentative definitions. 161*e8d8bef9SDimitry Andric // (You can suppress this behavior and let the compiler create a regular 162*e8d8bef9SDimitry Andric // defined symbol by passing -fno-common. -fno-common is the default in clang 163*e8d8bef9SDimitry Andric // as of LLVM 11.0.) When linking the final binary, if there are remaining 164*e8d8bef9SDimitry Andric // common symbols after name resolution is complete, the linker converts them 165*e8d8bef9SDimitry Andric // to regular defined symbols in a __common section. 166*e8d8bef9SDimitry Andric class CommonSymbol : public Symbol { 167*e8d8bef9SDimitry Andric public: 168*e8d8bef9SDimitry Andric CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 169*e8d8bef9SDimitry Andric bool isPrivateExtern) 170*e8d8bef9SDimitry Andric : Symbol(CommonKind, name), file(file), size(size), 171*e8d8bef9SDimitry Andric align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 172*e8d8bef9SDimitry Andric privateExtern(isPrivateExtern) { 173*e8d8bef9SDimitry Andric // TODO: cap maximum alignment 174*e8d8bef9SDimitry Andric } 175*e8d8bef9SDimitry Andric 176*e8d8bef9SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 177*e8d8bef9SDimitry Andric 178*e8d8bef9SDimitry Andric InputFile *const file; 179*e8d8bef9SDimitry Andric const uint64_t size; 180*e8d8bef9SDimitry Andric const uint32_t align; 181*e8d8bef9SDimitry Andric const bool privateExtern; 1825ffd83dbSDimitry Andric }; 1835ffd83dbSDimitry Andric 1845ffd83dbSDimitry Andric class DylibSymbol : public Symbol { 1855ffd83dbSDimitry Andric public: 186*e8d8bef9SDimitry Andric DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 187*e8d8bef9SDimitry Andric RefState refState, bool isTlv) 188*e8d8bef9SDimitry Andric : Symbol(DylibKind, name), file(file), refState(refState), 189*e8d8bef9SDimitry Andric weakDef(isWeakDef), tlv(isTlv) {} 190*e8d8bef9SDimitry Andric 191*e8d8bef9SDimitry Andric bool isWeakDef() const override { return weakDef; } 192*e8d8bef9SDimitry Andric bool isWeakRef() const override { return refState == RefState::Weak; } 193*e8d8bef9SDimitry Andric bool isReferenced() const { return refState != RefState::Unreferenced; } 194*e8d8bef9SDimitry Andric bool isTlv() const override { return tlv; } 195*e8d8bef9SDimitry Andric bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 1965ffd83dbSDimitry Andric 1975ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 1985ffd83dbSDimitry Andric 1995ffd83dbSDimitry Andric DylibFile *file; 200*e8d8bef9SDimitry Andric uint32_t stubsHelperIndex = UINT32_MAX; 2015ffd83dbSDimitry Andric uint32_t lazyBindOffset = UINT32_MAX; 202*e8d8bef9SDimitry Andric 203*e8d8bef9SDimitry Andric RefState refState : 2; 204*e8d8bef9SDimitry Andric 205*e8d8bef9SDimitry Andric private: 206*e8d8bef9SDimitry Andric const bool weakDef : 1; 207*e8d8bef9SDimitry Andric const bool tlv : 1; 2085ffd83dbSDimitry Andric }; 2095ffd83dbSDimitry Andric 2105ffd83dbSDimitry Andric class LazySymbol : public Symbol { 2115ffd83dbSDimitry Andric public: 2125ffd83dbSDimitry Andric LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 2135ffd83dbSDimitry Andric : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {} 2145ffd83dbSDimitry Andric 2155ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 2165ffd83dbSDimitry Andric 2175ffd83dbSDimitry Andric void fetchArchiveMember(); 2185ffd83dbSDimitry Andric 2195ffd83dbSDimitry Andric private: 2205ffd83dbSDimitry Andric ArchiveFile *file; 2215ffd83dbSDimitry Andric const llvm::object::Archive::Symbol sym; 2225ffd83dbSDimitry Andric }; 2235ffd83dbSDimitry Andric 224*e8d8bef9SDimitry Andric // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which 225*e8d8bef9SDimitry Andric // does e.g. cleanup of static global variables. The ABI document says that the 226*e8d8bef9SDimitry Andric // pointer can point to any address in one of the dylib's segments, but in 227*e8d8bef9SDimitry Andric // practice ld64 seems to set it to point to the header, so that's what's 228*e8d8bef9SDimitry Andric // implemented here. 229*e8d8bef9SDimitry Andric // 230*e8d8bef9SDimitry Andric // The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet 231*e8d8bef9SDimitry Andric // tested this on an ARM platform. 232*e8d8bef9SDimitry Andric // 233*e8d8bef9SDimitry Andric // DSOHandle effectively functions like a Defined symbol, but it doesn't belong 234*e8d8bef9SDimitry Andric // to an InputSection. 235*e8d8bef9SDimitry Andric class DSOHandle : public Symbol { 236*e8d8bef9SDimitry Andric public: 237*e8d8bef9SDimitry Andric DSOHandle(const MachHeaderSection *header) 238*e8d8bef9SDimitry Andric : Symbol(DSOHandleKind, name), header(header) {} 2395ffd83dbSDimitry Andric 240*e8d8bef9SDimitry Andric const MachHeaderSection *header; 241*e8d8bef9SDimitry Andric 242*e8d8bef9SDimitry Andric uint64_t getVA() const override; 243*e8d8bef9SDimitry Andric 244*e8d8bef9SDimitry Andric uint64_t getFileOffset() const override; 245*e8d8bef9SDimitry Andric 246*e8d8bef9SDimitry Andric bool isWeakDef() const override { return false; } 247*e8d8bef9SDimitry Andric 248*e8d8bef9SDimitry Andric bool isTlv() const override { return false; } 249*e8d8bef9SDimitry Andric 250*e8d8bef9SDimitry Andric static constexpr StringRef name = "___dso_handle"; 251*e8d8bef9SDimitry Andric 252*e8d8bef9SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; } 253*e8d8bef9SDimitry Andric }; 2545ffd83dbSDimitry Andric 2555ffd83dbSDimitry Andric union SymbolUnion { 2565ffd83dbSDimitry Andric alignas(Defined) char a[sizeof(Defined)]; 2575ffd83dbSDimitry Andric alignas(Undefined) char b[sizeof(Undefined)]; 258*e8d8bef9SDimitry Andric alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 259*e8d8bef9SDimitry Andric alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 260*e8d8bef9SDimitry Andric alignas(LazySymbol) char e[sizeof(LazySymbol)]; 261*e8d8bef9SDimitry Andric alignas(DSOHandle) char f[sizeof(DSOHandle)]; 2625ffd83dbSDimitry Andric }; 2635ffd83dbSDimitry Andric 2645ffd83dbSDimitry Andric template <typename T, typename... ArgT> 265*e8d8bef9SDimitry Andric T *replaceSymbol(Symbol *s, ArgT &&... arg) { 2665ffd83dbSDimitry Andric static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 2675ffd83dbSDimitry Andric static_assert(alignof(T) <= alignof(SymbolUnion), 2685ffd83dbSDimitry Andric "SymbolUnion not aligned enough"); 2695ffd83dbSDimitry Andric assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 2705ffd83dbSDimitry Andric "Not a Symbol"); 2715ffd83dbSDimitry Andric 272*e8d8bef9SDimitry Andric return new (s) T(std::forward<ArgT>(arg)...); 2735ffd83dbSDimitry Andric } 2745ffd83dbSDimitry Andric 2755ffd83dbSDimitry Andric } // namespace macho 2765ffd83dbSDimitry Andric 2775ffd83dbSDimitry Andric std::string toString(const macho::Symbol &); 278*e8d8bef9SDimitry Andric std::string toMachOString(const llvm::object::Archive::Symbol &); 279*e8d8bef9SDimitry Andric 2805ffd83dbSDimitry Andric } // namespace lld 2815ffd83dbSDimitry Andric 2825ffd83dbSDimitry Andric #endif 283