15ffd83dbSDimitry Andric //===- Symbols.h ------------------------------------------------*- C++ -*-===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric 95ffd83dbSDimitry Andric #ifndef LLD_MACHO_SYMBOLS_H 105ffd83dbSDimitry Andric #define LLD_MACHO_SYMBOLS_H 115ffd83dbSDimitry Andric 12349cc55cSDimitry Andric #include "Config.h" 13fe6060f1SDimitry Andric #include "InputFiles.h" 145ffd83dbSDimitry Andric #include "Target.h" 155ffd83dbSDimitry Andric #include "lld/Common/ErrorHandler.h" 165ffd83dbSDimitry Andric #include "lld/Common/Strings.h" 175ffd83dbSDimitry Andric #include "llvm/Object/Archive.h" 18e8d8bef9SDimitry Andric #include "llvm/Support/MathExtras.h" 195ffd83dbSDimitry Andric 205ffd83dbSDimitry Andric namespace lld { 215ffd83dbSDimitry Andric namespace macho { 225ffd83dbSDimitry Andric 23e8d8bef9SDimitry Andric class MachHeaderSection; 245ffd83dbSDimitry Andric 255ffd83dbSDimitry Andric struct StringRefZ { 265ffd83dbSDimitry Andric StringRefZ(const char *s) : data(s), size(-1) {} 275ffd83dbSDimitry Andric StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 285ffd83dbSDimitry Andric 295ffd83dbSDimitry Andric const char *data; 305ffd83dbSDimitry Andric const uint32_t size; 315ffd83dbSDimitry Andric }; 325ffd83dbSDimitry Andric 335ffd83dbSDimitry Andric class Symbol { 345ffd83dbSDimitry Andric public: 355ffd83dbSDimitry Andric enum Kind { 365ffd83dbSDimitry Andric DefinedKind, 375ffd83dbSDimitry Andric UndefinedKind, 38e8d8bef9SDimitry Andric CommonKind, 395ffd83dbSDimitry Andric DylibKind, 40*04eeddc0SDimitry Andric LazyArchiveKind, 41*04eeddc0SDimitry Andric LazyObjectKind, 425ffd83dbSDimitry Andric }; 435ffd83dbSDimitry Andric 44e8d8bef9SDimitry Andric virtual ~Symbol() {} 45e8d8bef9SDimitry Andric 46fe6060f1SDimitry Andric Kind kind() const { return symbolKind; } 475ffd83dbSDimitry Andric 48e8d8bef9SDimitry Andric StringRef getName() const { 49e8d8bef9SDimitry Andric if (nameSize == (uint32_t)-1) 50e8d8bef9SDimitry Andric nameSize = strlen(nameData); 51e8d8bef9SDimitry Andric return {nameData, nameSize}; 52e8d8bef9SDimitry Andric } 535ffd83dbSDimitry Andric 54349cc55cSDimitry Andric bool isLive() const { return used; } 55*04eeddc0SDimitry Andric bool isLazy() const { 56*04eeddc0SDimitry Andric return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; 57*04eeddc0SDimitry Andric } 585ffd83dbSDimitry Andric 59fe6060f1SDimitry Andric virtual uint64_t getVA() const { return 0; } 605ffd83dbSDimitry Andric 61e8d8bef9SDimitry Andric virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } 62e8d8bef9SDimitry Andric 63e8d8bef9SDimitry Andric // Only undefined or dylib symbols can be weak references. A weak reference 64e8d8bef9SDimitry Andric // need not be satisfied at runtime, e.g. due to the symbol not being 65e8d8bef9SDimitry Andric // available on a given target platform. 66e8d8bef9SDimitry Andric virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } 67e8d8bef9SDimitry Andric 68e8d8bef9SDimitry Andric virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } 69e8d8bef9SDimitry Andric 70e8d8bef9SDimitry Andric // Whether this symbol is in the GOT or TLVPointer sections. 71e8d8bef9SDimitry Andric bool isInGot() const { return gotIndex != UINT32_MAX; } 72e8d8bef9SDimitry Andric 73e8d8bef9SDimitry Andric // Whether this symbol is in the StubsSection. 74e8d8bef9SDimitry Andric bool isInStubs() const { return stubsIndex != UINT32_MAX; } 75e8d8bef9SDimitry Andric 76fe6060f1SDimitry Andric uint64_t getStubVA() const; 77fe6060f1SDimitry Andric uint64_t getGotVA() const; 78fe6060f1SDimitry Andric uint64_t getTlvVA() const; 79fe6060f1SDimitry Andric uint64_t resolveBranchVA() const { 80fe6060f1SDimitry Andric assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 81fe6060f1SDimitry Andric return isInStubs() ? getStubVA() : getVA(); 82fe6060f1SDimitry Andric } 83fe6060f1SDimitry Andric uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 84fe6060f1SDimitry Andric uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 85fe6060f1SDimitry Andric 86e8d8bef9SDimitry Andric // The index of this symbol in the GOT or the TLVPointer section, depending 87e8d8bef9SDimitry Andric // on whether it is a thread-local. A given symbol cannot be referenced by 88e8d8bef9SDimitry Andric // both these sections at once. 895ffd83dbSDimitry Andric uint32_t gotIndex = UINT32_MAX; 905ffd83dbSDimitry Andric 91e8d8bef9SDimitry Andric uint32_t stubsIndex = UINT32_MAX; 92e8d8bef9SDimitry Andric 93e8d8bef9SDimitry Andric uint32_t symtabIndex = UINT32_MAX; 94e8d8bef9SDimitry Andric 95fe6060f1SDimitry Andric InputFile *getFile() const { return file; } 96fe6060f1SDimitry Andric 975ffd83dbSDimitry Andric protected: 98fe6060f1SDimitry Andric Symbol(Kind k, StringRefZ name, InputFile *file) 99349cc55cSDimitry Andric : symbolKind(k), nameData(name.data), file(file), nameSize(name.size), 100fe6060f1SDimitry Andric isUsedInRegularObj(!file || isa<ObjFile>(file)), 101fe6060f1SDimitry Andric used(!config->deadStrip) {} 1025ffd83dbSDimitry Andric 1035ffd83dbSDimitry Andric Kind symbolKind; 104e8d8bef9SDimitry Andric const char *nameData; 105fe6060f1SDimitry Andric InputFile *file; 106349cc55cSDimitry Andric mutable uint32_t nameSize; 107fe6060f1SDimitry Andric 108fe6060f1SDimitry Andric public: 109fe6060f1SDimitry Andric // True if this symbol was referenced by a regular (non-bitcode) object. 110fe6060f1SDimitry Andric bool isUsedInRegularObj : 1; 111fe6060f1SDimitry Andric 112fe6060f1SDimitry Andric // True if an undefined or dylib symbol is used from a live section. 113fe6060f1SDimitry Andric bool used : 1; 1145ffd83dbSDimitry Andric }; 1155ffd83dbSDimitry Andric 1165ffd83dbSDimitry Andric class Defined : public Symbol { 1175ffd83dbSDimitry Andric public: 118fe6060f1SDimitry Andric Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, 119fe6060f1SDimitry Andric uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 120349cc55cSDimitry Andric bool isThumb, bool isReferencedDynamically, bool noDeadStrip, 121349cc55cSDimitry Andric bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false); 122e8d8bef9SDimitry Andric 123e8d8bef9SDimitry Andric bool isWeakDef() const override { return weakDef; } 124e8d8bef9SDimitry Andric bool isExternalWeakDef() const { 125e8d8bef9SDimitry Andric return isWeakDef() && isExternal() && !privateExtern; 126e8d8bef9SDimitry Andric } 127349cc55cSDimitry Andric bool isTlv() const override; 128e8d8bef9SDimitry Andric 129e8d8bef9SDimitry Andric bool isExternal() const { return external; } 130e8d8bef9SDimitry Andric bool isAbsolute() const { return isec == nullptr; } 131e8d8bef9SDimitry Andric 132e8d8bef9SDimitry Andric uint64_t getVA() const override; 133e8d8bef9SDimitry Andric 134349cc55cSDimitry Andric // Ensure this symbol's pointers to InputSections point to their canonical 135349cc55cSDimitry Andric // copies. 136349cc55cSDimitry Andric void canonicalize(); 137349cc55cSDimitry Andric 138e8d8bef9SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 1395ffd83dbSDimitry Andric 140349cc55cSDimitry Andric // Place the bitfields first so that they can get placed in the tail padding 141349cc55cSDimitry Andric // of the parent class, on platforms which support it. 142e8d8bef9SDimitry Andric bool overridesWeakDef : 1; 143fe6060f1SDimitry Andric // Whether this symbol should appear in the output binary's export trie. 144e8d8bef9SDimitry Andric bool privateExtern : 1; 145fe6060f1SDimitry Andric // Whether this symbol should appear in the output symbol table. 146fe6060f1SDimitry Andric bool includeInSymtab : 1; 147fe6060f1SDimitry Andric // Only relevant when compiling for Thumb-supporting arm32 archs. 148fe6060f1SDimitry Andric bool thumb : 1; 149fe6060f1SDimitry Andric // Symbols marked referencedDynamically won't be removed from the output's 150fe6060f1SDimitry Andric // symbol table by tools like strip. In theory, this could be set on arbitrary 151fe6060f1SDimitry Andric // symbols in input object files. In practice, it's used solely for the 152fe6060f1SDimitry Andric // synthetic __mh_execute_header symbol. 153fe6060f1SDimitry Andric // This is information for the static linker, and it's also written to the 154fe6060f1SDimitry Andric // output file's symbol table for tools running later (such as `strip`). 155fe6060f1SDimitry Andric bool referencedDynamically : 1; 156fe6060f1SDimitry Andric // Set on symbols that should not be removed by dead code stripping. 157fe6060f1SDimitry Andric // Set for example on `__attribute__((used))` globals, or on some Objective-C 158fe6060f1SDimitry Andric // metadata. This is information only for the static linker and not written 159fe6060f1SDimitry Andric // to the output. 160fe6060f1SDimitry Andric bool noDeadStrip : 1; 161e8d8bef9SDimitry Andric 162349cc55cSDimitry Andric bool weakDefCanBeHidden : 1; 163349cc55cSDimitry Andric 164e8d8bef9SDimitry Andric private: 165e8d8bef9SDimitry Andric const bool weakDef : 1; 166e8d8bef9SDimitry Andric const bool external : 1; 167349cc55cSDimitry Andric 168349cc55cSDimitry Andric public: 169349cc55cSDimitry Andric InputSection *isec; 170349cc55cSDimitry Andric // Contains the offset from the containing subsection. Note that this is 171349cc55cSDimitry Andric // different from nlist::n_value, which is the absolute address of the symbol. 172349cc55cSDimitry Andric uint64_t value; 173349cc55cSDimitry Andric // size is only calculated for regular (non-bitcode) symbols. 174349cc55cSDimitry Andric uint64_t size; 175349cc55cSDimitry Andric ConcatInputSection *unwindEntry = nullptr; 1765ffd83dbSDimitry Andric }; 1775ffd83dbSDimitry Andric 178e8d8bef9SDimitry Andric // This enum does double-duty: as a symbol property, it indicates whether & how 179e8d8bef9SDimitry Andric // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 180e8d8bef9SDimitry Andric // of referenced symbols contained within the file. If there are both weak 181e8d8bef9SDimitry Andric // and strong references to the same file, we will count the file as 182e8d8bef9SDimitry Andric // strongly-referenced. 183e8d8bef9SDimitry Andric enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 184e8d8bef9SDimitry Andric 1855ffd83dbSDimitry Andric class Undefined : public Symbol { 1865ffd83dbSDimitry Andric public: 187fe6060f1SDimitry Andric Undefined(StringRefZ name, InputFile *file, RefState refState) 188fe6060f1SDimitry Andric : Symbol(UndefinedKind, name, file), refState(refState) { 189e8d8bef9SDimitry Andric assert(refState != RefState::Unreferenced); 190e8d8bef9SDimitry Andric } 191e8d8bef9SDimitry Andric 192e8d8bef9SDimitry Andric bool isWeakRef() const override { return refState == RefState::Weak; } 1935ffd83dbSDimitry Andric 1945ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 195e8d8bef9SDimitry Andric 196e8d8bef9SDimitry Andric RefState refState : 2; 197e8d8bef9SDimitry Andric }; 198e8d8bef9SDimitry Andric 199e8d8bef9SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions without 200e8d8bef9SDimitry Andric // initialization expressions (such as "int foo;") to header files. These are 201e8d8bef9SDimitry Andric // called tentative definitions. 202e8d8bef9SDimitry Andric // 203e8d8bef9SDimitry Andric // Using tentative definitions is usually considered a bad practice; you should 204e8d8bef9SDimitry Andric // write only declarations (such as "extern int foo;") to header files. 205e8d8bef9SDimitry Andric // Nevertheless, the linker and the compiler have to do something to support 206e8d8bef9SDimitry Andric // bad code by allowing duplicate definitions for this particular case. 207e8d8bef9SDimitry Andric // 208e8d8bef9SDimitry Andric // The compiler creates common symbols when it sees tentative definitions. 209e8d8bef9SDimitry Andric // (You can suppress this behavior and let the compiler create a regular 210e8d8bef9SDimitry Andric // defined symbol by passing -fno-common. -fno-common is the default in clang 211e8d8bef9SDimitry Andric // as of LLVM 11.0.) When linking the final binary, if there are remaining 212e8d8bef9SDimitry Andric // common symbols after name resolution is complete, the linker converts them 213e8d8bef9SDimitry Andric // to regular defined symbols in a __common section. 214e8d8bef9SDimitry Andric class CommonSymbol : public Symbol { 215e8d8bef9SDimitry Andric public: 216e8d8bef9SDimitry Andric CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 217e8d8bef9SDimitry Andric bool isPrivateExtern) 218fe6060f1SDimitry Andric : Symbol(CommonKind, name, file), size(size), 219e8d8bef9SDimitry Andric align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 220e8d8bef9SDimitry Andric privateExtern(isPrivateExtern) { 221e8d8bef9SDimitry Andric // TODO: cap maximum alignment 222e8d8bef9SDimitry Andric } 223e8d8bef9SDimitry Andric 224e8d8bef9SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 225e8d8bef9SDimitry Andric 226e8d8bef9SDimitry Andric const uint64_t size; 227e8d8bef9SDimitry Andric const uint32_t align; 228e8d8bef9SDimitry Andric const bool privateExtern; 2295ffd83dbSDimitry Andric }; 2305ffd83dbSDimitry Andric 2315ffd83dbSDimitry Andric class DylibSymbol : public Symbol { 2325ffd83dbSDimitry Andric public: 233e8d8bef9SDimitry Andric DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 234e8d8bef9SDimitry Andric RefState refState, bool isTlv) 235fe6060f1SDimitry Andric : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef), 236fe6060f1SDimitry Andric tlv(isTlv) { 237fe6060f1SDimitry Andric if (file && refState > RefState::Unreferenced) 238fe6060f1SDimitry Andric file->numReferencedSymbols++; 239fe6060f1SDimitry Andric } 240e8d8bef9SDimitry Andric 241fe6060f1SDimitry Andric uint64_t getVA() const override; 242e8d8bef9SDimitry Andric bool isWeakDef() const override { return weakDef; } 2434824e7fdSDimitry Andric 2444824e7fdSDimitry Andric // Symbols from weak libraries/frameworks are also weakly-referenced. 2454824e7fdSDimitry Andric bool isWeakRef() const override { 2464824e7fdSDimitry Andric return refState == RefState::Weak || 2474824e7fdSDimitry Andric (file && getFile()->umbrella->forceWeakImport); 2484824e7fdSDimitry Andric } 249e8d8bef9SDimitry Andric bool isReferenced() const { return refState != RefState::Unreferenced; } 250e8d8bef9SDimitry Andric bool isTlv() const override { return tlv; } 251fe6060f1SDimitry Andric bool isDynamicLookup() const { return file == nullptr; } 252e8d8bef9SDimitry Andric bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 2535ffd83dbSDimitry Andric 254fe6060f1SDimitry Andric DylibFile *getFile() const { 255fe6060f1SDimitry Andric assert(!isDynamicLookup()); 256fe6060f1SDimitry Andric return cast<DylibFile>(file); 257fe6060f1SDimitry Andric } 258fe6060f1SDimitry Andric 2595ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 2605ffd83dbSDimitry Andric 261e8d8bef9SDimitry Andric uint32_t stubsHelperIndex = UINT32_MAX; 2625ffd83dbSDimitry Andric uint32_t lazyBindOffset = UINT32_MAX; 263e8d8bef9SDimitry Andric 264fe6060f1SDimitry Andric RefState getRefState() const { return refState; } 265fe6060f1SDimitry Andric 266fe6060f1SDimitry Andric void reference(RefState newState) { 267fe6060f1SDimitry Andric assert(newState > RefState::Unreferenced); 268fe6060f1SDimitry Andric if (refState == RefState::Unreferenced && file) 269fe6060f1SDimitry Andric getFile()->numReferencedSymbols++; 270fe6060f1SDimitry Andric refState = std::max(refState, newState); 271fe6060f1SDimitry Andric } 272fe6060f1SDimitry Andric 273fe6060f1SDimitry Andric void unreference() { 274fe6060f1SDimitry Andric // dynamic_lookup symbols have no file. 275fe6060f1SDimitry Andric if (refState > RefState::Unreferenced && file) { 276fe6060f1SDimitry Andric assert(getFile()->numReferencedSymbols > 0); 277fe6060f1SDimitry Andric getFile()->numReferencedSymbols--; 278fe6060f1SDimitry Andric } 279fe6060f1SDimitry Andric } 280e8d8bef9SDimitry Andric 281e8d8bef9SDimitry Andric private: 282fe6060f1SDimitry Andric RefState refState : 2; 283e8d8bef9SDimitry Andric const bool weakDef : 1; 284e8d8bef9SDimitry Andric const bool tlv : 1; 2855ffd83dbSDimitry Andric }; 2865ffd83dbSDimitry Andric 287*04eeddc0SDimitry Andric class LazyArchive : public Symbol { 2885ffd83dbSDimitry Andric public: 289*04eeddc0SDimitry Andric LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 290*04eeddc0SDimitry Andric : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {} 291fe6060f1SDimitry Andric 292fe6060f1SDimitry Andric ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 293fe6060f1SDimitry Andric void fetchArchiveMember(); 2945ffd83dbSDimitry Andric 295*04eeddc0SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } 2965ffd83dbSDimitry Andric 2975ffd83dbSDimitry Andric private: 2985ffd83dbSDimitry Andric const llvm::object::Archive::Symbol sym; 2995ffd83dbSDimitry Andric }; 3005ffd83dbSDimitry Andric 301*04eeddc0SDimitry Andric // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and 302*04eeddc0SDimitry Andric // --end-lib. 303*04eeddc0SDimitry Andric class LazyObject : public Symbol { 304*04eeddc0SDimitry Andric public: 305*04eeddc0SDimitry Andric LazyObject(InputFile &file, StringRef name) 306*04eeddc0SDimitry Andric : Symbol(LazyObjectKind, name, &file) { 307*04eeddc0SDimitry Andric isUsedInRegularObj = false; 308*04eeddc0SDimitry Andric } 309*04eeddc0SDimitry Andric 310*04eeddc0SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } 311*04eeddc0SDimitry Andric }; 312*04eeddc0SDimitry Andric 3135ffd83dbSDimitry Andric union SymbolUnion { 3145ffd83dbSDimitry Andric alignas(Defined) char a[sizeof(Defined)]; 3155ffd83dbSDimitry Andric alignas(Undefined) char b[sizeof(Undefined)]; 316e8d8bef9SDimitry Andric alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 317e8d8bef9SDimitry Andric alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 318*04eeddc0SDimitry Andric alignas(LazyArchive) char e[sizeof(LazyArchive)]; 319*04eeddc0SDimitry Andric alignas(LazyObject) char f[sizeof(LazyObject)]; 3205ffd83dbSDimitry Andric }; 3215ffd83dbSDimitry Andric 3225ffd83dbSDimitry Andric template <typename T, typename... ArgT> 323e8d8bef9SDimitry Andric T *replaceSymbol(Symbol *s, ArgT &&...arg) { 3245ffd83dbSDimitry Andric static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 3255ffd83dbSDimitry Andric static_assert(alignof(T) <= alignof(SymbolUnion), 3265ffd83dbSDimitry Andric "SymbolUnion not aligned enough"); 3275ffd83dbSDimitry Andric assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 3285ffd83dbSDimitry Andric "Not a Symbol"); 3295ffd83dbSDimitry Andric 330fe6060f1SDimitry Andric bool isUsedInRegularObj = s->isUsedInRegularObj; 331fe6060f1SDimitry Andric bool used = s->used; 332fe6060f1SDimitry Andric T *sym = new (s) T(std::forward<ArgT>(arg)...); 333fe6060f1SDimitry Andric sym->isUsedInRegularObj |= isUsedInRegularObj; 334fe6060f1SDimitry Andric sym->used |= used; 335fe6060f1SDimitry Andric return sym; 3365ffd83dbSDimitry Andric } 3375ffd83dbSDimitry Andric 3385ffd83dbSDimitry Andric } // namespace macho 3395ffd83dbSDimitry Andric 3405ffd83dbSDimitry Andric std::string toString(const macho::Symbol &); 341e8d8bef9SDimitry Andric std::string toMachOString(const llvm::object::Archive::Symbol &); 342e8d8bef9SDimitry Andric 3435ffd83dbSDimitry Andric } // namespace lld 3445ffd83dbSDimitry Andric 3455ffd83dbSDimitry Andric #endif 346