15ffd83dbSDimitry Andric //===- Symbols.h ------------------------------------------------*- C++ -*-===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric 95ffd83dbSDimitry Andric #ifndef LLD_MACHO_SYMBOLS_H 105ffd83dbSDimitry Andric #define LLD_MACHO_SYMBOLS_H 115ffd83dbSDimitry Andric 12*349cc55cSDimitry Andric #include "Config.h" 13fe6060f1SDimitry Andric #include "InputFiles.h" 145ffd83dbSDimitry Andric #include "Target.h" 155ffd83dbSDimitry Andric #include "lld/Common/ErrorHandler.h" 165ffd83dbSDimitry Andric #include "lld/Common/Strings.h" 175ffd83dbSDimitry Andric #include "llvm/Object/Archive.h" 18e8d8bef9SDimitry Andric #include "llvm/Support/MathExtras.h" 195ffd83dbSDimitry Andric 205ffd83dbSDimitry Andric namespace lld { 215ffd83dbSDimitry Andric namespace macho { 225ffd83dbSDimitry Andric 23e8d8bef9SDimitry Andric class MachHeaderSection; 245ffd83dbSDimitry Andric 255ffd83dbSDimitry Andric struct StringRefZ { 265ffd83dbSDimitry Andric StringRefZ(const char *s) : data(s), size(-1) {} 275ffd83dbSDimitry Andric StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 285ffd83dbSDimitry Andric 295ffd83dbSDimitry Andric const char *data; 305ffd83dbSDimitry Andric const uint32_t size; 315ffd83dbSDimitry Andric }; 325ffd83dbSDimitry Andric 335ffd83dbSDimitry Andric class Symbol { 345ffd83dbSDimitry Andric public: 355ffd83dbSDimitry Andric enum Kind { 365ffd83dbSDimitry Andric DefinedKind, 375ffd83dbSDimitry Andric UndefinedKind, 38e8d8bef9SDimitry Andric CommonKind, 395ffd83dbSDimitry Andric DylibKind, 405ffd83dbSDimitry Andric LazyKind, 415ffd83dbSDimitry Andric }; 425ffd83dbSDimitry Andric 43e8d8bef9SDimitry Andric virtual ~Symbol() {} 44e8d8bef9SDimitry Andric 45fe6060f1SDimitry Andric Kind kind() const { return symbolKind; } 465ffd83dbSDimitry Andric 47e8d8bef9SDimitry Andric StringRef getName() const { 48e8d8bef9SDimitry Andric if (nameSize == (uint32_t)-1) 49e8d8bef9SDimitry Andric nameSize = strlen(nameData); 50e8d8bef9SDimitry Andric return {nameData, nameSize}; 51e8d8bef9SDimitry Andric } 525ffd83dbSDimitry Andric 53*349cc55cSDimitry Andric bool isLive() const { return used; } 545ffd83dbSDimitry Andric 55fe6060f1SDimitry Andric virtual uint64_t getVA() const { return 0; } 565ffd83dbSDimitry Andric 57e8d8bef9SDimitry Andric virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } 58e8d8bef9SDimitry Andric 59e8d8bef9SDimitry Andric // Only undefined or dylib symbols can be weak references. A weak reference 60e8d8bef9SDimitry Andric // need not be satisfied at runtime, e.g. due to the symbol not being 61e8d8bef9SDimitry Andric // available on a given target platform. 62e8d8bef9SDimitry Andric virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } 63e8d8bef9SDimitry Andric 64e8d8bef9SDimitry Andric virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } 65e8d8bef9SDimitry Andric 66e8d8bef9SDimitry Andric // Whether this symbol is in the GOT or TLVPointer sections. 67e8d8bef9SDimitry Andric bool isInGot() const { return gotIndex != UINT32_MAX; } 68e8d8bef9SDimitry Andric 69e8d8bef9SDimitry Andric // Whether this symbol is in the StubsSection. 70e8d8bef9SDimitry Andric bool isInStubs() const { return stubsIndex != UINT32_MAX; } 71e8d8bef9SDimitry Andric 72fe6060f1SDimitry Andric uint64_t getStubVA() const; 73fe6060f1SDimitry Andric uint64_t getGotVA() const; 74fe6060f1SDimitry Andric uint64_t getTlvVA() const; 75fe6060f1SDimitry Andric uint64_t resolveBranchVA() const { 76fe6060f1SDimitry Andric assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 77fe6060f1SDimitry Andric return isInStubs() ? getStubVA() : getVA(); 78fe6060f1SDimitry Andric } 79fe6060f1SDimitry Andric uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 80fe6060f1SDimitry Andric uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 81fe6060f1SDimitry Andric 82e8d8bef9SDimitry Andric // The index of this symbol in the GOT or the TLVPointer section, depending 83e8d8bef9SDimitry Andric // on whether it is a thread-local. A given symbol cannot be referenced by 84e8d8bef9SDimitry Andric // both these sections at once. 855ffd83dbSDimitry Andric uint32_t gotIndex = UINT32_MAX; 865ffd83dbSDimitry Andric 87e8d8bef9SDimitry Andric uint32_t stubsIndex = UINT32_MAX; 88e8d8bef9SDimitry Andric 89e8d8bef9SDimitry Andric uint32_t symtabIndex = UINT32_MAX; 90e8d8bef9SDimitry Andric 91fe6060f1SDimitry Andric InputFile *getFile() const { return file; } 92fe6060f1SDimitry Andric 935ffd83dbSDimitry Andric protected: 94fe6060f1SDimitry Andric Symbol(Kind k, StringRefZ name, InputFile *file) 95*349cc55cSDimitry Andric : symbolKind(k), nameData(name.data), file(file), nameSize(name.size), 96fe6060f1SDimitry Andric isUsedInRegularObj(!file || isa<ObjFile>(file)), 97fe6060f1SDimitry Andric used(!config->deadStrip) {} 985ffd83dbSDimitry Andric 995ffd83dbSDimitry Andric Kind symbolKind; 100e8d8bef9SDimitry Andric const char *nameData; 101fe6060f1SDimitry Andric InputFile *file; 102*349cc55cSDimitry Andric mutable uint32_t nameSize; 103fe6060f1SDimitry Andric 104fe6060f1SDimitry Andric public: 105fe6060f1SDimitry Andric // True if this symbol was referenced by a regular (non-bitcode) object. 106fe6060f1SDimitry Andric bool isUsedInRegularObj : 1; 107fe6060f1SDimitry Andric 108fe6060f1SDimitry Andric // True if an undefined or dylib symbol is used from a live section. 109fe6060f1SDimitry Andric bool used : 1; 1105ffd83dbSDimitry Andric }; 1115ffd83dbSDimitry Andric 1125ffd83dbSDimitry Andric class Defined : public Symbol { 1135ffd83dbSDimitry Andric public: 114fe6060f1SDimitry Andric Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, 115fe6060f1SDimitry Andric uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 116*349cc55cSDimitry Andric bool isThumb, bool isReferencedDynamically, bool noDeadStrip, 117*349cc55cSDimitry Andric bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false); 118e8d8bef9SDimitry Andric 119e8d8bef9SDimitry Andric bool isWeakDef() const override { return weakDef; } 120e8d8bef9SDimitry Andric bool isExternalWeakDef() const { 121e8d8bef9SDimitry Andric return isWeakDef() && isExternal() && !privateExtern; 122e8d8bef9SDimitry Andric } 123*349cc55cSDimitry Andric bool isTlv() const override; 124e8d8bef9SDimitry Andric 125e8d8bef9SDimitry Andric bool isExternal() const { return external; } 126e8d8bef9SDimitry Andric bool isAbsolute() const { return isec == nullptr; } 127e8d8bef9SDimitry Andric 128e8d8bef9SDimitry Andric uint64_t getVA() const override; 129e8d8bef9SDimitry Andric 130*349cc55cSDimitry Andric // Ensure this symbol's pointers to InputSections point to their canonical 131*349cc55cSDimitry Andric // copies. 132*349cc55cSDimitry Andric void canonicalize(); 133*349cc55cSDimitry Andric 134e8d8bef9SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 1355ffd83dbSDimitry Andric 136*349cc55cSDimitry Andric // Place the bitfields first so that they can get placed in the tail padding 137*349cc55cSDimitry Andric // of the parent class, on platforms which support it. 138e8d8bef9SDimitry Andric bool overridesWeakDef : 1; 139fe6060f1SDimitry Andric // Whether this symbol should appear in the output binary's export trie. 140e8d8bef9SDimitry Andric bool privateExtern : 1; 141fe6060f1SDimitry Andric // Whether this symbol should appear in the output symbol table. 142fe6060f1SDimitry Andric bool includeInSymtab : 1; 143fe6060f1SDimitry Andric // Only relevant when compiling for Thumb-supporting arm32 archs. 144fe6060f1SDimitry Andric bool thumb : 1; 145fe6060f1SDimitry Andric // Symbols marked referencedDynamically won't be removed from the output's 146fe6060f1SDimitry Andric // symbol table by tools like strip. In theory, this could be set on arbitrary 147fe6060f1SDimitry Andric // symbols in input object files. In practice, it's used solely for the 148fe6060f1SDimitry Andric // synthetic __mh_execute_header symbol. 149fe6060f1SDimitry Andric // This is information for the static linker, and it's also written to the 150fe6060f1SDimitry Andric // output file's symbol table for tools running later (such as `strip`). 151fe6060f1SDimitry Andric bool referencedDynamically : 1; 152fe6060f1SDimitry Andric // Set on symbols that should not be removed by dead code stripping. 153fe6060f1SDimitry Andric // Set for example on `__attribute__((used))` globals, or on some Objective-C 154fe6060f1SDimitry Andric // metadata. This is information only for the static linker and not written 155fe6060f1SDimitry Andric // to the output. 156fe6060f1SDimitry Andric bool noDeadStrip : 1; 157e8d8bef9SDimitry Andric 158*349cc55cSDimitry Andric bool weakDefCanBeHidden : 1; 159*349cc55cSDimitry Andric 160e8d8bef9SDimitry Andric private: 161e8d8bef9SDimitry Andric const bool weakDef : 1; 162e8d8bef9SDimitry Andric const bool external : 1; 163*349cc55cSDimitry Andric 164*349cc55cSDimitry Andric public: 165*349cc55cSDimitry Andric InputSection *isec; 166*349cc55cSDimitry Andric // Contains the offset from the containing subsection. Note that this is 167*349cc55cSDimitry Andric // different from nlist::n_value, which is the absolute address of the symbol. 168*349cc55cSDimitry Andric uint64_t value; 169*349cc55cSDimitry Andric // size is only calculated for regular (non-bitcode) symbols. 170*349cc55cSDimitry Andric uint64_t size; 171*349cc55cSDimitry Andric ConcatInputSection *unwindEntry = nullptr; 1725ffd83dbSDimitry Andric }; 1735ffd83dbSDimitry Andric 174e8d8bef9SDimitry Andric // This enum does double-duty: as a symbol property, it indicates whether & how 175e8d8bef9SDimitry Andric // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 176e8d8bef9SDimitry Andric // of referenced symbols contained within the file. If there are both weak 177e8d8bef9SDimitry Andric // and strong references to the same file, we will count the file as 178e8d8bef9SDimitry Andric // strongly-referenced. 179e8d8bef9SDimitry Andric enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 180e8d8bef9SDimitry Andric 1815ffd83dbSDimitry Andric class Undefined : public Symbol { 1825ffd83dbSDimitry Andric public: 183fe6060f1SDimitry Andric Undefined(StringRefZ name, InputFile *file, RefState refState) 184fe6060f1SDimitry Andric : Symbol(UndefinedKind, name, file), refState(refState) { 185e8d8bef9SDimitry Andric assert(refState != RefState::Unreferenced); 186e8d8bef9SDimitry Andric } 187e8d8bef9SDimitry Andric 188e8d8bef9SDimitry Andric bool isWeakRef() const override { return refState == RefState::Weak; } 1895ffd83dbSDimitry Andric 1905ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 191e8d8bef9SDimitry Andric 192e8d8bef9SDimitry Andric RefState refState : 2; 193e8d8bef9SDimitry Andric }; 194e8d8bef9SDimitry Andric 195e8d8bef9SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions without 196e8d8bef9SDimitry Andric // initialization expressions (such as "int foo;") to header files. These are 197e8d8bef9SDimitry Andric // called tentative definitions. 198e8d8bef9SDimitry Andric // 199e8d8bef9SDimitry Andric // Using tentative definitions is usually considered a bad practice; you should 200e8d8bef9SDimitry Andric // write only declarations (such as "extern int foo;") to header files. 201e8d8bef9SDimitry Andric // Nevertheless, the linker and the compiler have to do something to support 202e8d8bef9SDimitry Andric // bad code by allowing duplicate definitions for this particular case. 203e8d8bef9SDimitry Andric // 204e8d8bef9SDimitry Andric // The compiler creates common symbols when it sees tentative definitions. 205e8d8bef9SDimitry Andric // (You can suppress this behavior and let the compiler create a regular 206e8d8bef9SDimitry Andric // defined symbol by passing -fno-common. -fno-common is the default in clang 207e8d8bef9SDimitry Andric // as of LLVM 11.0.) When linking the final binary, if there are remaining 208e8d8bef9SDimitry Andric // common symbols after name resolution is complete, the linker converts them 209e8d8bef9SDimitry Andric // to regular defined symbols in a __common section. 210e8d8bef9SDimitry Andric class CommonSymbol : public Symbol { 211e8d8bef9SDimitry Andric public: 212e8d8bef9SDimitry Andric CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 213e8d8bef9SDimitry Andric bool isPrivateExtern) 214fe6060f1SDimitry Andric : Symbol(CommonKind, name, file), size(size), 215e8d8bef9SDimitry Andric align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 216e8d8bef9SDimitry Andric privateExtern(isPrivateExtern) { 217e8d8bef9SDimitry Andric // TODO: cap maximum alignment 218e8d8bef9SDimitry Andric } 219e8d8bef9SDimitry Andric 220e8d8bef9SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 221e8d8bef9SDimitry Andric 222e8d8bef9SDimitry Andric const uint64_t size; 223e8d8bef9SDimitry Andric const uint32_t align; 224e8d8bef9SDimitry Andric const bool privateExtern; 2255ffd83dbSDimitry Andric }; 2265ffd83dbSDimitry Andric 2275ffd83dbSDimitry Andric class DylibSymbol : public Symbol { 2285ffd83dbSDimitry Andric public: 229e8d8bef9SDimitry Andric DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 230e8d8bef9SDimitry Andric RefState refState, bool isTlv) 231fe6060f1SDimitry Andric : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef), 232fe6060f1SDimitry Andric tlv(isTlv) { 233fe6060f1SDimitry Andric if (file && refState > RefState::Unreferenced) 234fe6060f1SDimitry Andric file->numReferencedSymbols++; 235fe6060f1SDimitry Andric } 236e8d8bef9SDimitry Andric 237fe6060f1SDimitry Andric uint64_t getVA() const override; 238e8d8bef9SDimitry Andric bool isWeakDef() const override { return weakDef; } 239e8d8bef9SDimitry Andric bool isWeakRef() const override { return refState == RefState::Weak; } 240e8d8bef9SDimitry Andric bool isReferenced() const { return refState != RefState::Unreferenced; } 241e8d8bef9SDimitry Andric bool isTlv() const override { return tlv; } 242fe6060f1SDimitry Andric bool isDynamicLookup() const { return file == nullptr; } 243e8d8bef9SDimitry Andric bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 2445ffd83dbSDimitry Andric 245fe6060f1SDimitry Andric DylibFile *getFile() const { 246fe6060f1SDimitry Andric assert(!isDynamicLookup()); 247fe6060f1SDimitry Andric return cast<DylibFile>(file); 248fe6060f1SDimitry Andric } 249fe6060f1SDimitry Andric 2505ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 2515ffd83dbSDimitry Andric 252e8d8bef9SDimitry Andric uint32_t stubsHelperIndex = UINT32_MAX; 2535ffd83dbSDimitry Andric uint32_t lazyBindOffset = UINT32_MAX; 254e8d8bef9SDimitry Andric 255fe6060f1SDimitry Andric RefState getRefState() const { return refState; } 256fe6060f1SDimitry Andric 257fe6060f1SDimitry Andric void reference(RefState newState) { 258fe6060f1SDimitry Andric assert(newState > RefState::Unreferenced); 259fe6060f1SDimitry Andric if (refState == RefState::Unreferenced && file) 260fe6060f1SDimitry Andric getFile()->numReferencedSymbols++; 261fe6060f1SDimitry Andric refState = std::max(refState, newState); 262fe6060f1SDimitry Andric } 263fe6060f1SDimitry Andric 264fe6060f1SDimitry Andric void unreference() { 265fe6060f1SDimitry Andric // dynamic_lookup symbols have no file. 266fe6060f1SDimitry Andric if (refState > RefState::Unreferenced && file) { 267fe6060f1SDimitry Andric assert(getFile()->numReferencedSymbols > 0); 268fe6060f1SDimitry Andric getFile()->numReferencedSymbols--; 269fe6060f1SDimitry Andric } 270fe6060f1SDimitry Andric } 271e8d8bef9SDimitry Andric 272e8d8bef9SDimitry Andric private: 273fe6060f1SDimitry Andric RefState refState : 2; 274e8d8bef9SDimitry Andric const bool weakDef : 1; 275e8d8bef9SDimitry Andric const bool tlv : 1; 2765ffd83dbSDimitry Andric }; 2775ffd83dbSDimitry Andric 2785ffd83dbSDimitry Andric class LazySymbol : public Symbol { 2795ffd83dbSDimitry Andric public: 2805ffd83dbSDimitry Andric LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 281fe6060f1SDimitry Andric : Symbol(LazyKind, sym.getName(), file), sym(sym) {} 282fe6060f1SDimitry Andric 283fe6060f1SDimitry Andric ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 284fe6060f1SDimitry Andric void fetchArchiveMember(); 2855ffd83dbSDimitry Andric 2865ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 2875ffd83dbSDimitry Andric 2885ffd83dbSDimitry Andric private: 2895ffd83dbSDimitry Andric const llvm::object::Archive::Symbol sym; 2905ffd83dbSDimitry Andric }; 2915ffd83dbSDimitry Andric 2925ffd83dbSDimitry Andric union SymbolUnion { 2935ffd83dbSDimitry Andric alignas(Defined) char a[sizeof(Defined)]; 2945ffd83dbSDimitry Andric alignas(Undefined) char b[sizeof(Undefined)]; 295e8d8bef9SDimitry Andric alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 296e8d8bef9SDimitry Andric alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 297e8d8bef9SDimitry Andric alignas(LazySymbol) char e[sizeof(LazySymbol)]; 2985ffd83dbSDimitry Andric }; 2995ffd83dbSDimitry Andric 3005ffd83dbSDimitry Andric template <typename T, typename... ArgT> 301e8d8bef9SDimitry Andric T *replaceSymbol(Symbol *s, ArgT &&...arg) { 3025ffd83dbSDimitry Andric static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 3035ffd83dbSDimitry Andric static_assert(alignof(T) <= alignof(SymbolUnion), 3045ffd83dbSDimitry Andric "SymbolUnion not aligned enough"); 3055ffd83dbSDimitry Andric assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 3065ffd83dbSDimitry Andric "Not a Symbol"); 3075ffd83dbSDimitry Andric 308fe6060f1SDimitry Andric bool isUsedInRegularObj = s->isUsedInRegularObj; 309fe6060f1SDimitry Andric bool used = s->used; 310fe6060f1SDimitry Andric T *sym = new (s) T(std::forward<ArgT>(arg)...); 311fe6060f1SDimitry Andric sym->isUsedInRegularObj |= isUsedInRegularObj; 312fe6060f1SDimitry Andric sym->used |= used; 313fe6060f1SDimitry Andric return sym; 3145ffd83dbSDimitry Andric } 3155ffd83dbSDimitry Andric 3165ffd83dbSDimitry Andric } // namespace macho 3175ffd83dbSDimitry Andric 3185ffd83dbSDimitry Andric std::string toString(const macho::Symbol &); 319e8d8bef9SDimitry Andric std::string toMachOString(const llvm::object::Archive::Symbol &); 320e8d8bef9SDimitry Andric 3215ffd83dbSDimitry Andric } // namespace lld 3225ffd83dbSDimitry Andric 3235ffd83dbSDimitry Andric #endif 324