15ffd83dbSDimitry Andric //===- Symbols.h ------------------------------------------------*- C++ -*-===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric 95ffd83dbSDimitry Andric #ifndef LLD_MACHO_SYMBOLS_H 105ffd83dbSDimitry Andric #define LLD_MACHO_SYMBOLS_H 115ffd83dbSDimitry Andric 12*fe6060f1SDimitry Andric #include "InputFiles.h" 135ffd83dbSDimitry Andric #include "InputSection.h" 145ffd83dbSDimitry Andric #include "Target.h" 155ffd83dbSDimitry Andric #include "lld/Common/ErrorHandler.h" 165ffd83dbSDimitry Andric #include "lld/Common/Strings.h" 175ffd83dbSDimitry Andric #include "llvm/Object/Archive.h" 18e8d8bef9SDimitry Andric #include "llvm/Support/MathExtras.h" 195ffd83dbSDimitry Andric 205ffd83dbSDimitry Andric namespace lld { 215ffd83dbSDimitry Andric namespace macho { 225ffd83dbSDimitry Andric 235ffd83dbSDimitry Andric class InputSection; 24e8d8bef9SDimitry Andric class MachHeaderSection; 255ffd83dbSDimitry Andric 265ffd83dbSDimitry Andric struct StringRefZ { 275ffd83dbSDimitry Andric StringRefZ(const char *s) : data(s), size(-1) {} 285ffd83dbSDimitry Andric StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 295ffd83dbSDimitry Andric 305ffd83dbSDimitry Andric const char *data; 315ffd83dbSDimitry Andric const uint32_t size; 325ffd83dbSDimitry Andric }; 335ffd83dbSDimitry Andric 345ffd83dbSDimitry Andric class Symbol { 355ffd83dbSDimitry Andric public: 365ffd83dbSDimitry Andric enum Kind { 375ffd83dbSDimitry Andric DefinedKind, 385ffd83dbSDimitry Andric UndefinedKind, 39e8d8bef9SDimitry Andric CommonKind, 405ffd83dbSDimitry Andric DylibKind, 415ffd83dbSDimitry Andric LazyKind, 425ffd83dbSDimitry Andric }; 435ffd83dbSDimitry Andric 44e8d8bef9SDimitry Andric virtual ~Symbol() {} 45e8d8bef9SDimitry Andric 46*fe6060f1SDimitry Andric Kind kind() const { return symbolKind; } 475ffd83dbSDimitry Andric 48e8d8bef9SDimitry Andric StringRef getName() const { 49e8d8bef9SDimitry Andric if (nameSize == (uint32_t)-1) 50e8d8bef9SDimitry Andric nameSize = strlen(nameData); 51e8d8bef9SDimitry Andric return {nameData, nameSize}; 52e8d8bef9SDimitry Andric } 535ffd83dbSDimitry Andric 54*fe6060f1SDimitry Andric bool isLive() const; 555ffd83dbSDimitry Andric 56*fe6060f1SDimitry Andric virtual uint64_t getVA() const { return 0; } 575ffd83dbSDimitry Andric 58e8d8bef9SDimitry Andric virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } 59e8d8bef9SDimitry Andric 60e8d8bef9SDimitry Andric // Only undefined or dylib symbols can be weak references. A weak reference 61e8d8bef9SDimitry Andric // need not be satisfied at runtime, e.g. due to the symbol not being 62e8d8bef9SDimitry Andric // available on a given target platform. 63e8d8bef9SDimitry Andric virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } 64e8d8bef9SDimitry Andric 65e8d8bef9SDimitry Andric virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } 66e8d8bef9SDimitry Andric 67e8d8bef9SDimitry Andric // Whether this symbol is in the GOT or TLVPointer sections. 68e8d8bef9SDimitry Andric bool isInGot() const { return gotIndex != UINT32_MAX; } 69e8d8bef9SDimitry Andric 70e8d8bef9SDimitry Andric // Whether this symbol is in the StubsSection. 71e8d8bef9SDimitry Andric bool isInStubs() const { return stubsIndex != UINT32_MAX; } 72e8d8bef9SDimitry Andric 73*fe6060f1SDimitry Andric uint64_t getStubVA() const; 74*fe6060f1SDimitry Andric uint64_t getGotVA() const; 75*fe6060f1SDimitry Andric uint64_t getTlvVA() const; 76*fe6060f1SDimitry Andric uint64_t resolveBranchVA() const { 77*fe6060f1SDimitry Andric assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 78*fe6060f1SDimitry Andric return isInStubs() ? getStubVA() : getVA(); 79*fe6060f1SDimitry Andric } 80*fe6060f1SDimitry Andric uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 81*fe6060f1SDimitry Andric uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 82*fe6060f1SDimitry Andric 83e8d8bef9SDimitry Andric // The index of this symbol in the GOT or the TLVPointer section, depending 84e8d8bef9SDimitry Andric // on whether it is a thread-local. A given symbol cannot be referenced by 85e8d8bef9SDimitry Andric // both these sections at once. 865ffd83dbSDimitry Andric uint32_t gotIndex = UINT32_MAX; 875ffd83dbSDimitry Andric 88e8d8bef9SDimitry Andric uint32_t stubsIndex = UINT32_MAX; 89e8d8bef9SDimitry Andric 90e8d8bef9SDimitry Andric uint32_t symtabIndex = UINT32_MAX; 91e8d8bef9SDimitry Andric 92*fe6060f1SDimitry Andric InputFile *getFile() const { return file; } 93*fe6060f1SDimitry Andric 945ffd83dbSDimitry Andric protected: 95*fe6060f1SDimitry Andric Symbol(Kind k, StringRefZ name, InputFile *file) 96*fe6060f1SDimitry Andric : symbolKind(k), nameData(name.data), nameSize(name.size), file(file), 97*fe6060f1SDimitry Andric isUsedInRegularObj(!file || isa<ObjFile>(file)), 98*fe6060f1SDimitry Andric used(!config->deadStrip) {} 995ffd83dbSDimitry Andric 1005ffd83dbSDimitry Andric Kind symbolKind; 101e8d8bef9SDimitry Andric const char *nameData; 102e8d8bef9SDimitry Andric mutable uint32_t nameSize; 103*fe6060f1SDimitry Andric InputFile *file; 104*fe6060f1SDimitry Andric 105*fe6060f1SDimitry Andric public: 106*fe6060f1SDimitry Andric // True if this symbol was referenced by a regular (non-bitcode) object. 107*fe6060f1SDimitry Andric bool isUsedInRegularObj : 1; 108*fe6060f1SDimitry Andric 109*fe6060f1SDimitry Andric // True if an undefined or dylib symbol is used from a live section. 110*fe6060f1SDimitry Andric bool used : 1; 1115ffd83dbSDimitry Andric }; 1125ffd83dbSDimitry Andric 1135ffd83dbSDimitry Andric class Defined : public Symbol { 1145ffd83dbSDimitry Andric public: 115*fe6060f1SDimitry Andric Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, 116*fe6060f1SDimitry Andric uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 117*fe6060f1SDimitry Andric bool isThumb, bool isReferencedDynamically, bool noDeadStrip) 118*fe6060f1SDimitry Andric : Symbol(DefinedKind, name, file), isec(isec), value(value), size(size), 119e8d8bef9SDimitry Andric overridesWeakDef(false), privateExtern(isPrivateExtern), 120*fe6060f1SDimitry Andric includeInSymtab(true), thumb(isThumb), 121*fe6060f1SDimitry Andric referencedDynamically(isReferencedDynamically), 122*fe6060f1SDimitry Andric noDeadStrip(noDeadStrip), weakDef(isWeakDef), external(isExternal) { 123*fe6060f1SDimitry Andric if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) 124*fe6060f1SDimitry Andric concatIsec->numRefs++; 125*fe6060f1SDimitry Andric } 126e8d8bef9SDimitry Andric 127e8d8bef9SDimitry Andric bool isWeakDef() const override { return weakDef; } 128e8d8bef9SDimitry Andric bool isExternalWeakDef() const { 129e8d8bef9SDimitry Andric return isWeakDef() && isExternal() && !privateExtern; 130e8d8bef9SDimitry Andric } 131e8d8bef9SDimitry Andric bool isTlv() const override { 132*fe6060f1SDimitry Andric return !isAbsolute() && isThreadLocalVariables(isec->getFlags()); 133e8d8bef9SDimitry Andric } 134e8d8bef9SDimitry Andric 135e8d8bef9SDimitry Andric bool isExternal() const { return external; } 136e8d8bef9SDimitry Andric bool isAbsolute() const { return isec == nullptr; } 137e8d8bef9SDimitry Andric 138e8d8bef9SDimitry Andric uint64_t getVA() const override; 139e8d8bef9SDimitry Andric 140e8d8bef9SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 1415ffd83dbSDimitry Andric 1425ffd83dbSDimitry Andric InputSection *isec; 143*fe6060f1SDimitry Andric // Contains the offset from the containing subsection. Note that this is 144*fe6060f1SDimitry Andric // different from nlist::n_value, which is the absolute address of the symbol. 145*fe6060f1SDimitry Andric uint64_t value; 146*fe6060f1SDimitry Andric // size is only calculated for regular (non-bitcode) symbols. 147*fe6060f1SDimitry Andric uint64_t size; 1485ffd83dbSDimitry Andric 149e8d8bef9SDimitry Andric bool overridesWeakDef : 1; 150*fe6060f1SDimitry Andric // Whether this symbol should appear in the output binary's export trie. 151e8d8bef9SDimitry Andric bool privateExtern : 1; 152*fe6060f1SDimitry Andric // Whether this symbol should appear in the output symbol table. 153*fe6060f1SDimitry Andric bool includeInSymtab : 1; 154*fe6060f1SDimitry Andric // Only relevant when compiling for Thumb-supporting arm32 archs. 155*fe6060f1SDimitry Andric bool thumb : 1; 156*fe6060f1SDimitry Andric // Symbols marked referencedDynamically won't be removed from the output's 157*fe6060f1SDimitry Andric // symbol table by tools like strip. In theory, this could be set on arbitrary 158*fe6060f1SDimitry Andric // symbols in input object files. In practice, it's used solely for the 159*fe6060f1SDimitry Andric // synthetic __mh_execute_header symbol. 160*fe6060f1SDimitry Andric // This is information for the static linker, and it's also written to the 161*fe6060f1SDimitry Andric // output file's symbol table for tools running later (such as `strip`). 162*fe6060f1SDimitry Andric bool referencedDynamically : 1; 163*fe6060f1SDimitry Andric // Set on symbols that should not be removed by dead code stripping. 164*fe6060f1SDimitry Andric // Set for example on `__attribute__((used))` globals, or on some Objective-C 165*fe6060f1SDimitry Andric // metadata. This is information only for the static linker and not written 166*fe6060f1SDimitry Andric // to the output. 167*fe6060f1SDimitry Andric bool noDeadStrip : 1; 168e8d8bef9SDimitry Andric 169e8d8bef9SDimitry Andric private: 170e8d8bef9SDimitry Andric const bool weakDef : 1; 171e8d8bef9SDimitry Andric const bool external : 1; 1725ffd83dbSDimitry Andric }; 1735ffd83dbSDimitry Andric 174e8d8bef9SDimitry Andric // This enum does double-duty: as a symbol property, it indicates whether & how 175e8d8bef9SDimitry Andric // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 176e8d8bef9SDimitry Andric // of referenced symbols contained within the file. If there are both weak 177e8d8bef9SDimitry Andric // and strong references to the same file, we will count the file as 178e8d8bef9SDimitry Andric // strongly-referenced. 179e8d8bef9SDimitry Andric enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 180e8d8bef9SDimitry Andric 1815ffd83dbSDimitry Andric class Undefined : public Symbol { 1825ffd83dbSDimitry Andric public: 183*fe6060f1SDimitry Andric Undefined(StringRefZ name, InputFile *file, RefState refState) 184*fe6060f1SDimitry Andric : Symbol(UndefinedKind, name, file), refState(refState) { 185e8d8bef9SDimitry Andric assert(refState != RefState::Unreferenced); 186e8d8bef9SDimitry Andric } 187e8d8bef9SDimitry Andric 188e8d8bef9SDimitry Andric bool isWeakRef() const override { return refState == RefState::Weak; } 1895ffd83dbSDimitry Andric 1905ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 191e8d8bef9SDimitry Andric 192e8d8bef9SDimitry Andric RefState refState : 2; 193e8d8bef9SDimitry Andric }; 194e8d8bef9SDimitry Andric 195e8d8bef9SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions without 196e8d8bef9SDimitry Andric // initialization expressions (such as "int foo;") to header files. These are 197e8d8bef9SDimitry Andric // called tentative definitions. 198e8d8bef9SDimitry Andric // 199e8d8bef9SDimitry Andric // Using tentative definitions is usually considered a bad practice; you should 200e8d8bef9SDimitry Andric // write only declarations (such as "extern int foo;") to header files. 201e8d8bef9SDimitry Andric // Nevertheless, the linker and the compiler have to do something to support 202e8d8bef9SDimitry Andric // bad code by allowing duplicate definitions for this particular case. 203e8d8bef9SDimitry Andric // 204e8d8bef9SDimitry Andric // The compiler creates common symbols when it sees tentative definitions. 205e8d8bef9SDimitry Andric // (You can suppress this behavior and let the compiler create a regular 206e8d8bef9SDimitry Andric // defined symbol by passing -fno-common. -fno-common is the default in clang 207e8d8bef9SDimitry Andric // as of LLVM 11.0.) When linking the final binary, if there are remaining 208e8d8bef9SDimitry Andric // common symbols after name resolution is complete, the linker converts them 209e8d8bef9SDimitry Andric // to regular defined symbols in a __common section. 210e8d8bef9SDimitry Andric class CommonSymbol : public Symbol { 211e8d8bef9SDimitry Andric public: 212e8d8bef9SDimitry Andric CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 213e8d8bef9SDimitry Andric bool isPrivateExtern) 214*fe6060f1SDimitry Andric : Symbol(CommonKind, name, file), size(size), 215e8d8bef9SDimitry Andric align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 216e8d8bef9SDimitry Andric privateExtern(isPrivateExtern) { 217e8d8bef9SDimitry Andric // TODO: cap maximum alignment 218e8d8bef9SDimitry Andric } 219e8d8bef9SDimitry Andric 220e8d8bef9SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 221e8d8bef9SDimitry Andric 222e8d8bef9SDimitry Andric const uint64_t size; 223e8d8bef9SDimitry Andric const uint32_t align; 224e8d8bef9SDimitry Andric const bool privateExtern; 2255ffd83dbSDimitry Andric }; 2265ffd83dbSDimitry Andric 2275ffd83dbSDimitry Andric class DylibSymbol : public Symbol { 2285ffd83dbSDimitry Andric public: 229e8d8bef9SDimitry Andric DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 230e8d8bef9SDimitry Andric RefState refState, bool isTlv) 231*fe6060f1SDimitry Andric : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef), 232*fe6060f1SDimitry Andric tlv(isTlv) { 233*fe6060f1SDimitry Andric if (file && refState > RefState::Unreferenced) 234*fe6060f1SDimitry Andric file->numReferencedSymbols++; 235*fe6060f1SDimitry Andric } 236e8d8bef9SDimitry Andric 237*fe6060f1SDimitry Andric uint64_t getVA() const override; 238e8d8bef9SDimitry Andric bool isWeakDef() const override { return weakDef; } 239e8d8bef9SDimitry Andric bool isWeakRef() const override { return refState == RefState::Weak; } 240e8d8bef9SDimitry Andric bool isReferenced() const { return refState != RefState::Unreferenced; } 241e8d8bef9SDimitry Andric bool isTlv() const override { return tlv; } 242*fe6060f1SDimitry Andric bool isDynamicLookup() const { return file == nullptr; } 243e8d8bef9SDimitry Andric bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 2445ffd83dbSDimitry Andric 245*fe6060f1SDimitry Andric DylibFile *getFile() const { 246*fe6060f1SDimitry Andric assert(!isDynamicLookup()); 247*fe6060f1SDimitry Andric return cast<DylibFile>(file); 248*fe6060f1SDimitry Andric } 249*fe6060f1SDimitry Andric 2505ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 2515ffd83dbSDimitry Andric 252e8d8bef9SDimitry Andric uint32_t stubsHelperIndex = UINT32_MAX; 2535ffd83dbSDimitry Andric uint32_t lazyBindOffset = UINT32_MAX; 254e8d8bef9SDimitry Andric 255*fe6060f1SDimitry Andric RefState getRefState() const { return refState; } 256*fe6060f1SDimitry Andric 257*fe6060f1SDimitry Andric void reference(RefState newState) { 258*fe6060f1SDimitry Andric assert(newState > RefState::Unreferenced); 259*fe6060f1SDimitry Andric if (refState == RefState::Unreferenced && file) 260*fe6060f1SDimitry Andric getFile()->numReferencedSymbols++; 261*fe6060f1SDimitry Andric refState = std::max(refState, newState); 262*fe6060f1SDimitry Andric } 263*fe6060f1SDimitry Andric 264*fe6060f1SDimitry Andric void unreference() { 265*fe6060f1SDimitry Andric // dynamic_lookup symbols have no file. 266*fe6060f1SDimitry Andric if (refState > RefState::Unreferenced && file) { 267*fe6060f1SDimitry Andric assert(getFile()->numReferencedSymbols > 0); 268*fe6060f1SDimitry Andric getFile()->numReferencedSymbols--; 269*fe6060f1SDimitry Andric } 270*fe6060f1SDimitry Andric } 271e8d8bef9SDimitry Andric 272e8d8bef9SDimitry Andric private: 273*fe6060f1SDimitry Andric RefState refState : 2; 274e8d8bef9SDimitry Andric const bool weakDef : 1; 275e8d8bef9SDimitry Andric const bool tlv : 1; 2765ffd83dbSDimitry Andric }; 2775ffd83dbSDimitry Andric 2785ffd83dbSDimitry Andric class LazySymbol : public Symbol { 2795ffd83dbSDimitry Andric public: 2805ffd83dbSDimitry Andric LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 281*fe6060f1SDimitry Andric : Symbol(LazyKind, sym.getName(), file), sym(sym) {} 282*fe6060f1SDimitry Andric 283*fe6060f1SDimitry Andric ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 284*fe6060f1SDimitry Andric void fetchArchiveMember(); 2855ffd83dbSDimitry Andric 2865ffd83dbSDimitry Andric static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 2875ffd83dbSDimitry Andric 2885ffd83dbSDimitry Andric private: 2895ffd83dbSDimitry Andric const llvm::object::Archive::Symbol sym; 2905ffd83dbSDimitry Andric }; 2915ffd83dbSDimitry Andric 2925ffd83dbSDimitry Andric union SymbolUnion { 2935ffd83dbSDimitry Andric alignas(Defined) char a[sizeof(Defined)]; 2945ffd83dbSDimitry Andric alignas(Undefined) char b[sizeof(Undefined)]; 295e8d8bef9SDimitry Andric alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 296e8d8bef9SDimitry Andric alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 297e8d8bef9SDimitry Andric alignas(LazySymbol) char e[sizeof(LazySymbol)]; 2985ffd83dbSDimitry Andric }; 2995ffd83dbSDimitry Andric 3005ffd83dbSDimitry Andric template <typename T, typename... ArgT> 301e8d8bef9SDimitry Andric T *replaceSymbol(Symbol *s, ArgT &&...arg) { 3025ffd83dbSDimitry Andric static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 3035ffd83dbSDimitry Andric static_assert(alignof(T) <= alignof(SymbolUnion), 3045ffd83dbSDimitry Andric "SymbolUnion not aligned enough"); 3055ffd83dbSDimitry Andric assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 3065ffd83dbSDimitry Andric "Not a Symbol"); 3075ffd83dbSDimitry Andric 308*fe6060f1SDimitry Andric bool isUsedInRegularObj = s->isUsedInRegularObj; 309*fe6060f1SDimitry Andric bool used = s->used; 310*fe6060f1SDimitry Andric T *sym = new (s) T(std::forward<ArgT>(arg)...); 311*fe6060f1SDimitry Andric sym->isUsedInRegularObj |= isUsedInRegularObj; 312*fe6060f1SDimitry Andric sym->used |= used; 313*fe6060f1SDimitry Andric return sym; 3145ffd83dbSDimitry Andric } 3155ffd83dbSDimitry Andric 3165ffd83dbSDimitry Andric } // namespace macho 3175ffd83dbSDimitry Andric 3185ffd83dbSDimitry Andric std::string toString(const macho::Symbol &); 319e8d8bef9SDimitry Andric std::string toMachOString(const llvm::object::Archive::Symbol &); 320e8d8bef9SDimitry Andric 3215ffd83dbSDimitry Andric } // namespace lld 3225ffd83dbSDimitry Andric 3235ffd83dbSDimitry Andric #endif 324