1*0b57cec5SDimitry Andric //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This file defines various types of Symbols. 10*0b57cec5SDimitry Andric // 11*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 12*0b57cec5SDimitry Andric 13*0b57cec5SDimitry Andric #ifndef LLD_ELF_SYMBOLS_H 14*0b57cec5SDimitry Andric #define LLD_ELF_SYMBOLS_H 15*0b57cec5SDimitry Andric 16*0b57cec5SDimitry Andric #include "InputFiles.h" 17*0b57cec5SDimitry Andric #include "InputSection.h" 18*0b57cec5SDimitry Andric #include "lld/Common/LLVM.h" 19*0b57cec5SDimitry Andric #include "lld/Common/Strings.h" 20*0b57cec5SDimitry Andric #include "llvm/Object/Archive.h" 21*0b57cec5SDimitry Andric #include "llvm/Object/ELF.h" 22*0b57cec5SDimitry Andric 23*0b57cec5SDimitry Andric namespace lld { 24*0b57cec5SDimitry Andric namespace elf { 25*0b57cec5SDimitry Andric class CommonSymbol; 26*0b57cec5SDimitry Andric class Defined; 27*0b57cec5SDimitry Andric class InputFile; 28*0b57cec5SDimitry Andric class LazyArchive; 29*0b57cec5SDimitry Andric class LazyObject; 30*0b57cec5SDimitry Andric class SharedSymbol; 31*0b57cec5SDimitry Andric class Symbol; 32*0b57cec5SDimitry Andric class Undefined; 33*0b57cec5SDimitry Andric } // namespace elf 34*0b57cec5SDimitry Andric 35*0b57cec5SDimitry Andric std::string toString(const elf::Symbol &); 36*0b57cec5SDimitry Andric 37*0b57cec5SDimitry Andric // There are two different ways to convert an 
Archive::Symbol to a string: 38*0b57cec5SDimitry Andric // One for Microsoft name mangling and one for Itanium name mangling. 39*0b57cec5SDimitry Andric // Call the functions toCOFFString and toELFString, not just toString. 40*0b57cec5SDimitry Andric std::string toELFString(const elf::Archive::Symbol &); 41*0b57cec5SDimitry Andric 42*0b57cec5SDimitry Andric namespace elf { 43*0b57cec5SDimitry Andric 44*0b57cec5SDimitry Andric // This is a StringRef-like container that doesn't run strlen(). 45*0b57cec5SDimitry Andric // 46*0b57cec5SDimitry Andric // ELF string tables contain a lot of null-terminated strings. Most of them 47*0b57cec5SDimitry Andric // are not necessary for the linker because they are names of local symbols, 48*0b57cec5SDimitry Andric // and the linker doesn't use local symbol names for name resolution. So, we 49*0b57cec5SDimitry Andric // use this class to represents strings read from string tables. 50*0b57cec5SDimitry Andric struct StringRefZ { 51*0b57cec5SDimitry Andric StringRefZ(const char *s) : data(s), size(-1) {} 52*0b57cec5SDimitry Andric StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 53*0b57cec5SDimitry Andric 54*0b57cec5SDimitry Andric const char *data; 55*0b57cec5SDimitry Andric const uint32_t size; 56*0b57cec5SDimitry Andric }; 57*0b57cec5SDimitry Andric 58*0b57cec5SDimitry Andric // The base class for real symbol classes. 
59*0b57cec5SDimitry Andric class Symbol { 60*0b57cec5SDimitry Andric public: 61*0b57cec5SDimitry Andric enum Kind { 62*0b57cec5SDimitry Andric PlaceholderKind, 63*0b57cec5SDimitry Andric DefinedKind, 64*0b57cec5SDimitry Andric CommonKind, 65*0b57cec5SDimitry Andric SharedKind, 66*0b57cec5SDimitry Andric UndefinedKind, 67*0b57cec5SDimitry Andric LazyArchiveKind, 68*0b57cec5SDimitry Andric LazyObjectKind, 69*0b57cec5SDimitry Andric }; 70*0b57cec5SDimitry Andric 71*0b57cec5SDimitry Andric Kind kind() const { return static_cast<Kind>(symbolKind); } 72*0b57cec5SDimitry Andric 73*0b57cec5SDimitry Andric // The file from which this symbol was created. 74*0b57cec5SDimitry Andric InputFile *file; 75*0b57cec5SDimitry Andric 76*0b57cec5SDimitry Andric protected: 77*0b57cec5SDimitry Andric const char *nameData; 78*0b57cec5SDimitry Andric mutable uint32_t nameSize; 79*0b57cec5SDimitry Andric 80*0b57cec5SDimitry Andric public: 81*0b57cec5SDimitry Andric uint32_t dynsymIndex = 0; 82*0b57cec5SDimitry Andric uint32_t gotIndex = -1; 83*0b57cec5SDimitry Andric uint32_t pltIndex = -1; 84*0b57cec5SDimitry Andric 85*0b57cec5SDimitry Andric uint32_t globalDynIndex = -1; 86*0b57cec5SDimitry Andric 87*0b57cec5SDimitry Andric // This field is a index to the symbol's version definition. 88*0b57cec5SDimitry Andric uint32_t verdefIndex = -1; 89*0b57cec5SDimitry Andric 90*0b57cec5SDimitry Andric // Version definition index. 91*0b57cec5SDimitry Andric uint16_t versionId; 92*0b57cec5SDimitry Andric 93*0b57cec5SDimitry Andric // An index into the .branch_lt section on PPC64. 94*0b57cec5SDimitry Andric uint16_t ppc64BranchltIndex = -1; 95*0b57cec5SDimitry Andric 96*0b57cec5SDimitry Andric // Symbol binding. This is not overwritten by replace() to track 97*0b57cec5SDimitry Andric // changes during resolution. In particular: 98*0b57cec5SDimitry Andric // - An undefined weak is still weak when it resolves to a shared library. 
99*0b57cec5SDimitry Andric // - An undefined weak will not fetch archive members, but we have to 100*0b57cec5SDimitry Andric // remember it is weak. 101*0b57cec5SDimitry Andric uint8_t binding; 102*0b57cec5SDimitry Andric 103*0b57cec5SDimitry Andric // The following fields have the same meaning as the ELF symbol attributes. 104*0b57cec5SDimitry Andric uint8_t type; // symbol type 105*0b57cec5SDimitry Andric uint8_t stOther; // st_other field value 106*0b57cec5SDimitry Andric 107*0b57cec5SDimitry Andric uint8_t symbolKind; 108*0b57cec5SDimitry Andric 109*0b57cec5SDimitry Andric // Symbol visibility. This is the computed minimum visibility of all 110*0b57cec5SDimitry Andric // observed non-DSO symbols. 111*0b57cec5SDimitry Andric unsigned visibility : 2; 112*0b57cec5SDimitry Andric 113*0b57cec5SDimitry Andric // True if the symbol was used for linking and thus need to be added to the 114*0b57cec5SDimitry Andric // output file's symbol table. This is true for all symbols except for 115*0b57cec5SDimitry Andric // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that 116*0b57cec5SDimitry Andric // are unreferenced except by other bitcode objects. 117*0b57cec5SDimitry Andric unsigned isUsedInRegularObj : 1; 118*0b57cec5SDimitry Andric 119*0b57cec5SDimitry Andric // If this flag is true and the symbol has protected or default visibility, it 120*0b57cec5SDimitry Andric // will appear in .dynsym. This flag is set by interposable DSO symbols in 121*0b57cec5SDimitry Andric // executables, by most symbols in DSOs and executables built with 122*0b57cec5SDimitry Andric // --export-dynamic, and by dynamic lists. 123*0b57cec5SDimitry Andric unsigned exportDynamic : 1; 124*0b57cec5SDimitry Andric 125*0b57cec5SDimitry Andric // False if LTO shouldn't inline whatever this symbol points to. 
If a symbol 126*0b57cec5SDimitry Andric // is overwritten after LTO, LTO shouldn't inline the symbol because it 127*0b57cec5SDimitry Andric // doesn't know the final contents of the symbol. 128*0b57cec5SDimitry Andric unsigned canInline : 1; 129*0b57cec5SDimitry Andric 130*0b57cec5SDimitry Andric // True if this symbol is specified by --trace-symbol option. 131*0b57cec5SDimitry Andric unsigned traced : 1; 132*0b57cec5SDimitry Andric 133*0b57cec5SDimitry Andric inline void replace(const Symbol &New); 134*0b57cec5SDimitry Andric 135*0b57cec5SDimitry Andric bool includeInDynsym() const; 136*0b57cec5SDimitry Andric uint8_t computeBinding() const; 137*0b57cec5SDimitry Andric bool isWeak() const { return binding == llvm::ELF::STB_WEAK; } 138*0b57cec5SDimitry Andric 139*0b57cec5SDimitry Andric bool isUndefined() const { return symbolKind == UndefinedKind; } 140*0b57cec5SDimitry Andric bool isCommon() const { return symbolKind == CommonKind; } 141*0b57cec5SDimitry Andric bool isDefined() const { return symbolKind == DefinedKind; } 142*0b57cec5SDimitry Andric bool isShared() const { return symbolKind == SharedKind; } 143*0b57cec5SDimitry Andric bool isPlaceholder() const { return symbolKind == PlaceholderKind; } 144*0b57cec5SDimitry Andric 145*0b57cec5SDimitry Andric bool isLocal() const { return binding == llvm::ELF::STB_LOCAL; } 146*0b57cec5SDimitry Andric 147*0b57cec5SDimitry Andric bool isLazy() const { 148*0b57cec5SDimitry Andric return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; 149*0b57cec5SDimitry Andric } 150*0b57cec5SDimitry Andric 151*0b57cec5SDimitry Andric // True if this is an undefined weak symbol. This only works once 152*0b57cec5SDimitry Andric // all input files have been added. 153*0b57cec5SDimitry Andric bool isUndefWeak() const { 154*0b57cec5SDimitry Andric // See comment on lazy symbols for details. 
155*0b57cec5SDimitry Andric return isWeak() && (isUndefined() || isLazy()); 156*0b57cec5SDimitry Andric } 157*0b57cec5SDimitry Andric 158*0b57cec5SDimitry Andric StringRef getName() const { 159*0b57cec5SDimitry Andric if (nameSize == (uint32_t)-1) 160*0b57cec5SDimitry Andric nameSize = strlen(nameData); 161*0b57cec5SDimitry Andric return {nameData, nameSize}; 162*0b57cec5SDimitry Andric } 163*0b57cec5SDimitry Andric 164*0b57cec5SDimitry Andric void setName(StringRef s) { 165*0b57cec5SDimitry Andric nameData = s.data(); 166*0b57cec5SDimitry Andric nameSize = s.size(); 167*0b57cec5SDimitry Andric } 168*0b57cec5SDimitry Andric 169*0b57cec5SDimitry Andric void parseSymbolVersion(); 170*0b57cec5SDimitry Andric 171*0b57cec5SDimitry Andric bool isInGot() const { return gotIndex != -1U; } 172*0b57cec5SDimitry Andric bool isInPlt() const { return pltIndex != -1U; } 173*0b57cec5SDimitry Andric bool isInPPC64Branchlt() const { return ppc64BranchltIndex != 0xffff; } 174*0b57cec5SDimitry Andric 175*0b57cec5SDimitry Andric uint64_t getVA(int64_t addend = 0) const; 176*0b57cec5SDimitry Andric 177*0b57cec5SDimitry Andric uint64_t getGotOffset() const; 178*0b57cec5SDimitry Andric uint64_t getGotVA() const; 179*0b57cec5SDimitry Andric uint64_t getGotPltOffset() const; 180*0b57cec5SDimitry Andric uint64_t getGotPltVA() const; 181*0b57cec5SDimitry Andric uint64_t getPltVA() const; 182*0b57cec5SDimitry Andric uint64_t getPPC64LongBranchTableVA() const; 183*0b57cec5SDimitry Andric uint64_t getPPC64LongBranchOffset() const; 184*0b57cec5SDimitry Andric uint64_t getSize() const; 185*0b57cec5SDimitry Andric OutputSection *getOutputSection() const; 186*0b57cec5SDimitry Andric 187*0b57cec5SDimitry Andric // The following two functions are used for symbol resolution. 
188*0b57cec5SDimitry Andric // 189*0b57cec5SDimitry Andric // You are expected to call mergeProperties for all symbols in input 190*0b57cec5SDimitry Andric // files so that attributes that are attached to names rather than 191*0b57cec5SDimitry Andric // indivisual symbol (such as visibility) are merged together. 192*0b57cec5SDimitry Andric // 193*0b57cec5SDimitry Andric // Every time you read a new symbol from an input, you are supposed 194*0b57cec5SDimitry Andric // to call resolve() with the new symbol. That function replaces 195*0b57cec5SDimitry Andric // "this" object as a result of name resolution if the new symbol is 196*0b57cec5SDimitry Andric // more appropriate to be included in the output. 197*0b57cec5SDimitry Andric // 198*0b57cec5SDimitry Andric // For example, if "this" is an undefined symbol and a new symbol is 199*0b57cec5SDimitry Andric // a defined symbol, "this" is replaced with the new symbol. 200*0b57cec5SDimitry Andric void mergeProperties(const Symbol &other); 201*0b57cec5SDimitry Andric void resolve(const Symbol &other); 202*0b57cec5SDimitry Andric 203*0b57cec5SDimitry Andric // If this is a lazy symbol, fetch an input file and add the symbol 204*0b57cec5SDimitry Andric // in the file to the symbol table. Calling this function on 205*0b57cec5SDimitry Andric // non-lazy object causes a runtime error. 
206*0b57cec5SDimitry Andric void fetch() const; 207*0b57cec5SDimitry Andric 208*0b57cec5SDimitry Andric private: 209*0b57cec5SDimitry Andric static bool isExportDynamic(Kind k, uint8_t visibility) { 210*0b57cec5SDimitry Andric if (k == SharedKind) 211*0b57cec5SDimitry Andric return visibility == llvm::ELF::STV_DEFAULT; 212*0b57cec5SDimitry Andric return config->shared || config->exportDynamic; 213*0b57cec5SDimitry Andric } 214*0b57cec5SDimitry Andric 215*0b57cec5SDimitry Andric void resolveUndefined(const Undefined &other); 216*0b57cec5SDimitry Andric void resolveCommon(const CommonSymbol &other); 217*0b57cec5SDimitry Andric void resolveDefined(const Defined &other); 218*0b57cec5SDimitry Andric template <class LazyT> void resolveLazy(const LazyT &other); 219*0b57cec5SDimitry Andric void resolveShared(const SharedSymbol &other); 220*0b57cec5SDimitry Andric 221*0b57cec5SDimitry Andric int compare(const Symbol *other) const; 222*0b57cec5SDimitry Andric 223*0b57cec5SDimitry Andric inline size_t getSymbolSize() const; 224*0b57cec5SDimitry Andric 225*0b57cec5SDimitry Andric protected: 226*0b57cec5SDimitry Andric Symbol(Kind k, InputFile *file, StringRefZ name, uint8_t binding, 227*0b57cec5SDimitry Andric uint8_t stOther, uint8_t type) 228*0b57cec5SDimitry Andric : file(file), nameData(name.data), nameSize(name.size), binding(binding), 229*0b57cec5SDimitry Andric type(type), stOther(stOther), symbolKind(k), visibility(stOther & 3), 230*0b57cec5SDimitry Andric isUsedInRegularObj(!file || file->kind() == InputFile::ObjKind), 231*0b57cec5SDimitry Andric exportDynamic(isExportDynamic(k, visibility)), canInline(false), 232*0b57cec5SDimitry Andric traced(false), needsPltAddr(false), isInIplt(false), gotInIgot(false), 233*0b57cec5SDimitry Andric isPreemptible(false), used(!config->gcSections), needsTocRestore(false), 234*0b57cec5SDimitry Andric scriptDefined(false) {} 235*0b57cec5SDimitry Andric 236*0b57cec5SDimitry Andric public: 237*0b57cec5SDimitry Andric // True the symbol 
should point to its PLT entry. 238*0b57cec5SDimitry Andric // For SharedSymbol only. 239*0b57cec5SDimitry Andric unsigned needsPltAddr : 1; 240*0b57cec5SDimitry Andric 241*0b57cec5SDimitry Andric // True if this symbol is in the Iplt sub-section of the Plt and the Igot 242*0b57cec5SDimitry Andric // sub-section of the .got.plt or .got. 243*0b57cec5SDimitry Andric unsigned isInIplt : 1; 244*0b57cec5SDimitry Andric 245*0b57cec5SDimitry Andric // True if this symbol needs a GOT entry and its GOT entry is actually in 246*0b57cec5SDimitry Andric // Igot. This will be true only for certain non-preemptible ifuncs. 247*0b57cec5SDimitry Andric unsigned gotInIgot : 1; 248*0b57cec5SDimitry Andric 249*0b57cec5SDimitry Andric // True if this symbol is preemptible at load time. 250*0b57cec5SDimitry Andric unsigned isPreemptible : 1; 251*0b57cec5SDimitry Andric 252*0b57cec5SDimitry Andric // True if an undefined or shared symbol is used from a live section. 253*0b57cec5SDimitry Andric unsigned used : 1; 254*0b57cec5SDimitry Andric 255*0b57cec5SDimitry Andric // True if a call to this symbol needs to be followed by a restore of the 256*0b57cec5SDimitry Andric // PPC64 toc pointer. 257*0b57cec5SDimitry Andric unsigned needsTocRestore : 1; 258*0b57cec5SDimitry Andric 259*0b57cec5SDimitry Andric // True if this symbol is defined by a linker script. 260*0b57cec5SDimitry Andric unsigned scriptDefined : 1; 261*0b57cec5SDimitry Andric 262*0b57cec5SDimitry Andric // The partition whose dynamic symbol table contains this symbol's definition. 
263*0b57cec5SDimitry Andric uint8_t partition = 1; 264*0b57cec5SDimitry Andric 265*0b57cec5SDimitry Andric bool isSection() const { return type == llvm::ELF::STT_SECTION; } 266*0b57cec5SDimitry Andric bool isTls() const { return type == llvm::ELF::STT_TLS; } 267*0b57cec5SDimitry Andric bool isFunc() const { return type == llvm::ELF::STT_FUNC; } 268*0b57cec5SDimitry Andric bool isGnuIFunc() const { return type == llvm::ELF::STT_GNU_IFUNC; } 269*0b57cec5SDimitry Andric bool isObject() const { return type == llvm::ELF::STT_OBJECT; } 270*0b57cec5SDimitry Andric bool isFile() const { return type == llvm::ELF::STT_FILE; } 271*0b57cec5SDimitry Andric }; 272*0b57cec5SDimitry Andric 273*0b57cec5SDimitry Andric // Represents a symbol that is defined in the current output file. 274*0b57cec5SDimitry Andric class Defined : public Symbol { 275*0b57cec5SDimitry Andric public: 276*0b57cec5SDimitry Andric Defined(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther, 277*0b57cec5SDimitry Andric uint8_t type, uint64_t value, uint64_t size, SectionBase *section) 278*0b57cec5SDimitry Andric : Symbol(DefinedKind, file, name, binding, stOther, type), value(value), 279*0b57cec5SDimitry Andric size(size), section(section) {} 280*0b57cec5SDimitry Andric 281*0b57cec5SDimitry Andric static bool classof(const Symbol *s) { return s->isDefined(); } 282*0b57cec5SDimitry Andric 283*0b57cec5SDimitry Andric uint64_t value; 284*0b57cec5SDimitry Andric uint64_t size; 285*0b57cec5SDimitry Andric SectionBase *section; 286*0b57cec5SDimitry Andric }; 287*0b57cec5SDimitry Andric 288*0b57cec5SDimitry Andric // Represents a common symbol. 289*0b57cec5SDimitry Andric // 290*0b57cec5SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions 291*0b57cec5SDimitry Andric // without initialization expressions (such as "int foo;") to header 292*0b57cec5SDimitry Andric // files. Such definition is called "tentative definition". 
//
// Using tentative definition is usually considered a bad practice
// because you should write only declarations (such as "extern int
// foo;") to header files. Nevertheless, the linker and the compiler
// have to do something to support bad code by allowing duplicate
// definitions for this particular case.
//
// Common symbols represent variable definitions without initializations.
// The compiler creates common symbols when it sees variable definitions
// without initialization (you can suppress this behavior and let the
// compiler create a regular defined symbol by -fno-common).
//
// The linker allows common symbols to be replaced by regular defined
// symbols. If there are remaining common symbols after name resolution is
// complete, they are converted to regular defined symbols in a .bss
// section. (Therefore, the later passes don't see any CommonSymbols.)
309*0b57cec5SDimitry Andric class CommonSymbol : public Symbol { 310*0b57cec5SDimitry Andric public: 311*0b57cec5SDimitry Andric CommonSymbol(InputFile *file, StringRefZ name, uint8_t binding, 312*0b57cec5SDimitry Andric uint8_t stOther, uint8_t type, uint64_t alignment, uint64_t size) 313*0b57cec5SDimitry Andric : Symbol(CommonKind, file, name, binding, stOther, type), 314*0b57cec5SDimitry Andric alignment(alignment), size(size) {} 315*0b57cec5SDimitry Andric 316*0b57cec5SDimitry Andric static bool classof(const Symbol *s) { return s->isCommon(); } 317*0b57cec5SDimitry Andric 318*0b57cec5SDimitry Andric uint32_t alignment; 319*0b57cec5SDimitry Andric uint64_t size; 320*0b57cec5SDimitry Andric }; 321*0b57cec5SDimitry Andric 322*0b57cec5SDimitry Andric class Undefined : public Symbol { 323*0b57cec5SDimitry Andric public: 324*0b57cec5SDimitry Andric Undefined(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther, 325*0b57cec5SDimitry Andric uint8_t type, uint32_t discardedSecIdx = 0) 326*0b57cec5SDimitry Andric : Symbol(UndefinedKind, file, name, binding, stOther, type), 327*0b57cec5SDimitry Andric discardedSecIdx(discardedSecIdx) {} 328*0b57cec5SDimitry Andric 329*0b57cec5SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 330*0b57cec5SDimitry Andric 331*0b57cec5SDimitry Andric // The section index if in a discarded section, 0 otherwise. 
332*0b57cec5SDimitry Andric uint32_t discardedSecIdx; 333*0b57cec5SDimitry Andric }; 334*0b57cec5SDimitry Andric 335*0b57cec5SDimitry Andric class SharedSymbol : public Symbol { 336*0b57cec5SDimitry Andric public: 337*0b57cec5SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == SharedKind; } 338*0b57cec5SDimitry Andric 339*0b57cec5SDimitry Andric SharedSymbol(InputFile &file, StringRef name, uint8_t binding, 340*0b57cec5SDimitry Andric uint8_t stOther, uint8_t type, uint64_t value, uint64_t size, 341*0b57cec5SDimitry Andric uint32_t alignment, uint32_t verdefIndex) 342*0b57cec5SDimitry Andric : Symbol(SharedKind, &file, name, binding, stOther, type), value(value), 343*0b57cec5SDimitry Andric size(size), alignment(alignment) { 344*0b57cec5SDimitry Andric this->verdefIndex = verdefIndex; 345*0b57cec5SDimitry Andric // GNU ifunc is a mechanism to allow user-supplied functions to 346*0b57cec5SDimitry Andric // resolve PLT slot values at load-time. This is contrary to the 347*0b57cec5SDimitry Andric // regular symbol resolution scheme in which symbols are resolved just 348*0b57cec5SDimitry Andric // by name. Using this hook, you can program how symbols are solved 349*0b57cec5SDimitry Andric // for you program. For example, you can make "memcpy" to be resolved 350*0b57cec5SDimitry Andric // to a SSE-enabled version of memcpy only when a machine running the 351*0b57cec5SDimitry Andric // program supports the SSE instruction set. 352*0b57cec5SDimitry Andric // 353*0b57cec5SDimitry Andric // Naturally, such symbols should always be called through their PLT 354*0b57cec5SDimitry Andric // slots. What GNU ifunc symbols point to are resolver functions, and 355*0b57cec5SDimitry Andric // calling them directly doesn't make sense (unless you are writing a 356*0b57cec5SDimitry Andric // loader). 357*0b57cec5SDimitry Andric // 358*0b57cec5SDimitry Andric // For DSO symbols, we always call them through PLT slots anyway. 
359*0b57cec5SDimitry Andric // So there's no difference between GNU ifunc and regular function 360*0b57cec5SDimitry Andric // symbols if they are in DSOs. So we can handle GNU_IFUNC as FUNC. 361*0b57cec5SDimitry Andric if (this->type == llvm::ELF::STT_GNU_IFUNC) 362*0b57cec5SDimitry Andric this->type = llvm::ELF::STT_FUNC; 363*0b57cec5SDimitry Andric } 364*0b57cec5SDimitry Andric 365*0b57cec5SDimitry Andric SharedFile &getFile() const { return *cast<SharedFile>(file); } 366*0b57cec5SDimitry Andric 367*0b57cec5SDimitry Andric uint64_t value; // st_value 368*0b57cec5SDimitry Andric uint64_t size; // st_size 369*0b57cec5SDimitry Andric uint32_t alignment; 370*0b57cec5SDimitry Andric 371*0b57cec5SDimitry Andric // This is true if there has been at least one undefined reference to the 372*0b57cec5SDimitry Andric // symbol. The binding may change to STB_WEAK if the first undefined reference 373*0b57cec5SDimitry Andric // is weak. 374*0b57cec5SDimitry Andric bool referenced = false; 375*0b57cec5SDimitry Andric }; 376*0b57cec5SDimitry Andric 377*0b57cec5SDimitry Andric // LazyArchive and LazyObject represent a symbols that is not yet in the link, 378*0b57cec5SDimitry Andric // but we know where to find it if needed. If the resolver finds both Undefined 379*0b57cec5SDimitry Andric // and Lazy for the same name, it will ask the Lazy to load a file. 380*0b57cec5SDimitry Andric // 381*0b57cec5SDimitry Andric // A special complication is the handling of weak undefined symbols. They should 382*0b57cec5SDimitry Andric // not load a file, but we have to remember we have seen both the weak undefined 383*0b57cec5SDimitry Andric // and the lazy. We represent that with a lazy symbol with a weak binding. This 384*0b57cec5SDimitry Andric // means that code looking for undefined symbols normally also has to take lazy 385*0b57cec5SDimitry Andric // symbols into consideration. 
386*0b57cec5SDimitry Andric 387*0b57cec5SDimitry Andric // This class represents a symbol defined in an archive file. It is 388*0b57cec5SDimitry Andric // created from an archive file header, and it knows how to load an 389*0b57cec5SDimitry Andric // object file from an archive to replace itself with a defined 390*0b57cec5SDimitry Andric // symbol. 391*0b57cec5SDimitry Andric class LazyArchive : public Symbol { 392*0b57cec5SDimitry Andric public: 393*0b57cec5SDimitry Andric LazyArchive(InputFile &file, const llvm::object::Archive::Symbol s) 394*0b57cec5SDimitry Andric : Symbol(LazyArchiveKind, &file, s.getName(), llvm::ELF::STB_GLOBAL, 395*0b57cec5SDimitry Andric llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE), 396*0b57cec5SDimitry Andric sym(s) {} 397*0b57cec5SDimitry Andric 398*0b57cec5SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } 399*0b57cec5SDimitry Andric 400*0b57cec5SDimitry Andric MemoryBufferRef getMemberBuffer(); 401*0b57cec5SDimitry Andric 402*0b57cec5SDimitry Andric const llvm::object::Archive::Symbol sym; 403*0b57cec5SDimitry Andric }; 404*0b57cec5SDimitry Andric 405*0b57cec5SDimitry Andric // LazyObject symbols represents symbols in object files between 406*0b57cec5SDimitry Andric // --start-lib and --end-lib options. 407*0b57cec5SDimitry Andric class LazyObject : public Symbol { 408*0b57cec5SDimitry Andric public: 409*0b57cec5SDimitry Andric LazyObject(InputFile &file, StringRef name) 410*0b57cec5SDimitry Andric : Symbol(LazyObjectKind, &file, name, llvm::ELF::STB_GLOBAL, 411*0b57cec5SDimitry Andric llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {} 412*0b57cec5SDimitry Andric 413*0b57cec5SDimitry Andric static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } 414*0b57cec5SDimitry Andric }; 415*0b57cec5SDimitry Andric 416*0b57cec5SDimitry Andric // Some linker-generated symbols need to be created as 417*0b57cec5SDimitry Andric // Defined symbols. 
418*0b57cec5SDimitry Andric struct ElfSym { 419*0b57cec5SDimitry Andric // __bss_start 420*0b57cec5SDimitry Andric static Defined *bss; 421*0b57cec5SDimitry Andric 422*0b57cec5SDimitry Andric // etext and _etext 423*0b57cec5SDimitry Andric static Defined *etext1; 424*0b57cec5SDimitry Andric static Defined *etext2; 425*0b57cec5SDimitry Andric 426*0b57cec5SDimitry Andric // edata and _edata 427*0b57cec5SDimitry Andric static Defined *edata1; 428*0b57cec5SDimitry Andric static Defined *edata2; 429*0b57cec5SDimitry Andric 430*0b57cec5SDimitry Andric // end and _end 431*0b57cec5SDimitry Andric static Defined *end1; 432*0b57cec5SDimitry Andric static Defined *end2; 433*0b57cec5SDimitry Andric 434*0b57cec5SDimitry Andric // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to 435*0b57cec5SDimitry Andric // be at some offset from the base of the .got section, usually 0 or 436*0b57cec5SDimitry Andric // the end of the .got. 437*0b57cec5SDimitry Andric static Defined *globalOffsetTable; 438*0b57cec5SDimitry Andric 439*0b57cec5SDimitry Andric // _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS. 440*0b57cec5SDimitry Andric static Defined *mipsGp; 441*0b57cec5SDimitry Andric static Defined *mipsGpDisp; 442*0b57cec5SDimitry Andric static Defined *mipsLocalGp; 443*0b57cec5SDimitry Andric 444*0b57cec5SDimitry Andric // __rel{,a}_iplt_{start,end} symbols. 445*0b57cec5SDimitry Andric static Defined *relaIpltStart; 446*0b57cec5SDimitry Andric static Defined *relaIpltEnd; 447*0b57cec5SDimitry Andric 448*0b57cec5SDimitry Andric // __global_pointer$ for RISC-V. 449*0b57cec5SDimitry Andric static Defined *riscvGlobalPointer; 450*0b57cec5SDimitry Andric 451*0b57cec5SDimitry Andric // _TLS_MODULE_BASE_ on targets that support TLSDESC. 
452*0b57cec5SDimitry Andric static Defined *tlsModuleBase; 453*0b57cec5SDimitry Andric }; 454*0b57cec5SDimitry Andric 455*0b57cec5SDimitry Andric // A buffer class that is large enough to hold any Symbol-derived 456*0b57cec5SDimitry Andric // object. We allocate memory using this class and instantiate a symbol 457*0b57cec5SDimitry Andric // using the placement new. 458*0b57cec5SDimitry Andric union SymbolUnion { 459*0b57cec5SDimitry Andric alignas(Defined) char a[sizeof(Defined)]; 460*0b57cec5SDimitry Andric alignas(CommonSymbol) char b[sizeof(CommonSymbol)]; 461*0b57cec5SDimitry Andric alignas(Undefined) char c[sizeof(Undefined)]; 462*0b57cec5SDimitry Andric alignas(SharedSymbol) char d[sizeof(SharedSymbol)]; 463*0b57cec5SDimitry Andric alignas(LazyArchive) char e[sizeof(LazyArchive)]; 464*0b57cec5SDimitry Andric alignas(LazyObject) char f[sizeof(LazyObject)]; 465*0b57cec5SDimitry Andric }; 466*0b57cec5SDimitry Andric 467*0b57cec5SDimitry Andric // It is important to keep the size of SymbolUnion small for performance and 468*0b57cec5SDimitry Andric // memory usage reasons. 80 bytes is a soft limit based on the size of Defined 469*0b57cec5SDimitry Andric // on a 64-bit system. 
470*0b57cec5SDimitry Andric static_assert(sizeof(SymbolUnion) <= 80, "SymbolUnion too large"); 471*0b57cec5SDimitry Andric 472*0b57cec5SDimitry Andric template <typename T> struct AssertSymbol { 473*0b57cec5SDimitry Andric static_assert(std::is_trivially_destructible<T>(), 474*0b57cec5SDimitry Andric "Symbol types must be trivially destructible"); 475*0b57cec5SDimitry Andric static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 476*0b57cec5SDimitry Andric static_assert(alignof(T) <= alignof(SymbolUnion), 477*0b57cec5SDimitry Andric "SymbolUnion not aligned enough"); 478*0b57cec5SDimitry Andric }; 479*0b57cec5SDimitry Andric 480*0b57cec5SDimitry Andric static inline void assertSymbols() { 481*0b57cec5SDimitry Andric AssertSymbol<Defined>(); 482*0b57cec5SDimitry Andric AssertSymbol<CommonSymbol>(); 483*0b57cec5SDimitry Andric AssertSymbol<Undefined>(); 484*0b57cec5SDimitry Andric AssertSymbol<SharedSymbol>(); 485*0b57cec5SDimitry Andric AssertSymbol<LazyArchive>(); 486*0b57cec5SDimitry Andric AssertSymbol<LazyObject>(); 487*0b57cec5SDimitry Andric } 488*0b57cec5SDimitry Andric 489*0b57cec5SDimitry Andric void printTraceSymbol(const Symbol *sym); 490*0b57cec5SDimitry Andric 491*0b57cec5SDimitry Andric size_t Symbol::getSymbolSize() const { 492*0b57cec5SDimitry Andric switch (kind()) { 493*0b57cec5SDimitry Andric case CommonKind: 494*0b57cec5SDimitry Andric return sizeof(CommonSymbol); 495*0b57cec5SDimitry Andric case DefinedKind: 496*0b57cec5SDimitry Andric return sizeof(Defined); 497*0b57cec5SDimitry Andric case LazyArchiveKind: 498*0b57cec5SDimitry Andric return sizeof(LazyArchive); 499*0b57cec5SDimitry Andric case LazyObjectKind: 500*0b57cec5SDimitry Andric return sizeof(LazyObject); 501*0b57cec5SDimitry Andric case SharedKind: 502*0b57cec5SDimitry Andric return sizeof(SharedSymbol); 503*0b57cec5SDimitry Andric case UndefinedKind: 504*0b57cec5SDimitry Andric return sizeof(Undefined); 505*0b57cec5SDimitry Andric case PlaceholderKind: 
506*0b57cec5SDimitry Andric return sizeof(Symbol); 507*0b57cec5SDimitry Andric } 508*0b57cec5SDimitry Andric llvm_unreachable("unknown symbol kind"); 509*0b57cec5SDimitry Andric } 510*0b57cec5SDimitry Andric 511*0b57cec5SDimitry Andric // replace() replaces "this" object with a given symbol by memcpy'ing 512*0b57cec5SDimitry Andric // it over to "this". This function is called as a result of name 513*0b57cec5SDimitry Andric // resolution, e.g. to replace an undefind symbol with a defined symbol. 514*0b57cec5SDimitry Andric void Symbol::replace(const Symbol &New) { 515*0b57cec5SDimitry Andric using llvm::ELF::STT_TLS; 516*0b57cec5SDimitry Andric 517*0b57cec5SDimitry Andric // Symbols representing thread-local variables must be referenced by 518*0b57cec5SDimitry Andric // TLS-aware relocations, and non-TLS symbols must be reference by 519*0b57cec5SDimitry Andric // non-TLS relocations, so there's a clear distinction between TLS 520*0b57cec5SDimitry Andric // and non-TLS symbols. It is an error if the same symbol is defined 521*0b57cec5SDimitry Andric // as a TLS symbol in one file and as a non-TLS symbol in other file. 
522*0b57cec5SDimitry Andric if (symbolKind != PlaceholderKind && !isLazy() && !New.isLazy()) { 523*0b57cec5SDimitry Andric bool tlsMismatch = (type == STT_TLS && New.type != STT_TLS) || 524*0b57cec5SDimitry Andric (type != STT_TLS && New.type == STT_TLS); 525*0b57cec5SDimitry Andric if (tlsMismatch) 526*0b57cec5SDimitry Andric error("TLS attribute mismatch: " + toString(*this) + "\n>>> defined in " + 527*0b57cec5SDimitry Andric toString(New.file) + "\n>>> defined in " + toString(file)); 528*0b57cec5SDimitry Andric } 529*0b57cec5SDimitry Andric 530*0b57cec5SDimitry Andric Symbol old = *this; 531*0b57cec5SDimitry Andric memcpy(this, &New, New.getSymbolSize()); 532*0b57cec5SDimitry Andric 533*0b57cec5SDimitry Andric versionId = old.versionId; 534*0b57cec5SDimitry Andric visibility = old.visibility; 535*0b57cec5SDimitry Andric isUsedInRegularObj = old.isUsedInRegularObj; 536*0b57cec5SDimitry Andric exportDynamic = old.exportDynamic; 537*0b57cec5SDimitry Andric canInline = old.canInline; 538*0b57cec5SDimitry Andric traced = old.traced; 539*0b57cec5SDimitry Andric isPreemptible = old.isPreemptible; 540*0b57cec5SDimitry Andric scriptDefined = old.scriptDefined; 541*0b57cec5SDimitry Andric partition = old.partition; 542*0b57cec5SDimitry Andric 543*0b57cec5SDimitry Andric // Symbol length is computed lazily. If we already know a symbol length, 544*0b57cec5SDimitry Andric // propagate it. 545*0b57cec5SDimitry Andric if (nameData == old.nameData && nameSize == 0 && old.nameSize != 0) 546*0b57cec5SDimitry Andric nameSize = old.nameSize; 547*0b57cec5SDimitry Andric 548*0b57cec5SDimitry Andric // Print out a log message if --trace-symbol was specified. 549*0b57cec5SDimitry Andric // This is for debugging. 
550*0b57cec5SDimitry Andric if (traced) 551*0b57cec5SDimitry Andric printTraceSymbol(this); 552*0b57cec5SDimitry Andric } 553*0b57cec5SDimitry Andric 554*0b57cec5SDimitry Andric void maybeWarnUnorderableSymbol(const Symbol *sym); 555*0b57cec5SDimitry Andric } // namespace elf 556*0b57cec5SDimitry Andric } // namespace lld 557*0b57cec5SDimitry Andric 558*0b57cec5SDimitry Andric #endif 559