1*0b57cec5SDimitry Andric //===- SymbolTable.cpp ----------------------------------------------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // Symbol table is a bag of all known symbols. We put all symbols of 10*0b57cec5SDimitry Andric // all input files to the symbol table. The symbol table is basically 11*0b57cec5SDimitry Andric // a hash table with the logic to resolve symbol name conflicts using 12*0b57cec5SDimitry Andric // the symbol types. 13*0b57cec5SDimitry Andric // 14*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 15*0b57cec5SDimitry Andric 16*0b57cec5SDimitry Andric #include "SymbolTable.h" 17*0b57cec5SDimitry Andric #include "Config.h" 18*0b57cec5SDimitry Andric #include "LinkerScript.h" 19*0b57cec5SDimitry Andric #include "Symbols.h" 20*0b57cec5SDimitry Andric #include "SyntheticSections.h" 21*0b57cec5SDimitry Andric #include "lld/Common/ErrorHandler.h" 22*0b57cec5SDimitry Andric #include "lld/Common/Memory.h" 23*0b57cec5SDimitry Andric #include "lld/Common/Strings.h" 24*0b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 25*0b57cec5SDimitry Andric 26*0b57cec5SDimitry Andric using namespace llvm; 27*0b57cec5SDimitry Andric using namespace llvm::object; 28*0b57cec5SDimitry Andric using namespace llvm::ELF; 29*0b57cec5SDimitry Andric 30*0b57cec5SDimitry Andric using namespace lld; 31*0b57cec5SDimitry Andric using namespace lld::elf; 32*0b57cec5SDimitry Andric 33*0b57cec5SDimitry Andric SymbolTable *elf::symtab; 34*0b57cec5SDimitry Andric 35*0b57cec5SDimitry Andric void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { 36*0b57cec5SDimitry Andric // Swap symbols as instructed by -wrap. 37*0b57cec5SDimitry Andric int &idx1 = symMap[CachedHashStringRef(sym->getName())]; 38*0b57cec5SDimitry Andric int &idx2 = symMap[CachedHashStringRef(real->getName())]; 39*0b57cec5SDimitry Andric int &idx3 = symMap[CachedHashStringRef(wrap->getName())]; 40*0b57cec5SDimitry Andric 41*0b57cec5SDimitry Andric idx2 = idx1; 42*0b57cec5SDimitry Andric idx1 = idx3; 43*0b57cec5SDimitry Andric 44*0b57cec5SDimitry Andric // Now renaming is complete. No one refers Real symbol. We could leave 45*0b57cec5SDimitry Andric // Real as-is, but if Real is written to the symbol table, that may 46*0b57cec5SDimitry Andric // contain irrelevant values. So, we copy all values from Sym to Real. 47*0b57cec5SDimitry Andric StringRef s = real->getName(); 48*0b57cec5SDimitry Andric memcpy(real, sym, sizeof(SymbolUnion)); 49*0b57cec5SDimitry Andric real->setName(s); 50*0b57cec5SDimitry Andric } 51*0b57cec5SDimitry Andric 52*0b57cec5SDimitry Andric // Find an existing symbol or create a new one. 53*0b57cec5SDimitry Andric Symbol *SymbolTable::insert(StringRef name) { 54*0b57cec5SDimitry Andric // <name>@@<version> means the symbol is the default version. In that 55*0b57cec5SDimitry Andric // case <name>@@<version> will be used to resolve references to <name>. 56*0b57cec5SDimitry Andric // 57*0b57cec5SDimitry Andric // Since this is a hot path, the following string search code is 58*0b57cec5SDimitry Andric // optimized for speed. StringRef::find(char) is much faster than 59*0b57cec5SDimitry Andric // StringRef::find(StringRef). 60*0b57cec5SDimitry Andric size_t pos = name.find('@'); 61*0b57cec5SDimitry Andric if (pos != StringRef::npos && pos + 1 < name.size() && name[pos + 1] == '@') 62*0b57cec5SDimitry Andric name = name.take_front(pos); 63*0b57cec5SDimitry Andric 64*0b57cec5SDimitry Andric auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()}); 65*0b57cec5SDimitry Andric int &symIndex = p.first->second; 66*0b57cec5SDimitry Andric bool isNew = p.second; 67*0b57cec5SDimitry Andric 68*0b57cec5SDimitry Andric if (!isNew) 69*0b57cec5SDimitry Andric return symVector[symIndex]; 70*0b57cec5SDimitry Andric 71*0b57cec5SDimitry Andric Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 72*0b57cec5SDimitry Andric symVector.push_back(sym); 73*0b57cec5SDimitry Andric 74*0b57cec5SDimitry Andric sym->setName(name); 75*0b57cec5SDimitry Andric sym->symbolKind = Symbol::PlaceholderKind; 76*0b57cec5SDimitry Andric sym->versionId = config->defaultSymbolVersion; 77*0b57cec5SDimitry Andric sym->visibility = STV_DEFAULT; 78*0b57cec5SDimitry Andric sym->isUsedInRegularObj = false; 79*0b57cec5SDimitry Andric sym->exportDynamic = false; 80*0b57cec5SDimitry Andric sym->canInline = true; 81*0b57cec5SDimitry Andric sym->scriptDefined = false; 82*0b57cec5SDimitry Andric sym->partition = 1; 83*0b57cec5SDimitry Andric return sym; 84*0b57cec5SDimitry Andric } 85*0b57cec5SDimitry Andric 86*0b57cec5SDimitry Andric Symbol *SymbolTable::addSymbol(const Symbol &New) { 87*0b57cec5SDimitry Andric Symbol *sym = symtab->insert(New.getName()); 88*0b57cec5SDimitry Andric sym->resolve(New); 89*0b57cec5SDimitry Andric return sym; 90*0b57cec5SDimitry Andric } 91*0b57cec5SDimitry Andric 92*0b57cec5SDimitry Andric Symbol *SymbolTable::find(StringRef name) { 93*0b57cec5SDimitry Andric auto it = symMap.find(CachedHashStringRef(name)); 94*0b57cec5SDimitry Andric if (it == symMap.end()) 95*0b57cec5SDimitry Andric return nullptr; 96*0b57cec5SDimitry Andric Symbol *sym = symVector[it->second]; 97*0b57cec5SDimitry Andric if (sym->isPlaceholder()) 98*0b57cec5SDimitry Andric return nullptr; 99*0b57cec5SDimitry Andric return sym; 100*0b57cec5SDimitry Andric } 101*0b57cec5SDimitry Andric 102*0b57cec5SDimitry Andric // Initialize demangledSyms with a map from demangled symbols to symbol 103*0b57cec5SDimitry Andric // objects. Used to handle "extern C++" directive in version scripts. 104*0b57cec5SDimitry Andric // 105*0b57cec5SDimitry Andric // The map will contain all demangled symbols. That can be very large, 106*0b57cec5SDimitry Andric // and in LLD we generally want to avoid do anything for each symbol. 107*0b57cec5SDimitry Andric // Then, why are we doing this? Here's why. 108*0b57cec5SDimitry Andric // 109*0b57cec5SDimitry Andric // Users can use "extern C++ {}" directive to match against demangled 110*0b57cec5SDimitry Andric // C++ symbols. For example, you can write a pattern such as 111*0b57cec5SDimitry Andric // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this 112*0b57cec5SDimitry Andric // other than trying to match a pattern against all demangled symbols. 113*0b57cec5SDimitry Andric // So, if "extern C++" feature is used, we need to demangle all known 114*0b57cec5SDimitry Andric // symbols. 115*0b57cec5SDimitry Andric StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() { 116*0b57cec5SDimitry Andric if (!demangledSyms) { 117*0b57cec5SDimitry Andric demangledSyms.emplace(); 118*0b57cec5SDimitry Andric for (Symbol *sym : symVector) { 119*0b57cec5SDimitry Andric if (!sym->isDefined() && !sym->isCommon()) 120*0b57cec5SDimitry Andric continue; 121*0b57cec5SDimitry Andric if (Optional<std::string> s = demangleItanium(sym->getName())) 122*0b57cec5SDimitry Andric (*demangledSyms)[*s].push_back(sym); 123*0b57cec5SDimitry Andric else 124*0b57cec5SDimitry Andric (*demangledSyms)[sym->getName()].push_back(sym); 125*0b57cec5SDimitry Andric } 126*0b57cec5SDimitry Andric } 127*0b57cec5SDimitry Andric return *demangledSyms; 128*0b57cec5SDimitry Andric } 129*0b57cec5SDimitry Andric 130*0b57cec5SDimitry Andric std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion ver) { 131*0b57cec5SDimitry Andric if (ver.isExternCpp) 132*0b57cec5SDimitry Andric return getDemangledSyms().lookup(ver.name); 133*0b57cec5SDimitry Andric if (Symbol *b = find(ver.name)) 134*0b57cec5SDimitry Andric if (b->isDefined() || b->isCommon()) 135*0b57cec5SDimitry Andric return {b}; 136*0b57cec5SDimitry Andric return {}; 137*0b57cec5SDimitry Andric } 138*0b57cec5SDimitry Andric 139*0b57cec5SDimitry Andric std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver) { 140*0b57cec5SDimitry Andric std::vector<Symbol *> res; 141*0b57cec5SDimitry Andric StringMatcher m(ver.name); 142*0b57cec5SDimitry Andric 143*0b57cec5SDimitry Andric if (ver.isExternCpp) { 144*0b57cec5SDimitry Andric for (auto &p : getDemangledSyms()) 145*0b57cec5SDimitry Andric if (m.match(p.first())) 146*0b57cec5SDimitry Andric res.insert(res.end(), p.second.begin(), p.second.end()); 147*0b57cec5SDimitry Andric return res; 148*0b57cec5SDimitry Andric } 149*0b57cec5SDimitry Andric 150*0b57cec5SDimitry Andric for (Symbol *sym : symVector) 151*0b57cec5SDimitry Andric if ((sym->isDefined() || sym->isCommon()) && m.match(sym->getName())) 152*0b57cec5SDimitry Andric res.push_back(sym); 153*0b57cec5SDimitry Andric return res; 154*0b57cec5SDimitry Andric } 155*0b57cec5SDimitry Andric 156*0b57cec5SDimitry Andric // Handles -dynamic-list. 157*0b57cec5SDimitry Andric void SymbolTable::handleDynamicList() { 158*0b57cec5SDimitry Andric for (SymbolVersion &ver : config->dynamicList) { 159*0b57cec5SDimitry Andric std::vector<Symbol *> syms; 160*0b57cec5SDimitry Andric if (ver.hasWildcard) 161*0b57cec5SDimitry Andric syms = findAllByVersion(ver); 162*0b57cec5SDimitry Andric else 163*0b57cec5SDimitry Andric syms = findByVersion(ver); 164*0b57cec5SDimitry Andric 165*0b57cec5SDimitry Andric for (Symbol *b : syms) { 166*0b57cec5SDimitry Andric if (!config->shared) 167*0b57cec5SDimitry Andric b->exportDynamic = true; 168*0b57cec5SDimitry Andric else if (b->includeInDynsym()) 169*0b57cec5SDimitry Andric b->isPreemptible = true; 170*0b57cec5SDimitry Andric } 171*0b57cec5SDimitry Andric } 172*0b57cec5SDimitry Andric } 173*0b57cec5SDimitry Andric 174*0b57cec5SDimitry Andric // Set symbol versions to symbols. This function handles patterns 175*0b57cec5SDimitry Andric // containing no wildcard characters. 176*0b57cec5SDimitry Andric void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, 177*0b57cec5SDimitry Andric StringRef versionName) { 178*0b57cec5SDimitry Andric if (ver.hasWildcard) 179*0b57cec5SDimitry Andric return; 180*0b57cec5SDimitry Andric 181*0b57cec5SDimitry Andric // Get a list of symbols which we need to assign the version to. 182*0b57cec5SDimitry Andric std::vector<Symbol *> syms = findByVersion(ver); 183*0b57cec5SDimitry Andric if (syms.empty()) { 184*0b57cec5SDimitry Andric if (!config->undefinedVersion) 185*0b57cec5SDimitry Andric error("version script assignment of '" + versionName + "' to symbol '" + 186*0b57cec5SDimitry Andric ver.name + "' failed: symbol not defined"); 187*0b57cec5SDimitry Andric return; 188*0b57cec5SDimitry Andric } 189*0b57cec5SDimitry Andric 190*0b57cec5SDimitry Andric auto getName = [](uint16_t ver) -> std::string { 191*0b57cec5SDimitry Andric if (ver == VER_NDX_LOCAL) 192*0b57cec5SDimitry Andric return "VER_NDX_LOCAL"; 193*0b57cec5SDimitry Andric if (ver == VER_NDX_GLOBAL) 194*0b57cec5SDimitry Andric return "VER_NDX_GLOBAL"; 195*0b57cec5SDimitry Andric return ("version '" + config->versionDefinitions[ver - 2].name + "'").str(); 196*0b57cec5SDimitry Andric }; 197*0b57cec5SDimitry Andric 198*0b57cec5SDimitry Andric // Assign the version. 199*0b57cec5SDimitry Andric for (Symbol *sym : syms) { 200*0b57cec5SDimitry Andric // Skip symbols containing version info because symbol versions 201*0b57cec5SDimitry Andric // specified by symbol names take precedence over version scripts. 202*0b57cec5SDimitry Andric // See parseSymbolVersion(). 203*0b57cec5SDimitry Andric if (sym->getName().contains('@')) 204*0b57cec5SDimitry Andric continue; 205*0b57cec5SDimitry Andric 206*0b57cec5SDimitry Andric if (sym->versionId == config->defaultSymbolVersion) 207*0b57cec5SDimitry Andric sym->versionId = versionId; 208*0b57cec5SDimitry Andric if (sym->versionId == versionId) 209*0b57cec5SDimitry Andric continue; 210*0b57cec5SDimitry Andric 211*0b57cec5SDimitry Andric warn("attempt to reassign symbol '" + ver.name + "' of " + 212*0b57cec5SDimitry Andric getName(sym->versionId) + " to " + getName(versionId)); 213*0b57cec5SDimitry Andric } 214*0b57cec5SDimitry Andric } 215*0b57cec5SDimitry Andric 216*0b57cec5SDimitry Andric void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) { 217*0b57cec5SDimitry Andric if (!ver.hasWildcard) 218*0b57cec5SDimitry Andric return; 219*0b57cec5SDimitry Andric 220*0b57cec5SDimitry Andric // Exact matching takes precendence over fuzzy matching, 221*0b57cec5SDimitry Andric // so we set a version to a symbol only if no version has been assigned 222*0b57cec5SDimitry Andric // to the symbol. This behavior is compatible with GNU. 223*0b57cec5SDimitry Andric for (Symbol *b : findAllByVersion(ver)) 224*0b57cec5SDimitry Andric if (b->versionId == config->defaultSymbolVersion) 225*0b57cec5SDimitry Andric b->versionId = versionId; 226*0b57cec5SDimitry Andric } 227*0b57cec5SDimitry Andric 228*0b57cec5SDimitry Andric // This function processes version scripts by updating the versionId 229*0b57cec5SDimitry Andric // member of symbols. 230*0b57cec5SDimitry Andric // If there's only one anonymous version definition in a version 231*0b57cec5SDimitry Andric // script file, the script does not actually define any symbol version, 232*0b57cec5SDimitry Andric // but just specifies symbols visibilities. 233*0b57cec5SDimitry Andric void SymbolTable::scanVersionScript() { 234*0b57cec5SDimitry Andric // First, we assign versions to exact matching symbols, 235*0b57cec5SDimitry Andric // i.e. version definitions not containing any glob meta-characters. 236*0b57cec5SDimitry Andric for (SymbolVersion &ver : config->versionScriptGlobals) 237*0b57cec5SDimitry Andric assignExactVersion(ver, VER_NDX_GLOBAL, "global"); 238*0b57cec5SDimitry Andric for (SymbolVersion &ver : config->versionScriptLocals) 239*0b57cec5SDimitry Andric assignExactVersion(ver, VER_NDX_LOCAL, "local"); 240*0b57cec5SDimitry Andric for (VersionDefinition &v : config->versionDefinitions) 241*0b57cec5SDimitry Andric for (SymbolVersion &ver : v.globals) 242*0b57cec5SDimitry Andric assignExactVersion(ver, v.id, v.name); 243*0b57cec5SDimitry Andric 244*0b57cec5SDimitry Andric // Next, we assign versions to fuzzy matching symbols, 245*0b57cec5SDimitry Andric // i.e. version definitions containing glob meta-characters. 246*0b57cec5SDimitry Andric for (SymbolVersion &ver : config->versionScriptGlobals) 247*0b57cec5SDimitry Andric assignWildcardVersion(ver, VER_NDX_GLOBAL); 248*0b57cec5SDimitry Andric for (SymbolVersion &ver : config->versionScriptLocals) 249*0b57cec5SDimitry Andric assignWildcardVersion(ver, VER_NDX_LOCAL); 250*0b57cec5SDimitry Andric 251*0b57cec5SDimitry Andric // Note that because the last match takes precedence over previous matches, 252*0b57cec5SDimitry Andric // we iterate over the definitions in the reverse order. 253*0b57cec5SDimitry Andric for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) 254*0b57cec5SDimitry Andric for (SymbolVersion &ver : v.globals) 255*0b57cec5SDimitry Andric assignWildcardVersion(ver, v.id); 256*0b57cec5SDimitry Andric 257*0b57cec5SDimitry Andric // Symbol themselves might know their versions because symbols 258*0b57cec5SDimitry Andric // can contain versions in the form of <name>@<version>. 259*0b57cec5SDimitry Andric // Let them parse and update their names to exclude version suffix. 260*0b57cec5SDimitry Andric for (Symbol *sym : symVector) 261*0b57cec5SDimitry Andric sym->parseSymbolVersion(); 262*0b57cec5SDimitry Andric 263*0b57cec5SDimitry Andric // isPreemptible is false at this point. To correctly compute the binding of a 264*0b57cec5SDimitry Andric // Defined (which is used by includeInDynsym()), we need to know if it is 265*0b57cec5SDimitry Andric // VER_NDX_LOCAL or not. If defaultSymbolVersion is VER_NDX_LOCAL, we should 266*0b57cec5SDimitry Andric // compute symbol versions before handling --dynamic-list. 267*0b57cec5SDimitry Andric handleDynamicList(); 268*0b57cec5SDimitry Andric } 269