xref: /freebsd/contrib/llvm-project/lld/ELF/SymbolTable.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===- SymbolTable.cpp ----------------------------------------------------===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric //
9*0b57cec5SDimitry Andric // Symbol table is a bag of all known symbols. We put all symbols of
10*0b57cec5SDimitry Andric // all input files to the symbol table. The symbol table is basically
11*0b57cec5SDimitry Andric // a hash table with the logic to resolve symbol name conflicts using
12*0b57cec5SDimitry Andric // the symbol types.
13*0b57cec5SDimitry Andric //
14*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
15*0b57cec5SDimitry Andric 
16*0b57cec5SDimitry Andric #include "SymbolTable.h"
17*0b57cec5SDimitry Andric #include "Config.h"
18*0b57cec5SDimitry Andric #include "LinkerScript.h"
19*0b57cec5SDimitry Andric #include "Symbols.h"
20*0b57cec5SDimitry Andric #include "SyntheticSections.h"
21*0b57cec5SDimitry Andric #include "lld/Common/ErrorHandler.h"
22*0b57cec5SDimitry Andric #include "lld/Common/Memory.h"
23*0b57cec5SDimitry Andric #include "lld/Common/Strings.h"
24*0b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
25*0b57cec5SDimitry Andric 
26*0b57cec5SDimitry Andric using namespace llvm;
27*0b57cec5SDimitry Andric using namespace llvm::object;
28*0b57cec5SDimitry Andric using namespace llvm::ELF;
29*0b57cec5SDimitry Andric 
30*0b57cec5SDimitry Andric using namespace lld;
31*0b57cec5SDimitry Andric using namespace lld::elf;
32*0b57cec5SDimitry Andric 
33*0b57cec5SDimitry Andric SymbolTable *elf::symtab;
34*0b57cec5SDimitry Andric 
35*0b57cec5SDimitry Andric void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
36*0b57cec5SDimitry Andric   // Swap symbols as instructed by -wrap.
37*0b57cec5SDimitry Andric   int &idx1 = symMap[CachedHashStringRef(sym->getName())];
38*0b57cec5SDimitry Andric   int &idx2 = symMap[CachedHashStringRef(real->getName())];
39*0b57cec5SDimitry Andric   int &idx3 = symMap[CachedHashStringRef(wrap->getName())];
40*0b57cec5SDimitry Andric 
41*0b57cec5SDimitry Andric   idx2 = idx1;
42*0b57cec5SDimitry Andric   idx1 = idx3;
43*0b57cec5SDimitry Andric 
44*0b57cec5SDimitry Andric   // Now renaming is complete. No one refers Real symbol. We could leave
45*0b57cec5SDimitry Andric   // Real as-is, but if Real is written to the symbol table, that may
46*0b57cec5SDimitry Andric   // contain irrelevant values. So, we copy all values from Sym to Real.
47*0b57cec5SDimitry Andric   StringRef s = real->getName();
48*0b57cec5SDimitry Andric   memcpy(real, sym, sizeof(SymbolUnion));
49*0b57cec5SDimitry Andric   real->setName(s);
50*0b57cec5SDimitry Andric }
51*0b57cec5SDimitry Andric 
52*0b57cec5SDimitry Andric // Find an existing symbol or create a new one.
53*0b57cec5SDimitry Andric Symbol *SymbolTable::insert(StringRef name) {
54*0b57cec5SDimitry Andric   // <name>@@<version> means the symbol is the default version. In that
55*0b57cec5SDimitry Andric   // case <name>@@<version> will be used to resolve references to <name>.
56*0b57cec5SDimitry Andric   //
57*0b57cec5SDimitry Andric   // Since this is a hot path, the following string search code is
58*0b57cec5SDimitry Andric   // optimized for speed. StringRef::find(char) is much faster than
59*0b57cec5SDimitry Andric   // StringRef::find(StringRef).
60*0b57cec5SDimitry Andric   size_t pos = name.find('@');
61*0b57cec5SDimitry Andric   if (pos != StringRef::npos && pos + 1 < name.size() && name[pos + 1] == '@')
62*0b57cec5SDimitry Andric     name = name.take_front(pos);
63*0b57cec5SDimitry Andric 
64*0b57cec5SDimitry Andric   auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()});
65*0b57cec5SDimitry Andric   int &symIndex = p.first->second;
66*0b57cec5SDimitry Andric   bool isNew = p.second;
67*0b57cec5SDimitry Andric 
68*0b57cec5SDimitry Andric   if (!isNew)
69*0b57cec5SDimitry Andric     return symVector[symIndex];
70*0b57cec5SDimitry Andric 
71*0b57cec5SDimitry Andric   Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
72*0b57cec5SDimitry Andric   symVector.push_back(sym);
73*0b57cec5SDimitry Andric 
74*0b57cec5SDimitry Andric   sym->setName(name);
75*0b57cec5SDimitry Andric   sym->symbolKind = Symbol::PlaceholderKind;
76*0b57cec5SDimitry Andric   sym->versionId = config->defaultSymbolVersion;
77*0b57cec5SDimitry Andric   sym->visibility = STV_DEFAULT;
78*0b57cec5SDimitry Andric   sym->isUsedInRegularObj = false;
79*0b57cec5SDimitry Andric   sym->exportDynamic = false;
80*0b57cec5SDimitry Andric   sym->canInline = true;
81*0b57cec5SDimitry Andric   sym->scriptDefined = false;
82*0b57cec5SDimitry Andric   sym->partition = 1;
83*0b57cec5SDimitry Andric   return sym;
84*0b57cec5SDimitry Andric }
85*0b57cec5SDimitry Andric 
86*0b57cec5SDimitry Andric Symbol *SymbolTable::addSymbol(const Symbol &New) {
87*0b57cec5SDimitry Andric   Symbol *sym = symtab->insert(New.getName());
88*0b57cec5SDimitry Andric   sym->resolve(New);
89*0b57cec5SDimitry Andric   return sym;
90*0b57cec5SDimitry Andric }
91*0b57cec5SDimitry Andric 
92*0b57cec5SDimitry Andric Symbol *SymbolTable::find(StringRef name) {
93*0b57cec5SDimitry Andric   auto it = symMap.find(CachedHashStringRef(name));
94*0b57cec5SDimitry Andric   if (it == symMap.end())
95*0b57cec5SDimitry Andric     return nullptr;
96*0b57cec5SDimitry Andric   Symbol *sym = symVector[it->second];
97*0b57cec5SDimitry Andric   if (sym->isPlaceholder())
98*0b57cec5SDimitry Andric     return nullptr;
99*0b57cec5SDimitry Andric   return sym;
100*0b57cec5SDimitry Andric }
101*0b57cec5SDimitry Andric 
102*0b57cec5SDimitry Andric // Initialize demangledSyms with a map from demangled symbols to symbol
103*0b57cec5SDimitry Andric // objects. Used to handle "extern C++" directive in version scripts.
104*0b57cec5SDimitry Andric //
105*0b57cec5SDimitry Andric // The map will contain all demangled symbols. That can be very large,
106*0b57cec5SDimitry Andric // and in LLD we generally want to avoid do anything for each symbol.
107*0b57cec5SDimitry Andric // Then, why are we doing this? Here's why.
108*0b57cec5SDimitry Andric //
109*0b57cec5SDimitry Andric // Users can use "extern C++ {}" directive to match against demangled
110*0b57cec5SDimitry Andric // C++ symbols. For example, you can write a pattern such as
111*0b57cec5SDimitry Andric // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this
112*0b57cec5SDimitry Andric // other than trying to match a pattern against all demangled symbols.
113*0b57cec5SDimitry Andric // So, if "extern C++" feature is used, we need to demangle all known
114*0b57cec5SDimitry Andric // symbols.
115*0b57cec5SDimitry Andric StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() {
116*0b57cec5SDimitry Andric   if (!demangledSyms) {
117*0b57cec5SDimitry Andric     demangledSyms.emplace();
118*0b57cec5SDimitry Andric     for (Symbol *sym : symVector) {
119*0b57cec5SDimitry Andric       if (!sym->isDefined() && !sym->isCommon())
120*0b57cec5SDimitry Andric         continue;
121*0b57cec5SDimitry Andric       if (Optional<std::string> s = demangleItanium(sym->getName()))
122*0b57cec5SDimitry Andric         (*demangledSyms)[*s].push_back(sym);
123*0b57cec5SDimitry Andric       else
124*0b57cec5SDimitry Andric         (*demangledSyms)[sym->getName()].push_back(sym);
125*0b57cec5SDimitry Andric     }
126*0b57cec5SDimitry Andric   }
127*0b57cec5SDimitry Andric   return *demangledSyms;
128*0b57cec5SDimitry Andric }
129*0b57cec5SDimitry Andric 
130*0b57cec5SDimitry Andric std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion ver) {
131*0b57cec5SDimitry Andric   if (ver.isExternCpp)
132*0b57cec5SDimitry Andric     return getDemangledSyms().lookup(ver.name);
133*0b57cec5SDimitry Andric   if (Symbol *b = find(ver.name))
134*0b57cec5SDimitry Andric     if (b->isDefined() || b->isCommon())
135*0b57cec5SDimitry Andric       return {b};
136*0b57cec5SDimitry Andric   return {};
137*0b57cec5SDimitry Andric }
138*0b57cec5SDimitry Andric 
139*0b57cec5SDimitry Andric std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver) {
140*0b57cec5SDimitry Andric   std::vector<Symbol *> res;
141*0b57cec5SDimitry Andric   StringMatcher m(ver.name);
142*0b57cec5SDimitry Andric 
143*0b57cec5SDimitry Andric   if (ver.isExternCpp) {
144*0b57cec5SDimitry Andric     for (auto &p : getDemangledSyms())
145*0b57cec5SDimitry Andric       if (m.match(p.first()))
146*0b57cec5SDimitry Andric         res.insert(res.end(), p.second.begin(), p.second.end());
147*0b57cec5SDimitry Andric     return res;
148*0b57cec5SDimitry Andric   }
149*0b57cec5SDimitry Andric 
150*0b57cec5SDimitry Andric   for (Symbol *sym : symVector)
151*0b57cec5SDimitry Andric     if ((sym->isDefined() || sym->isCommon()) && m.match(sym->getName()))
152*0b57cec5SDimitry Andric       res.push_back(sym);
153*0b57cec5SDimitry Andric   return res;
154*0b57cec5SDimitry Andric }
155*0b57cec5SDimitry Andric 
156*0b57cec5SDimitry Andric // Handles -dynamic-list.
157*0b57cec5SDimitry Andric void SymbolTable::handleDynamicList() {
158*0b57cec5SDimitry Andric   for (SymbolVersion &ver : config->dynamicList) {
159*0b57cec5SDimitry Andric     std::vector<Symbol *> syms;
160*0b57cec5SDimitry Andric     if (ver.hasWildcard)
161*0b57cec5SDimitry Andric       syms = findAllByVersion(ver);
162*0b57cec5SDimitry Andric     else
163*0b57cec5SDimitry Andric       syms = findByVersion(ver);
164*0b57cec5SDimitry Andric 
165*0b57cec5SDimitry Andric     for (Symbol *b : syms) {
166*0b57cec5SDimitry Andric       if (!config->shared)
167*0b57cec5SDimitry Andric         b->exportDynamic = true;
168*0b57cec5SDimitry Andric       else if (b->includeInDynsym())
169*0b57cec5SDimitry Andric         b->isPreemptible = true;
170*0b57cec5SDimitry Andric     }
171*0b57cec5SDimitry Andric   }
172*0b57cec5SDimitry Andric }
173*0b57cec5SDimitry Andric 
174*0b57cec5SDimitry Andric // Set symbol versions to symbols. This function handles patterns
175*0b57cec5SDimitry Andric // containing no wildcard characters.
176*0b57cec5SDimitry Andric void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
177*0b57cec5SDimitry Andric                                      StringRef versionName) {
178*0b57cec5SDimitry Andric   if (ver.hasWildcard)
179*0b57cec5SDimitry Andric     return;
180*0b57cec5SDimitry Andric 
181*0b57cec5SDimitry Andric   // Get a list of symbols which we need to assign the version to.
182*0b57cec5SDimitry Andric   std::vector<Symbol *> syms = findByVersion(ver);
183*0b57cec5SDimitry Andric   if (syms.empty()) {
184*0b57cec5SDimitry Andric     if (!config->undefinedVersion)
185*0b57cec5SDimitry Andric       error("version script assignment of '" + versionName + "' to symbol '" +
186*0b57cec5SDimitry Andric             ver.name + "' failed: symbol not defined");
187*0b57cec5SDimitry Andric     return;
188*0b57cec5SDimitry Andric   }
189*0b57cec5SDimitry Andric 
190*0b57cec5SDimitry Andric   auto getName = [](uint16_t ver) -> std::string {
191*0b57cec5SDimitry Andric     if (ver == VER_NDX_LOCAL)
192*0b57cec5SDimitry Andric       return "VER_NDX_LOCAL";
193*0b57cec5SDimitry Andric     if (ver == VER_NDX_GLOBAL)
194*0b57cec5SDimitry Andric       return "VER_NDX_GLOBAL";
195*0b57cec5SDimitry Andric     return ("version '" + config->versionDefinitions[ver - 2].name + "'").str();
196*0b57cec5SDimitry Andric   };
197*0b57cec5SDimitry Andric 
198*0b57cec5SDimitry Andric   // Assign the version.
199*0b57cec5SDimitry Andric   for (Symbol *sym : syms) {
200*0b57cec5SDimitry Andric     // Skip symbols containing version info because symbol versions
201*0b57cec5SDimitry Andric     // specified by symbol names take precedence over version scripts.
202*0b57cec5SDimitry Andric     // See parseSymbolVersion().
203*0b57cec5SDimitry Andric     if (sym->getName().contains('@'))
204*0b57cec5SDimitry Andric       continue;
205*0b57cec5SDimitry Andric 
206*0b57cec5SDimitry Andric     if (sym->versionId == config->defaultSymbolVersion)
207*0b57cec5SDimitry Andric       sym->versionId = versionId;
208*0b57cec5SDimitry Andric     if (sym->versionId == versionId)
209*0b57cec5SDimitry Andric       continue;
210*0b57cec5SDimitry Andric 
211*0b57cec5SDimitry Andric     warn("attempt to reassign symbol '" + ver.name + "' of " +
212*0b57cec5SDimitry Andric          getName(sym->versionId) + " to " + getName(versionId));
213*0b57cec5SDimitry Andric   }
214*0b57cec5SDimitry Andric }
215*0b57cec5SDimitry Andric 
216*0b57cec5SDimitry Andric void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) {
217*0b57cec5SDimitry Andric   if (!ver.hasWildcard)
218*0b57cec5SDimitry Andric     return;
219*0b57cec5SDimitry Andric 
220*0b57cec5SDimitry Andric   // Exact matching takes precendence over fuzzy matching,
221*0b57cec5SDimitry Andric   // so we set a version to a symbol only if no version has been assigned
222*0b57cec5SDimitry Andric   // to the symbol. This behavior is compatible with GNU.
223*0b57cec5SDimitry Andric   for (Symbol *b : findAllByVersion(ver))
224*0b57cec5SDimitry Andric     if (b->versionId == config->defaultSymbolVersion)
225*0b57cec5SDimitry Andric       b->versionId = versionId;
226*0b57cec5SDimitry Andric }
227*0b57cec5SDimitry Andric 
228*0b57cec5SDimitry Andric // This function processes version scripts by updating the versionId
229*0b57cec5SDimitry Andric // member of symbols.
230*0b57cec5SDimitry Andric // If there's only one anonymous version definition in a version
231*0b57cec5SDimitry Andric // script file, the script does not actually define any symbol version,
232*0b57cec5SDimitry Andric // but just specifies symbols visibilities.
233*0b57cec5SDimitry Andric void SymbolTable::scanVersionScript() {
234*0b57cec5SDimitry Andric   // First, we assign versions to exact matching symbols,
235*0b57cec5SDimitry Andric   // i.e. version definitions not containing any glob meta-characters.
236*0b57cec5SDimitry Andric   for (SymbolVersion &ver : config->versionScriptGlobals)
237*0b57cec5SDimitry Andric     assignExactVersion(ver, VER_NDX_GLOBAL, "global");
238*0b57cec5SDimitry Andric   for (SymbolVersion &ver : config->versionScriptLocals)
239*0b57cec5SDimitry Andric     assignExactVersion(ver, VER_NDX_LOCAL, "local");
240*0b57cec5SDimitry Andric   for (VersionDefinition &v : config->versionDefinitions)
241*0b57cec5SDimitry Andric     for (SymbolVersion &ver : v.globals)
242*0b57cec5SDimitry Andric       assignExactVersion(ver, v.id, v.name);
243*0b57cec5SDimitry Andric 
244*0b57cec5SDimitry Andric   // Next, we assign versions to fuzzy matching symbols,
245*0b57cec5SDimitry Andric   // i.e. version definitions containing glob meta-characters.
246*0b57cec5SDimitry Andric   for (SymbolVersion &ver : config->versionScriptGlobals)
247*0b57cec5SDimitry Andric     assignWildcardVersion(ver, VER_NDX_GLOBAL);
248*0b57cec5SDimitry Andric   for (SymbolVersion &ver : config->versionScriptLocals)
249*0b57cec5SDimitry Andric     assignWildcardVersion(ver, VER_NDX_LOCAL);
250*0b57cec5SDimitry Andric 
251*0b57cec5SDimitry Andric   // Note that because the last match takes precedence over previous matches,
252*0b57cec5SDimitry Andric   // we iterate over the definitions in the reverse order.
253*0b57cec5SDimitry Andric   for (VersionDefinition &v : llvm::reverse(config->versionDefinitions))
254*0b57cec5SDimitry Andric     for (SymbolVersion &ver : v.globals)
255*0b57cec5SDimitry Andric       assignWildcardVersion(ver, v.id);
256*0b57cec5SDimitry Andric 
257*0b57cec5SDimitry Andric   // Symbol themselves might know their versions because symbols
258*0b57cec5SDimitry Andric   // can contain versions in the form of <name>@<version>.
259*0b57cec5SDimitry Andric   // Let them parse and update their names to exclude version suffix.
260*0b57cec5SDimitry Andric   for (Symbol *sym : symVector)
261*0b57cec5SDimitry Andric     sym->parseSymbolVersion();
262*0b57cec5SDimitry Andric 
263*0b57cec5SDimitry Andric   // isPreemptible is false at this point. To correctly compute the binding of a
264*0b57cec5SDimitry Andric   // Defined (which is used by includeInDynsym()), we need to know if it is
265*0b57cec5SDimitry Andric   // VER_NDX_LOCAL or not. If defaultSymbolVersion is VER_NDX_LOCAL, we should
266*0b57cec5SDimitry Andric   // compute symbol versions before handling --dynamic-list.
267*0b57cec5SDimitry Andric   handleDynamicList();
268*0b57cec5SDimitry Andric }
269