xref: /freebsd/contrib/llvm-project/lld/ELF/SymbolTable.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- SymbolTable.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Symbol table is a bag of all known symbols. We put all symbols of
10 // all input files to the symbol table. The symbol table is basically
11 // a hash table with the logic to resolve symbol name conflicts using
12 // the symbol types.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "SymbolTable.h"
17 #include "Config.h"
18 #include "InputFiles.h"
19 #include "Symbols.h"
20 #include "lld/Common/Memory.h"
21 #include "lld/Common/Strings.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/Demangle/Demangle.h"
24 
25 using namespace llvm;
26 using namespace llvm::object;
27 using namespace llvm::ELF;
28 using namespace lld;
29 using namespace lld::elf;
30 
wrap(Symbol * sym,Symbol * real,Symbol * wrap)31 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
32   // Redirect __real_foo to the original foo and foo to the original __wrap_foo.
33   int &idx1 = symMap[CachedHashStringRef(sym->getName())];
34   int &idx2 = symMap[CachedHashStringRef(real->getName())];
35   int &idx3 = symMap[CachedHashStringRef(wrap->getName())];
36 
37   idx2 = idx1;
38   idx1 = idx3;
39 
40   // Propagate symbol usage information to the redirected symbols.
41   if (sym->isUsedInRegularObj)
42     wrap->isUsedInRegularObj = true;
43   if (real->isUsedInRegularObj)
44     sym->isUsedInRegularObj = true;
45   else if (!sym->isDefined())
46     // Now that all references to sym have been redirected to wrap, if there are
47     // no references to real (which has been redirected to sym), we only need to
48     // keep sym if it was defined, otherwise it's unused and can be dropped.
49     sym->isUsedInRegularObj = false;
50 
51   // Now renaming is complete, and no one refers to real. We drop real from
52   // .symtab and .dynsym. If real is undefined, it is important that we don't
53   // leave it in .dynsym, because otherwise it might lead to an undefined symbol
54   // error in a subsequent link. If real is defined, we could emit real as an
55   // alias for sym, but that could degrade the user experience of some tools
56   // that can print out only one symbol for each location: sym is a preferred
57   // name than real, but they might print out real instead.
58   memcpy(static_cast<void *>(real), sym, sizeof(SymbolUnion));
59   real->isUsedInRegularObj = false;
60 }
61 
62 // Find an existing symbol or create a new one.
insert(StringRef name)63 Symbol *SymbolTable::insert(StringRef name) {
64   // <name>@@<version> means the symbol is the default version. In that
65   // case <name>@@<version> will be used to resolve references to <name>.
66   //
67   // Since this is a hot path, the following string search code is
68   // optimized for speed. StringRef::find(char) is much faster than
69   // StringRef::find(StringRef).
70   StringRef stem = name;
71   size_t pos = name.find('@');
72   if (pos != StringRef::npos && pos + 1 < name.size() && name[pos + 1] == '@')
73     stem = name.take_front(pos);
74 
75   auto p = symMap.insert({CachedHashStringRef(stem), (int)symVector.size()});
76   if (!p.second) {
77     Symbol *sym = symVector[p.first->second];
78     if (stem.size() != name.size()) {
79       sym->setName(name);
80       sym->hasVersionSuffix = true;
81     }
82     return sym;
83   }
84 
85   Symbol *sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
86   symVector.push_back(sym);
87 
88   // *sym was not initialized by a constructor. Initialize all Symbol fields.
89   memset(static_cast<void *>(sym), 0, sizeof(Symbol));
90   sym->setName(name);
91   sym->partition = 1;
92   sym->versionId = VER_NDX_GLOBAL;
93   if (pos != StringRef::npos)
94     sym->hasVersionSuffix = true;
95   return sym;
96 }
97 
98 // This variant of addSymbol is used by BinaryFile::parse to check duplicate
99 // symbol errors.
addAndCheckDuplicate(Ctx & ctx,const Defined & newSym)100 Symbol *SymbolTable::addAndCheckDuplicate(Ctx &ctx, const Defined &newSym) {
101   Symbol *sym = insert(newSym.getName());
102   if (sym->isDefined())
103     sym->checkDuplicate(ctx, newSym);
104   sym->resolve(ctx, newSym);
105   sym->isUsedInRegularObj = true;
106   return sym;
107 }
108 
find(StringRef name)109 Symbol *SymbolTable::find(StringRef name) {
110   auto it = symMap.find(CachedHashStringRef(name));
111   if (it == symMap.end())
112     return nullptr;
113   return symVector[it->second];
114 }
115 
116 // A version script/dynamic list is only meaningful for a Defined symbol.
117 // A CommonSymbol will be converted to a Defined in replaceCommonSymbols().
118 // A lazy symbol may be made Defined if an LTO libcall extracts it.
canBeVersioned(const Symbol & sym)119 static bool canBeVersioned(const Symbol &sym) {
120   return sym.isDefined() || sym.isCommon() || sym.isLazy();
121 }
122 
123 // Initialize demangledSyms with a map from demangled symbols to symbol
124 // objects. Used to handle "extern C++" directive in version scripts.
125 //
126 // The map will contain all demangled symbols. That can be very large,
127 // and in LLD we generally want to avoid do anything for each symbol.
128 // Then, why are we doing this? Here's why.
129 //
130 // Users can use "extern C++ {}" directive to match against demangled
131 // C++ symbols. For example, you can write a pattern such as
132 // "llvm::*::foo(int, ?)". Obviously, there's no way to handle this
133 // other than trying to match a pattern against all demangled symbols.
134 // So, if "extern C++" feature is used, we need to demangle all known
135 // symbols.
getDemangledSyms()136 StringMap<SmallVector<Symbol *, 0>> &SymbolTable::getDemangledSyms() {
137   if (!demangledSyms) {
138     demangledSyms.emplace();
139     std::string demangled;
140     for (Symbol *sym : symVector)
141       if (canBeVersioned(*sym)) {
142         StringRef name = sym->getName();
143         size_t pos = name.find('@');
144         std::string substr;
145         if (pos == std::string::npos)
146           demangled = demangle(name);
147         else if (pos + 1 == name.size() || name[pos + 1] == '@') {
148           substr = name.substr(0, pos);
149           demangled = demangle(substr);
150         } else {
151           substr = name.substr(0, pos);
152           demangled = (demangle(substr) + name.substr(pos)).str();
153         }
154         (*demangledSyms)[demangled].push_back(sym);
155       }
156   }
157   return *demangledSyms;
158 }
159 
findByVersion(SymbolVersion ver)160 SmallVector<Symbol *, 0> SymbolTable::findByVersion(SymbolVersion ver) {
161   if (ver.isExternCpp)
162     return getDemangledSyms().lookup(ver.name);
163   if (Symbol *sym = find(ver.name))
164     if (canBeVersioned(*sym))
165       return {sym};
166   return {};
167 }
168 
findAllByVersion(SymbolVersion ver,bool includeNonDefault)169 SmallVector<Symbol *, 0> SymbolTable::findAllByVersion(SymbolVersion ver,
170                                                        bool includeNonDefault) {
171   SmallVector<Symbol *, 0> res;
172   SingleStringMatcher m(ver.name);
173   auto check = [&](const Symbol &sym) -> bool {
174     if (!includeNonDefault)
175       return !sym.hasVersionSuffix;
176     StringRef name = sym.getName();
177     size_t pos = name.find('@');
178     return !(pos + 1 < name.size() && name[pos + 1] == '@');
179   };
180 
181   if (ver.isExternCpp) {
182     for (auto &p : getDemangledSyms())
183       if (m.match(p.first()))
184         for (Symbol *sym : p.second)
185           if (check(*sym))
186             res.push_back(sym);
187     return res;
188   }
189 
190   for (Symbol *sym : symVector)
191     if (canBeVersioned(*sym) && check(*sym) && m.match(sym->getName()))
192       res.push_back(sym);
193   return res;
194 }
195 
handleDynamicList()196 void SymbolTable::handleDynamicList() {
197   SmallVector<Symbol *, 0> syms;
198   for (SymbolVersion &ver : ctx.arg.dynamicList) {
199     if (ver.hasWildcard)
200       syms = findAllByVersion(ver, /*includeNonDefault=*/true);
201     else
202       syms = findByVersion(ver);
203 
204     for (Symbol *sym : syms)
205       sym->isExported = sym->inDynamicList = true;
206   }
207 }
208 
209 // Set symbol versions to symbols. This function handles patterns containing no
210 // wildcard characters. Return false if no symbol definition matches ver.
assignExactVersion(SymbolVersion ver,uint16_t versionId,StringRef versionName,bool includeNonDefault)211 bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
212                                      StringRef versionName,
213                                      bool includeNonDefault) {
214   // Get a list of symbols which we need to assign the version to.
215   SmallVector<Symbol *, 0> syms = findByVersion(ver);
216 
217   auto getName = [&ctx = ctx](uint16_t ver) -> std::string {
218     if (ver == VER_NDX_LOCAL)
219       return "VER_NDX_LOCAL";
220     if (ver == VER_NDX_GLOBAL)
221       return "VER_NDX_GLOBAL";
222     return ("version '" + ctx.arg.versionDefinitions[ver].name + "'").str();
223   };
224 
225   // Assign the version.
226   for (Symbol *sym : syms) {
227     // For a non-local versionId, skip symbols containing version info because
228     // symbol versions specified by symbol names take precedence over version
229     // scripts. See parseSymbolVersion(ctx).
230     if (!includeNonDefault && versionId != VER_NDX_LOCAL &&
231         sym->getName().contains('@'))
232       continue;
233 
234     // If the version has not been assigned, assign versionId to the symbol.
235     if (!sym->versionScriptAssigned) {
236       sym->versionScriptAssigned = true;
237       sym->versionId = versionId;
238     }
239     if (sym->versionId == versionId)
240       continue;
241 
242     Warn(ctx) << "attempt to reassign symbol '" << ver.name << "' of "
243               << getName(sym->versionId) << " to " << getName(versionId);
244   }
245   return !syms.empty();
246 }
247 
assignWildcardVersion(SymbolVersion ver,uint16_t versionId,bool includeNonDefault)248 void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
249                                         bool includeNonDefault) {
250   // Exact matching takes precedence over fuzzy matching,
251   // so we set a version to a symbol only if no version has been assigned
252   // to the symbol. This behavior is compatible with GNU.
253   for (Symbol *sym : findAllByVersion(ver, includeNonDefault))
254     if (!sym->versionScriptAssigned) {
255       sym->versionScriptAssigned = true;
256       sym->versionId = versionId;
257     }
258 }
259 
260 // This function processes version scripts by updating the versionId
261 // member of symbols.
262 // If there's only one anonymous version definition in a version
263 // script file, the script does not actually define any symbol version,
264 // but just specifies symbols visibilities.
scanVersionScript()265 void SymbolTable::scanVersionScript() {
266   SmallString<128> buf;
267   // First, we assign versions to exact matching symbols,
268   // i.e. version definitions not containing any glob meta-characters.
269   for (VersionDefinition &v : ctx.arg.versionDefinitions) {
270     auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
271       bool found =
272           assignExactVersion(pat, id, ver, /*includeNonDefault=*/false);
273       buf.clear();
274       found |= assignExactVersion({(pat.name + "@" + v.name).toStringRef(buf),
275                                    pat.isExternCpp, /*hasWildCard=*/false},
276                                   id, ver, /*includeNonDefault=*/true);
277       if (!found && !ctx.arg.undefinedVersion)
278         Err(ctx) << "version script assignment of '" << ver << "' to symbol '"
279                  << pat.name << "' failed: symbol not defined";
280     };
281     for (SymbolVersion &pat : v.nonLocalPatterns)
282       if (!pat.hasWildcard)
283         assignExact(pat, v.id, v.name);
284     for (SymbolVersion pat : v.localPatterns)
285       if (!pat.hasWildcard)
286         assignExact(pat, VER_NDX_LOCAL, "local");
287   }
288 
289   // Next, assign versions to wildcards that are not "*". Note that because the
290   // last match takes precedence over previous matches, we iterate over the
291   // definitions in the reverse order.
292   auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
293     assignWildcardVersion(pat, id, /*includeNonDefault=*/false);
294     buf.clear();
295     assignWildcardVersion({(pat.name + "@" + ver).toStringRef(buf),
296                            pat.isExternCpp, /*hasWildCard=*/true},
297                           id,
298                           /*includeNonDefault=*/true);
299   };
300   for (VersionDefinition &v : llvm::reverse(ctx.arg.versionDefinitions)) {
301     for (SymbolVersion &pat : v.nonLocalPatterns)
302       if (pat.hasWildcard && pat.name != "*")
303         assignWildcard(pat, v.id, v.name);
304     for (SymbolVersion &pat : v.localPatterns)
305       if (pat.hasWildcard && pat.name != "*")
306         assignWildcard(pat, VER_NDX_LOCAL, v.name);
307   }
308 
309   // Then, assign versions to "*". In GNU linkers they have lower priority than
310   // other wildcards.
311   bool globalAsteriskFound = false;
312   bool localAsteriskFound = false;
313   bool asteriskReported = false;
314   auto assignAsterisk = [&](SymbolVersion &pat, VersionDefinition *ver,
315                             bool isLocal) {
316     // Avoid issuing a warning if both '--retain-symbol-file' and a version
317     // script with `global: *` are used.
318     //
319     // '--retain-symbol-file' adds a "*" pattern to
320     // 'versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns', see
321     // 'readConfigs()' in 'Driver.cpp'. Note that it is not '.localPatterns',
322     // and may seem counterintuitive, but still works as expected. Here we can
323     // exploit that and skip analyzing the pattern added for this option.
324     if (!asteriskReported && (isLocal || ver->id > VER_NDX_LOCAL)) {
325       if ((isLocal && globalAsteriskFound) ||
326           (!isLocal && localAsteriskFound)) {
327         Warn(ctx)
328             << "wildcard pattern '*' is used for both 'local' and 'global' "
329                "scopes in version script";
330         asteriskReported = true;
331       } else if (!isLocal && globalAsteriskFound) {
332         Warn(ctx) << "wildcard pattern '*' is used for multiple version "
333                      "definitions in "
334                      "version script";
335         asteriskReported = true;
336       } else {
337         localAsteriskFound = isLocal;
338         globalAsteriskFound = !isLocal;
339       }
340     }
341     assignWildcard(pat, isLocal ? (uint16_t)VER_NDX_LOCAL : ver->id, ver->name);
342   };
343   for (VersionDefinition &v : llvm::reverse(ctx.arg.versionDefinitions)) {
344     for (SymbolVersion &pat : v.nonLocalPatterns)
345       if (pat.hasWildcard && pat.name == "*")
346         assignAsterisk(pat, &v, false);
347     for (SymbolVersion &pat : v.localPatterns)
348       if (pat.hasWildcard && pat.name == "*")
349         assignAsterisk(pat, &v, true);
350   }
351 
352   // Handle --dynamic-list. If a specified symbol is also matched by local: in a
353   // version script, the version script takes precedence.
354   handleDynamicList();
355 }
356 
addUnusedUndefined(StringRef name,uint8_t binding)357 Symbol *SymbolTable::addUnusedUndefined(StringRef name, uint8_t binding) {
358   return addSymbol(Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});
359 }
360