xref: /freebsd/contrib/llvm-project/lldb/source/Core/Mangled.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- Mangled.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Core/Mangled.h"
10 
11 #include "lldb/Core/DataFileCache.h"
12 #include "lldb/Core/RichManglingContext.h"
13 #include "lldb/Target/Language.h"
14 #include "lldb/Utility/ConstString.h"
15 #include "lldb/Utility/DataEncoder.h"
16 #include "lldb/Utility/LLDBLog.h"
17 #include "lldb/Utility/Log.h"
18 #include "lldb/Utility/RegularExpression.h"
19 #include "lldb/Utility/Stream.h"
20 #include "lldb/lldb-enumerations.h"
21 
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Demangle/Demangle.h"
25 #include "llvm/Support/Compiler.h"
26 
27 #include <mutex>
28 #include <string>
29 #include <string_view>
30 #include <utility>
31 
32 #include <cstdlib>
33 #include <cstring>
34 using namespace lldb_private;
35 
cstring_is_mangled(llvm::StringRef s)36 static inline bool cstring_is_mangled(llvm::StringRef s) {
37   return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone;
38 }
39 
40 #pragma mark Mangled
41 
GetManglingScheme(llvm::StringRef const name)42 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
43   if (name.empty())
44     return Mangled::eManglingSchemeNone;
45 
46   if (name.starts_with("?"))
47     return Mangled::eManglingSchemeMSVC;
48 
49   if (name.starts_with("_R"))
50     return Mangled::eManglingSchemeRustV0;
51 
52   if (name.starts_with("_D")) {
53     // A dlang mangled name begins with `_D`, followed by a numeric length. One
54     // known exception is the symbol `_Dmain`.
55     // See `SymbolName` and `LName` in
56     // https://dlang.org/spec/abi.html#name_mangling
57     llvm::StringRef buf = name.drop_front(2);
58     if (!buf.empty() && (llvm::isDigit(buf.front()) || name == "_Dmain"))
59       return Mangled::eManglingSchemeD;
60   }
61 
62   if (name.starts_with("_Z"))
63     return Mangled::eManglingSchemeItanium;
64 
65   // ___Z is a clang extension of block invocations
66   if (name.starts_with("___Z"))
67     return Mangled::eManglingSchemeItanium;
68 
69   // Swift's older style of mangling used "_T" as a mangling prefix. This can
70   // lead to false positives with other symbols that just so happen to start
71   // with "_T". To minimize the chance of that happening, we only return true
72   // for select old-style swift mangled names. The known cases are ObjC classes
73   // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
74   // Protocols are prefixed with "_TtP".
75   if (name.starts_with("_TtC") || name.starts_with("_TtGC") ||
76       name.starts_with("_TtP"))
77     return Mangled::eManglingSchemeSwift;
78 
79   // Swift 4.2 used "$S" and "_$S".
80   // Swift 5 and onward uses "$s" and "_$s".
81   // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
82   if (name.starts_with("$S") || name.starts_with("_$S") ||
83       name.starts_with("$s") || name.starts_with("_$s") ||
84       name.starts_with("@__swiftmacro_"))
85     return Mangled::eManglingSchemeSwift;
86 
87   return Mangled::eManglingSchemeNone;
88 }
89 
Mangled(ConstString s)90 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
91   if (s)
92     SetValue(s);
93 }
94 
Mangled(llvm::StringRef name)95 Mangled::Mangled(llvm::StringRef name) {
96   if (!name.empty())
97     SetValue(ConstString(name));
98 }
99 
100 // Convert to bool operator. This allows code to check any Mangled objects
101 // to see if they contain anything valid using code such as:
102 //
103 //  Mangled mangled(...);
104 //  if (mangled)
105 //  { ...
operator bool() const106 Mangled::operator bool() const { return m_mangled || m_demangled; }
107 
108 // Clear the mangled and demangled values.
Clear()109 void Mangled::Clear() {
110   m_mangled.Clear();
111   m_demangled.Clear();
112 }
113 
114 // Compare the string values.
Compare(const Mangled & a,const Mangled & b)115 int Mangled::Compare(const Mangled &a, const Mangled &b) {
116   return ConstString::Compare(a.GetName(ePreferMangled),
117                               b.GetName(ePreferMangled));
118 }
119 
SetValue(ConstString name)120 void Mangled::SetValue(ConstString name) {
121   if (name) {
122     if (cstring_is_mangled(name.GetStringRef())) {
123       m_demangled.Clear();
124       m_mangled = name;
125     } else {
126       m_demangled = name;
127       m_mangled.Clear();
128     }
129   } else {
130     m_demangled.Clear();
131     m_mangled.Clear();
132   }
133 }
134 
135 // Local helpers for different demangling implementations.
GetMSVCDemangledStr(llvm::StringRef M)136 static char *GetMSVCDemangledStr(llvm::StringRef M) {
137   char *demangled_cstr = llvm::microsoftDemangle(
138       M, nullptr, nullptr,
139       llvm::MSDemangleFlags(
140           llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
141           llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
142 
143   if (Log *log = GetLog(LLDBLog::Demangle)) {
144     if (demangled_cstr && demangled_cstr[0])
145       LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
146     else
147       LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
148   }
149 
150   return demangled_cstr;
151 }
152 
GetItaniumDemangledStr(const char * M)153 static char *GetItaniumDemangledStr(const char *M) {
154   char *demangled_cstr = nullptr;
155 
156   llvm::ItaniumPartialDemangler ipd;
157   bool err = ipd.partialDemangle(M);
158   if (!err) {
159     // Default buffer and size (will realloc in case it's too small).
160     size_t demangled_size = 80;
161     demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
162     demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
163 
164     assert(demangled_cstr &&
165            "finishDemangle must always succeed if partialDemangle did");
166     assert(demangled_cstr[demangled_size - 1] == '\0' &&
167            "Expected demangled_size to return length including trailing null");
168   }
169 
170   if (Log *log = GetLog(LLDBLog::Demangle)) {
171     if (demangled_cstr)
172       LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
173     else
174       LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
175   }
176 
177   return demangled_cstr;
178 }
179 
GetRustV0DemangledStr(llvm::StringRef M)180 static char *GetRustV0DemangledStr(llvm::StringRef M) {
181   char *demangled_cstr = llvm::rustDemangle(M);
182 
183   if (Log *log = GetLog(LLDBLog::Demangle)) {
184     if (demangled_cstr && demangled_cstr[0])
185       LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
186     else
187       LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",
188                static_cast<std::string_view>(M));
189   }
190 
191   return demangled_cstr;
192 }
193 
GetDLangDemangledStr(llvm::StringRef M)194 static char *GetDLangDemangledStr(llvm::StringRef M) {
195   char *demangled_cstr = llvm::dlangDemangle(M);
196 
197   if (Log *log = GetLog(LLDBLog::Demangle)) {
198     if (demangled_cstr && demangled_cstr[0])
199       LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
200     else
201       LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",
202                static_cast<std::string_view>(M));
203   }
204 
205   return demangled_cstr;
206 }
207 
208 // Explicit demangling for scheduled requests during batch processing. This
209 // makes use of ItaniumPartialDemangler's rich demangle info
GetRichManglingInfo(RichManglingContext & context,SkipMangledNameFn * skip_mangled_name)210 bool Mangled::GetRichManglingInfo(RichManglingContext &context,
211                                   SkipMangledNameFn *skip_mangled_name) {
212   // Others are not meant to arrive here. ObjC names or C's main() for example
213   // have their names stored in m_demangled, while m_mangled is empty.
214   assert(m_mangled);
215 
216   // Check whether or not we are interested in this name at all.
217   ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef());
218   if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
219     return false;
220 
221   switch (scheme) {
222   case eManglingSchemeNone:
223     // The current mangled_name_filter would allow llvm_unreachable here.
224     return false;
225 
226   case eManglingSchemeItanium:
227     // We want the rich mangling info here, so we don't care whether or not
228     // there is a demangled string in the pool already.
229     return context.FromItaniumName(m_mangled);
230 
231   case eManglingSchemeMSVC: {
232     // We have no rich mangling for MSVC-mangled names yet, so first try to
233     // demangle it if necessary.
234     if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) {
235       if (char *d = GetMSVCDemangledStr(m_mangled)) {
236         // Without the rich mangling info we have to demangle the full name.
237         // Copy it to string pool and connect the counterparts to accelerate
238         // later access in GetDemangledName().
239         m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d),
240                                                     m_mangled);
241         ::free(d);
242       } else {
243         m_demangled.SetCString("");
244       }
245     }
246 
247     if (m_demangled.IsEmpty()) {
248       // Cannot demangle it, so don't try parsing.
249       return false;
250     } else {
251       // Demangled successfully, we can try and parse it with
252       // CPlusPlusLanguage::MethodName.
253       return context.FromCxxMethodName(m_demangled);
254     }
255   }
256 
257   case eManglingSchemeRustV0:
258   case eManglingSchemeD:
259   case eManglingSchemeSwift:
260     // Rich demangling scheme is not supported
261     return false;
262   }
263   llvm_unreachable("Fully covered switch above!");
264 }
265 
266 // Generate the demangled name on demand using this accessor. Code in this
267 // class will need to use this accessor if it wishes to decode the demangled
268 // name. The result is cached and will be kept until a new string value is
269 // supplied to this object, or until the end of the object's lifetime.
GetDemangledName() const270 ConstString Mangled::GetDemangledName() const {
271   // Check to make sure we have a valid mangled name and that we haven't
272   // already decoded our mangled name.
273   if (m_mangled && m_demangled.IsNull()) {
274     // Don't bother running anything that isn't mangled
275     const char *mangled_name = m_mangled.GetCString();
276     ManglingScheme mangling_scheme =
277         GetManglingScheme(m_mangled.GetStringRef());
278     if (mangling_scheme != eManglingSchemeNone &&
279         !m_mangled.GetMangledCounterpart(m_demangled)) {
280       // We didn't already mangle this name, demangle it and if all goes well
281       // add it to our map.
282       char *demangled_name = nullptr;
283       switch (mangling_scheme) {
284       case eManglingSchemeMSVC:
285         demangled_name = GetMSVCDemangledStr(mangled_name);
286         break;
287       case eManglingSchemeItanium: {
288         demangled_name = GetItaniumDemangledStr(mangled_name);
289         break;
290       }
291       case eManglingSchemeRustV0:
292         demangled_name = GetRustV0DemangledStr(m_mangled);
293         break;
294       case eManglingSchemeD:
295         demangled_name = GetDLangDemangledStr(m_mangled);
296         break;
297       case eManglingSchemeSwift:
298         // Demangling a swift name requires the swift compiler. This is
299         // explicitly unsupported on llvm.org.
300         break;
301       case eManglingSchemeNone:
302         llvm_unreachable("eManglingSchemeNone was handled already");
303       }
304       if (demangled_name) {
305         m_demangled.SetStringWithMangledCounterpart(
306             llvm::StringRef(demangled_name), m_mangled);
307         free(demangled_name);
308       }
309     }
310     if (m_demangled.IsNull()) {
311       // Set the demangled string to the empty string to indicate we tried to
312       // parse it once and failed.
313       m_demangled.SetCString("");
314     }
315   }
316 
317   return m_demangled;
318 }
319 
GetDisplayDemangledName() const320 ConstString Mangled::GetDisplayDemangledName() const {
321   if (Language *lang = Language::FindPlugin(GuessLanguage()))
322     return lang->GetDisplayDemangledName(*this);
323   return GetDemangledName();
324 }
325 
NameMatches(const RegularExpression & regex) const326 bool Mangled::NameMatches(const RegularExpression &regex) const {
327   if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
328     return true;
329 
330   ConstString demangled = GetDemangledName();
331   return demangled && regex.Execute(demangled.GetStringRef());
332 }
333 
334 // Get the demangled name if there is one, else return the mangled name.
GetName(Mangled::NamePreference preference) const335 ConstString Mangled::GetName(Mangled::NamePreference preference) const {
336   if (preference == ePreferMangled && m_mangled)
337     return m_mangled;
338 
339   // Call the accessor to make sure we get a demangled name in case it hasn't
340   // been demangled yet...
341   ConstString demangled = GetDemangledName();
342 
343   if (preference == ePreferDemangledWithoutArguments) {
344     if (Language *lang = Language::FindPlugin(GuessLanguage())) {
345       return lang->GetDemangledFunctionNameWithoutArguments(*this);
346     }
347   }
348   if (preference == ePreferDemangled) {
349     if (demangled)
350       return demangled;
351     return m_mangled;
352   }
353   return demangled;
354 }
355 
356 // Dump a Mangled object to stream "s". We don't force our demangled name to be
357 // computed currently (we don't use the accessor).
Dump(Stream * s) const358 void Mangled::Dump(Stream *s) const {
359   if (m_mangled) {
360     *s << ", mangled = " << m_mangled;
361   }
362   if (m_demangled) {
363     const char *demangled = m_demangled.AsCString();
364     s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
365   }
366 }
367 
368 // Dumps a debug version of this string with extra object and state information
369 // to stream "s".
DumpDebug(Stream * s) const370 void Mangled::DumpDebug(Stream *s) const {
371   s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
372             static_cast<const void *>(this));
373   m_mangled.DumpDebug(s);
374   s->Printf(", demangled = ");
375   m_demangled.DumpDebug(s);
376 }
377 
378 // Return the size in byte that this object takes in memory. The size includes
379 // the size of the objects it owns, and not the strings that it references
380 // because they are shared strings.
MemorySize() const381 size_t Mangled::MemorySize() const {
382   return m_mangled.MemorySize() + m_demangled.MemorySize();
383 }
384 
385 // We "guess" the language because we can't determine a symbol's language from
386 // it's name.  For example, a Pascal symbol can be mangled using the C++
387 // Itanium scheme, and defined in a compilation unit within the same module as
388 // other C++ units.  In addition, different targets could have different ways
389 // of mangling names from a given language, likewise the compilation units
390 // within those targets.
GuessLanguage() const391 lldb::LanguageType Mangled::GuessLanguage() const {
392   lldb::LanguageType result = lldb::eLanguageTypeUnknown;
393   // Ask each language plugin to check if the mangled name belongs to it.
394   Language::ForEach([this, &result](Language *l) {
395     if (l->SymbolNameFitsToLanguage(*this)) {
396       result = l->GetLanguageType();
397       return false;
398     }
399     return true;
400   });
401   return result;
402 }
403 
404 // Dump OBJ to the supplied stream S.
operator <<(Stream & s,const Mangled & obj)405 Stream &operator<<(Stream &s, const Mangled &obj) {
406   if (obj.GetMangledName())
407     s << "mangled = '" << obj.GetMangledName() << "'";
408 
409   ConstString demangled = obj.GetDemangledName();
410   if (demangled)
411     s << ", demangled = '" << demangled << '\'';
412   else
413     s << ", demangled = <error>";
414   return s;
415 }
416 
// Tag byte written in front of every encoded Mangled object. It records which
// of the two names follow, so that no more information than necessary has to
// be stored.
enum MangledEncoding {
  /// Neither a mangled nor a demangled name is present; the tag byte alone
  /// encodes the object.
  Empty = 0u,
  /// Only a demangled name is present, so only that name is encoded.
  DemangledOnly = 1u,
  /// Only the mangled name is encoded. Used when the demangled name is the
  /// mangled name's counterpart (or absent) and can therefore be recomputed
  /// from it.
  MangledOnly = 2u,
  /// Both names are encoded because they are not related to each other. This
  /// happens for names that are not true mangled names but that we still want
  /// to be able to look up by name and type in the symbol table. Objective C
  /// symbols are an example: for "OBJC_CLASS_$_NSValue" the mangled name is
  /// set to "OBJC_CLASS_$_NSValue" and the demangled name is manually set to
  /// "NSValue". Demangling "OBJC_CLASS_$_NSValue" would fail, so both
  /// unrelated names have to be preserved.
  MangledAndDemangled = 3u
};
442 
Decode(const DataExtractor & data,lldb::offset_t * offset_ptr,const StringTableReader & strtab)443 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
444                      const StringTableReader &strtab) {
445   m_mangled.Clear();
446   m_demangled.Clear();
447   MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
448   switch (encoding) {
449     case Empty:
450       return true;
451 
452     case DemangledOnly:
453       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
454       return true;
455 
456     case MangledOnly:
457       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
458       return true;
459 
460     case MangledAndDemangled:
461       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
462       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
463       return true;
464   }
465   return false;
466 }
/// The encoding format for the Mangled object is as follows:
///
/// uint8_t encoding;
/// uint32_t str1; (only if DemangledOnly, MangledOnly or MangledAndDemangled)
/// uint32_t str2; (only if MangledAndDemangled)
///
/// The names themselves are added to the string table that is passed to
/// Encode(). str1 and str2 are the 32-bit values the string table returned
/// for them, and they are only saved if we need them based on the encoding.
/// When both are saved, str1 refers to the mangled name and str2 to the
/// demangled name.
///
/// Some mangled names have a mangled name that can be demangled by the built
/// in demanglers. These kinds of mangled objects know when the mangled and
/// demangled names are the counterparts for each other. This is done because
/// demangling is very expensive and avoiding demangling the same name twice
/// saves us a lot of compute time. For these kinds of names we only need to
/// save the mangled name and have the encoding set to "MangledOnly".
///
/// If a mangled object has only a demangled name, then we save only that string
/// and have the encoding set to "DemangledOnly".
///
/// Some mangled objects have both mangled and demangled names, but the
/// demangled name can not be computed from the mangled name. This is often used
/// for runtime names, like Objective C runtime V2 and V3 names. Both these
/// names must be saved and the encoding is set to "MangledAndDemangled".
///
/// For a Mangled object with no names, we only need to set the encoding to
/// "Empty" and not store any string values.
Encode(DataEncoder & file,ConstStringTable & strtab) const493 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
494   MangledEncoding encoding = Empty;
495   if (m_mangled) {
496     encoding = MangledOnly;
497     if (m_demangled) {
498       // We have both mangled and demangled names. If the demangled name is the
499       // counterpart of the mangled name, then we only need to save the mangled
500       // named. If they are different, we need to save both.
501       ConstString s;
502       if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled))
503         encoding = MangledAndDemangled;
504     }
505   } else if (m_demangled) {
506     encoding = DemangledOnly;
507   }
508   file.AppendU8(encoding);
509   switch (encoding) {
510     case Empty:
511       break;
512     case DemangledOnly:
513       file.AppendU32(strtab.Add(m_demangled));
514       break;
515     case MangledOnly:
516       file.AppendU32(strtab.Add(m_mangled));
517       break;
518     case MangledAndDemangled:
519       file.AppendU32(strtab.Add(m_mangled));
520       file.AppendU32(strtab.Add(m_demangled));
521       break;
522   }
523 }
524