xref: /freebsd/contrib/llvm-project/lldb/source/Core/Mangled.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- Mangled.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Core/Mangled.h"
10 
11 #include "lldb/Core/DataFileCache.h"
12 #include "lldb/Core/DemangledNameInfo.h"
13 #include "lldb/Core/RichManglingContext.h"
14 #include "lldb/Target/Language.h"
15 #include "lldb/Utility/ConstString.h"
16 #include "lldb/Utility/DataEncoder.h"
17 #include "lldb/Utility/LLDBLog.h"
18 #include "lldb/Utility/Log.h"
19 #include "lldb/Utility/RegularExpression.h"
20 #include "lldb/Utility/Stream.h"
21 #include "lldb/lldb-enumerations.h"
22 
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Demangle/Demangle.h"
26 #include "llvm/Support/Compiler.h"
27 
28 #include <mutex>
29 #include <string>
30 #include <string_view>
31 #include <utility>
32 
33 #include <cstdlib>
34 #include <cstring>
35 using namespace lldb_private;
36 
37 #pragma mark Mangled
38 
IsMangledName(llvm::StringRef name)39 bool Mangled::IsMangledName(llvm::StringRef name) {
40   return Mangled::GetManglingScheme(name) != Mangled::eManglingSchemeNone;
41 }
42 
GetManglingScheme(llvm::StringRef const name)43 Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
44   if (name.empty())
45     return Mangled::eManglingSchemeNone;
46 
47   if (name.starts_with("?"))
48     return Mangled::eManglingSchemeMSVC;
49 
50   if (name.starts_with("_R"))
51     return Mangled::eManglingSchemeRustV0;
52 
53   if (name.starts_with("_D")) {
54     // A dlang mangled name begins with `_D`, followed by a numeric length. One
55     // known exception is the symbol `_Dmain`.
56     // See `SymbolName` and `LName` in
57     // https://dlang.org/spec/abi.html#name_mangling
58     llvm::StringRef buf = name.drop_front(2);
59     if (!buf.empty() && (llvm::isDigit(buf.front()) || name == "_Dmain"))
60       return Mangled::eManglingSchemeD;
61   }
62 
63   if (name.starts_with("_Z"))
64     return Mangled::eManglingSchemeItanium;
65 
66   // ___Z is a clang extension of block invocations
67   if (name.starts_with("___Z"))
68     return Mangled::eManglingSchemeItanium;
69 
70   // Swift's older style of mangling used "_T" as a mangling prefix. This can
71   // lead to false positives with other symbols that just so happen to start
72   // with "_T". To minimize the chance of that happening, we only return true
73   // for select old-style swift mangled names. The known cases are ObjC classes
74   // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
75   // Protocols are prefixed with "_TtP".
76   if (name.starts_with("_TtC") || name.starts_with("_TtGC") ||
77       name.starts_with("_TtP"))
78     return Mangled::eManglingSchemeSwift;
79 
80   // Swift 4.2 used "$S" and "_$S".
81   // Swift 5 and onward uses "$s" and "_$s".
82   // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
83   // Embedded Swift introduced "$e" and  "_$e" as Swift mangling prefixes.
84   if (name.starts_with("$S") || name.starts_with("_$S") ||
85       name.starts_with("$s") || name.starts_with("_$s") ||
86       name.starts_with("$e") || name.starts_with("_$e") ||
87       name.starts_with("@__swiftmacro_"))
88     return Mangled::eManglingSchemeSwift;
89 
90   return Mangled::eManglingSchemeNone;
91 }
92 
Mangled(ConstString s)93 Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
94   if (s)
95     SetValue(s);
96 }
97 
Mangled(llvm::StringRef name)98 Mangled::Mangled(llvm::StringRef name) {
99   if (!name.empty())
100     SetValue(ConstString(name));
101 }
102 
103 // Convert to bool operator. This allows code to check any Mangled objects
104 // to see if they contain anything valid using code such as:
105 //
106 //  Mangled mangled(...);
107 //  if (mangled)
108 //  { ...
operator bool() const109 Mangled::operator bool() const { return m_mangled || m_demangled; }
110 
111 // Clear the mangled and demangled values.
Clear()112 void Mangled::Clear() {
113   m_mangled.Clear();
114   m_demangled.Clear();
115   m_demangled_info.reset();
116 }
117 
118 // Compare the string values.
Compare(const Mangled & a,const Mangled & b)119 int Mangled::Compare(const Mangled &a, const Mangled &b) {
120   return ConstString::Compare(a.GetName(ePreferMangled),
121                               b.GetName(ePreferMangled));
122 }
123 
SetValue(ConstString name)124 void Mangled::SetValue(ConstString name) {
125   if (name) {
126     if (IsMangledName(name.GetStringRef())) {
127       m_demangled.Clear();
128       m_mangled = name;
129       m_demangled_info.reset();
130     } else {
131       m_demangled = name;
132       m_mangled.Clear();
133       m_demangled_info.reset();
134     }
135   } else {
136     m_demangled.Clear();
137     m_mangled.Clear();
138     m_demangled_info.reset();
139   }
140 }
141 
142 // Local helpers for different demangling implementations.
GetMSVCDemangledStr(llvm::StringRef M)143 static char *GetMSVCDemangledStr(llvm::StringRef M) {
144   char *demangled_cstr = llvm::microsoftDemangle(
145       M, nullptr, nullptr,
146       llvm::MSDemangleFlags(
147           llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
148           llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
149 
150   if (Log *log = GetLog(LLDBLog::Demangle)) {
151     if (demangled_cstr && demangled_cstr[0])
152       LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
153     else
154       LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
155   }
156 
157   return demangled_cstr;
158 }
159 
160 static std::pair<char *, DemangledNameInfo>
GetItaniumDemangledStr(const char * M)161 GetItaniumDemangledStr(const char *M) {
162   char *demangled_cstr = nullptr;
163 
164   DemangledNameInfo info;
165   llvm::ItaniumPartialDemangler ipd;
166   bool err = ipd.partialDemangle(M);
167   if (!err) {
168     // Default buffer and size (OutputBuffer will realloc in case it's too
169     // small).
170     size_t demangled_size = 80;
171     demangled_cstr = static_cast<char *>(std::malloc(80));
172 
173     TrackingOutputBuffer OB(demangled_cstr, demangled_size);
174     demangled_cstr = ipd.finishDemangle(&OB);
175     OB.NameInfo.SuffixRange.first = OB.NameInfo.QualifiersRange.second;
176     OB.NameInfo.SuffixRange.second = std::string_view(OB).size();
177     info = std::move(OB.NameInfo);
178 
179     assert(demangled_cstr &&
180            "finishDemangle must always succeed if partialDemangle did");
181     assert(demangled_cstr[OB.getCurrentPosition() - 1] == '\0' &&
182            "Expected demangled_size to return length including trailing null");
183   }
184 
185   if (Log *log = GetLog(LLDBLog::Demangle)) {
186     if (demangled_cstr)
187       LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
188     else
189       LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
190 
191     if (!info.hasBasename())
192       LLDB_LOGF(log,
193                 "demangled itanium: %s -> error: failed to retrieve name info",
194                 M);
195   }
196 
197   return {demangled_cstr, std::move(info)};
198 }
199 
GetRustV0DemangledStr(llvm::StringRef M)200 static char *GetRustV0DemangledStr(llvm::StringRef M) {
201   char *demangled_cstr = llvm::rustDemangle(M);
202 
203   if (Log *log = GetLog(LLDBLog::Demangle)) {
204     if (demangled_cstr && demangled_cstr[0])
205       LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
206     else
207       LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",
208                static_cast<std::string_view>(M));
209   }
210 
211   return demangled_cstr;
212 }
213 
GetDLangDemangledStr(llvm::StringRef M)214 static char *GetDLangDemangledStr(llvm::StringRef M) {
215   char *demangled_cstr = llvm::dlangDemangle(M);
216 
217   if (Log *log = GetLog(LLDBLog::Demangle)) {
218     if (demangled_cstr && demangled_cstr[0])
219       LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
220     else
221       LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",
222                static_cast<std::string_view>(M));
223   }
224 
225   return demangled_cstr;
226 }
227 
228 // Explicit demangling for scheduled requests during batch processing. This
229 // makes use of ItaniumPartialDemangler's rich demangle info
GetRichManglingInfo(RichManglingContext & context,SkipMangledNameFn * skip_mangled_name)230 bool Mangled::GetRichManglingInfo(RichManglingContext &context,
231                                   SkipMangledNameFn *skip_mangled_name) {
232   // Others are not meant to arrive here. ObjC names or C's main() for example
233   // have their names stored in m_demangled, while m_mangled is empty.
234   assert(m_mangled);
235 
236   // Check whether or not we are interested in this name at all.
237   ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef());
238   if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
239     return false;
240 
241   switch (scheme) {
242   case eManglingSchemeNone:
243     // The current mangled_name_filter would allow llvm_unreachable here.
244     return false;
245 
246   case eManglingSchemeItanium:
247     // We want the rich mangling info here, so we don't care whether or not
248     // there is a demangled string in the pool already.
249     return context.FromItaniumName(m_mangled);
250 
251   case eManglingSchemeMSVC: {
252     // We have no rich mangling for MSVC-mangled names yet, so first try to
253     // demangle it if necessary.
254     if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) {
255       if (char *d = GetMSVCDemangledStr(m_mangled)) {
256         // Without the rich mangling info we have to demangle the full name.
257         // Copy it to string pool and connect the counterparts to accelerate
258         // later access in GetDemangledName().
259         m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d),
260                                                     m_mangled);
261         ::free(d);
262       } else {
263         m_demangled.SetCString("");
264       }
265     }
266 
267     if (m_demangled.IsEmpty()) {
268       // Cannot demangle it, so don't try parsing.
269       return false;
270     } else {
271       // Demangled successfully, we can try and parse it with
272       // CPlusPlusLanguage::CxxMethodName.
273       return context.FromCxxMethodName(m_demangled);
274     }
275   }
276 
277   case eManglingSchemeRustV0:
278   case eManglingSchemeD:
279   case eManglingSchemeSwift:
280     // Rich demangling scheme is not supported
281     return false;
282   }
283   llvm_unreachable("Fully covered switch above!");
284 }
285 
GetDemangledName() const286 ConstString Mangled::GetDemangledName() const {
287   return GetDemangledNameImpl(/*force=*/false);
288 }
289 
GetDemangledInfo() const290 std::optional<DemangledNameInfo> const &Mangled::GetDemangledInfo() const {
291   if (!m_demangled_info)
292     GetDemangledNameImpl(/*force=*/true);
293 
294   return m_demangled_info;
295 }
296 
297 // Generate the demangled name on demand using this accessor. Code in this
298 // class will need to use this accessor if it wishes to decode the demangled
299 // name. The result is cached and will be kept until a new string value is
300 // supplied to this object, or until the end of the object's lifetime.
GetDemangledNameImpl(bool force) const301 ConstString Mangled::GetDemangledNameImpl(bool force) const {
302   if (!m_mangled)
303     return m_demangled;
304 
305   // Re-use previously demangled names.
306   if (!force && !m_demangled.IsNull())
307     return m_demangled;
308 
309   if (!force && m_mangled.GetMangledCounterpart(m_demangled) &&
310       !m_demangled.IsNull())
311     return m_demangled;
312 
313   // We didn't already mangle this name, demangle it and if all goes well
314   // add it to our map.
315   char *demangled_name = nullptr;
316   switch (GetManglingScheme(m_mangled.GetStringRef())) {
317   case eManglingSchemeMSVC:
318     demangled_name = GetMSVCDemangledStr(m_mangled);
319     break;
320   case eManglingSchemeItanium: {
321     std::pair<char *, DemangledNameInfo> demangled =
322         GetItaniumDemangledStr(m_mangled.GetCString());
323     demangled_name = demangled.first;
324     m_demangled_info.emplace(std::move(demangled.second));
325     break;
326   }
327   case eManglingSchemeRustV0:
328     demangled_name = GetRustV0DemangledStr(m_mangled);
329     break;
330   case eManglingSchemeD:
331     demangled_name = GetDLangDemangledStr(m_mangled);
332     break;
333   case eManglingSchemeSwift:
334     // Demangling a swift name requires the swift compiler. This is
335     // explicitly unsupported on llvm.org.
336     break;
337   case eManglingSchemeNone:
338     // Don't bother demangling anything that isn't mangled.
339     break;
340   }
341 
342   if (demangled_name) {
343     m_demangled.SetStringWithMangledCounterpart(demangled_name, m_mangled);
344     free(demangled_name);
345   }
346 
347   if (m_demangled.IsNull()) {
348     // Set the demangled string to the empty string to indicate we tried to
349     // parse it once and failed.
350     m_demangled.SetCString("");
351   }
352 
353   return m_demangled;
354 }
355 
GetDisplayDemangledName() const356 ConstString Mangled::GetDisplayDemangledName() const {
357   if (Language *lang = Language::FindPlugin(GuessLanguage()))
358     return lang->GetDisplayDemangledName(*this);
359   return GetDemangledName();
360 }
361 
NameMatches(const RegularExpression & regex) const362 bool Mangled::NameMatches(const RegularExpression &regex) const {
363   if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
364     return true;
365 
366   ConstString demangled = GetDemangledName();
367   return demangled && regex.Execute(demangled.GetStringRef());
368 }
369 
370 // Get the demangled name if there is one, else return the mangled name.
GetName(Mangled::NamePreference preference) const371 ConstString Mangled::GetName(Mangled::NamePreference preference) const {
372   if (preference == ePreferMangled && m_mangled)
373     return m_mangled;
374 
375   // Call the accessor to make sure we get a demangled name in case it hasn't
376   // been demangled yet...
377   ConstString demangled = GetDemangledName();
378 
379   if (preference == ePreferDemangledWithoutArguments) {
380     if (Language *lang = Language::FindPlugin(GuessLanguage())) {
381       return lang->GetDemangledFunctionNameWithoutArguments(*this);
382     }
383   }
384   if (preference == ePreferDemangled) {
385     if (demangled)
386       return demangled;
387     return m_mangled;
388   }
389   return demangled;
390 }
391 
392 // Dump a Mangled object to stream "s". We don't force our demangled name to be
393 // computed currently (we don't use the accessor).
Dump(Stream * s) const394 void Mangled::Dump(Stream *s) const {
395   if (m_mangled) {
396     *s << ", mangled = " << m_mangled;
397   }
398   if (m_demangled) {
399     const char *demangled = m_demangled.AsCString();
400     s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
401   }
402 }
403 
404 // Dumps a debug version of this string with extra object and state information
405 // to stream "s".
DumpDebug(Stream * s) const406 void Mangled::DumpDebug(Stream *s) const {
407   s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
408             static_cast<const void *>(this));
409   m_mangled.DumpDebug(s);
410   s->Printf(", demangled = ");
411   m_demangled.DumpDebug(s);
412 }
413 
414 // Return the size in byte that this object takes in memory. The size includes
415 // the size of the objects it owns, and not the strings that it references
416 // because they are shared strings.
MemorySize() const417 size_t Mangled::MemorySize() const {
418   return m_mangled.MemorySize() + m_demangled.MemorySize();
419 }
420 
421 // We "guess" the language because we can't determine a symbol's language from
422 // it's name.  For example, a Pascal symbol can be mangled using the C++
423 // Itanium scheme, and defined in a compilation unit within the same module as
424 // other C++ units.  In addition, different targets could have different ways
425 // of mangling names from a given language, likewise the compilation units
426 // within those targets.
GuessLanguage() const427 lldb::LanguageType Mangled::GuessLanguage() const {
428   lldb::LanguageType result = lldb::eLanguageTypeUnknown;
429   // Ask each language plugin to check if the mangled name belongs to it.
430   Language::ForEach([this, &result](Language *l) {
431     if (l->SymbolNameFitsToLanguage(*this)) {
432       result = l->GetLanguageType();
433       return false;
434     }
435     return true;
436   });
437   return result;
438 }
439 
440 // Dump OBJ to the supplied stream S.
operator <<(Stream & s,const Mangled & obj)441 Stream &operator<<(Stream &s, const Mangled &obj) {
442   if (obj.GetMangledName())
443     s << "mangled = '" << obj.GetMangledName() << "'";
444 
445   ConstString demangled = obj.GetDemangledName();
446   if (demangled)
447     s << ", demangled = '" << demangled << '\'';
448   else
449     s << ", demangled = <error>";
450   return s;
451 }
452 
// Tag written in front of every serialized Mangled object. It lets us store
// as little as possible: the tag says which of the two names follow.
enum MangledEncoding {
  /// Neither a mangled nor a demangled name is present; the object is encoded
  /// as this single zero byte.
  Empty = 0u,
  /// Only a demangled name is present (no mangled name), so only the
  /// demangled name is encoded.
  DemangledOnly = 1u,
  /// The demangled name can be recomputed from the mangled name (they are
  /// mangled/demangled counterparts), so only the mangled name is encoded.
  MangledOnly = 2u,
  /// Two unrelated names that must both be saved. This happens for names that
  /// are not true mangled names but that we still want to look up by name and
  /// type in the symbol table. Objective C symbols are an example:
  /// "OBJC_CLASS_$_NSValue" is stored as the mangled name and the demangled
  /// name is manually set to "NSValue". Demangling "OBJC_CLASS_$_NSValue"
  /// would fail, so both names have to be preserved.
  MangledAndDemangled = 3u,
};
478 
Decode(const DataExtractor & data,lldb::offset_t * offset_ptr,const StringTableReader & strtab)479 bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
480                      const StringTableReader &strtab) {
481   m_mangled.Clear();
482   m_demangled.Clear();
483   m_demangled_info.reset();
484   MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
485   switch (encoding) {
486     case Empty:
487       return true;
488 
489     case DemangledOnly:
490       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
491       return true;
492 
493     case MangledOnly:
494       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
495       return true;
496 
497     case MangledAndDemangled:
498       m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
499       m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
500       return true;
501   }
502   return false;
503 }
504 /// The encoding format for the Mangled object is as follows:
505 ///
506 /// uint8_t encoding;
507 /// char str1[]; (only if DemangledOnly, MangledOnly)
508 /// char str2[]; (only if MangledAndDemangled)
509 ///
510 /// The strings are stored as NULL terminated UTF8 strings and str1 and str2
511 /// are only saved if we need them based on the encoding.
512 ///
513 /// Some mangled names have a mangled name that can be demangled by the built
514 /// in demanglers. These kinds of mangled objects know when the mangled and
515 /// demangled names are the counterparts for each other. This is done because
516 /// demangling is very expensive and avoiding demangling the same name twice
517 /// saves us a lot of compute time. For these kinds of names we only need to
518 /// save the mangled name and have the encoding set to "MangledOnly".
519 ///
520 /// If a mangled obejct has only a demangled name, then we save only that string
521 /// and have the encoding set to "DemangledOnly".
522 ///
523 /// Some mangled objects have both mangled and demangled names, but the
524 /// demangled name can not be computed from the mangled name. This is often used
525 /// for runtime named, like Objective C runtime V2 and V3 names. Both these
526 /// names must be saved and the encoding is set to "MangledAndDemangled".
527 ///
528 /// For a Mangled object with no names, we only need to set the encoding to
529 /// "Empty" and not store any string values.
Encode(DataEncoder & file,ConstStringTable & strtab) const530 void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
531   MangledEncoding encoding = Empty;
532   if (m_mangled) {
533     encoding = MangledOnly;
534     if (m_demangled) {
535       // We have both mangled and demangled names. If the demangled name is the
536       // counterpart of the mangled name, then we only need to save the mangled
537       // named. If they are different, we need to save both.
538       ConstString s;
539       if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled))
540         encoding = MangledAndDemangled;
541     }
542   } else if (m_demangled) {
543     encoding = DemangledOnly;
544   }
545   file.AppendU8(encoding);
546   switch (encoding) {
547     case Empty:
548       break;
549     case DemangledOnly:
550       file.AppendU32(strtab.Add(m_demangled));
551       break;
552     case MangledOnly:
553       file.AppendU32(strtab.Add(m_mangled));
554       break;
555     case MangledAndDemangled:
556       file.AppendU32(strtab.Add(m_mangled));
557       file.AppendU32(strtab.Add(m_demangled));
558       break;
559   }
560 }
561