xref: /freebsd/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp (revision 43e29d03f416d7dda52112a29600a7c82ee1a91e)
1 //===--- DLangDemangle.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines a demangler for the D programming language as specified
11 /// in the ABI specification, available at:
12 /// https://dlang.org/spec/abi.html#name_mangling
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Demangle/Demangle.h"
17 #include "llvm/Demangle/StringView.h"
18 #include "llvm/Demangle/Utility.h"
19 
20 #include <cctype>
21 #include <cstring>
22 #include <limits>
23 
24 using namespace llvm;
25 using llvm::itanium_demangle::OutputBuffer;
26 using llvm::itanium_demangle::StringView;
27 
28 namespace {
29 
30 /// Demangle information structure.
31 struct Demangler {
32   /// Initialize the information structure we use to pass around information.
33   ///
34   /// \param Mangled String to demangle.
35   Demangler(const char *Mangled);
36 
37   /// Extract and demangle the mangled symbol and append it to the output
38   /// string.
39   ///
40   /// \param Demangled Output buffer to write the demangled name.
41   ///
42   /// \return The remaining string on success or nullptr on failure.
43   ///
44   /// \see https://dlang.org/spec/abi.html#name_mangling .
45   /// \see https://dlang.org/spec/abi.html#MangledName .
46   const char *parseMangle(OutputBuffer *Demangled);
47 
48 private:
49   /// Extract and demangle a given mangled symbol and append it to the output
50   /// string.
51   ///
52   /// \param Demangled output buffer to write the demangled name.
53   /// \param Mangled mangled symbol to be demangled.
54   ///
55   /// \return The remaining string on success or nullptr on failure.
56   ///
57   /// \see https://dlang.org/spec/abi.html#name_mangling .
58   /// \see https://dlang.org/spec/abi.html#MangledName .
59   const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
60 
61   /// Extract the number from a given string.
62   ///
63   /// \param Mangled string to extract the number.
64   /// \param Ret assigned result value.
65   ///
66   /// \return The remaining string on success or nullptr on failure.
67   ///
68   /// \note A result larger than UINT_MAX is considered a failure.
69   ///
70   /// \see https://dlang.org/spec/abi.html#Number .
71   const char *decodeNumber(const char *Mangled, unsigned long &Ret);
72 
73   /// Extract the back reference position from a given string.
74   ///
75   /// \param Mangled string to extract the back reference position.
76   /// \param Ret assigned result value.
77   ///
78   /// \return the remaining string on success or nullptr on failure.
79   ///
80   /// \note Ret is always >= 0 on success, and unspecified on failure
81   ///
82   /// \see https://dlang.org/spec/abi.html#back_ref .
83   /// \see https://dlang.org/spec/abi.html#NumberBackRef .
84   const char *decodeBackrefPos(const char *Mangled, long &Ret);
85 
86   /// Extract the symbol pointed by the back reference form a given string.
87   ///
88   /// \param Mangled string to extract the back reference position.
89   /// \param Ret assigned result value.
90   ///
91   /// \return the remaining string on success or nullptr on failure.
92   ///
93   /// \see https://dlang.org/spec/abi.html#back_ref .
94   const char *decodeBackref(const char *Mangled, const char *&Ret);
95 
96   /// Extract and demangle backreferenced symbol from a given mangled symbol
97   /// and append it to the output string.
98   ///
99   /// \param Demangled output buffer to write the demangled name.
100   /// \param Mangled mangled symbol to be demangled.
101   ///
102   /// \return the remaining string on success or nullptr on failure.
103   ///
104   /// \see https://dlang.org/spec/abi.html#back_ref .
105   /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
106   const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled);
107 
108   /// Extract and demangle backreferenced type from a given mangled symbol
109   /// and append it to the output string.
110   ///
111   /// \param Mangled mangled symbol to be demangled.
112   ///
113   /// \return the remaining string on success or nullptr on failure.
114   ///
115   /// \see https://dlang.org/spec/abi.html#back_ref .
116   /// \see https://dlang.org/spec/abi.html#TypeBackRef .
117   const char *parseTypeBackref(const char *Mangled);
118 
119   /// Check whether it is the beginning of a symbol name.
120   ///
121   /// \param Mangled string to extract the symbol name.
122   ///
123   /// \return true on success, false otherwise.
124   ///
125   /// \see https://dlang.org/spec/abi.html#SymbolName .
126   bool isSymbolName(const char *Mangled);
127 
128   /// Extract and demangle an identifier from a given mangled symbol append it
129   /// to the output string.
130   ///
131   /// \param Demangled Output buffer to write the demangled name.
132   /// \param Mangled Mangled symbol to be demangled.
133   ///
134   /// \return The remaining string on success or nullptr on failure.
135   ///
136   /// \see https://dlang.org/spec/abi.html#SymbolName .
137   const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
138 
139   /// Extract and demangle the plain identifier from a given mangled symbol and
140   /// prepend/append it to the output string, with a special treatment for some
141   /// magic compiler generated symbols.
142   ///
143   /// \param Demangled Output buffer to write the demangled name.
144   /// \param Mangled Mangled symbol to be demangled.
145   /// \param Len Length of the mangled symbol name.
146   ///
147   /// \return The remaining string on success or nullptr on failure.
148   ///
149   /// \see https://dlang.org/spec/abi.html#LName .
150   const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
151                          unsigned long Len);
152 
153   /// Extract and demangle the qualified symbol from a given mangled symbol
154   /// append it to the output string.
155   ///
156   /// \param Demangled Output buffer to write the demangled name.
157   /// \param Mangled Mangled symbol to be demangled.
158   ///
159   /// \return The remaining string on success or nullptr on failure.
160   ///
161   /// \see https://dlang.org/spec/abi.html#QualifiedName .
162   const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
163 
164   /// Extract and demangle a type from a given mangled symbol append it to
165   /// the output string.
166   ///
167   /// \param Mangled mangled symbol to be demangled.
168   ///
169   /// \return the remaining string on success or nullptr on failure.
170   ///
171   /// \see https://dlang.org/spec/abi.html#Type .
172   const char *parseType(const char *Mangled);
173 
174   /// The string we are demangling.
175   const char *Str;
176   /// The index of the last back reference.
177   int LastBackref;
178 };
179 
180 } // namespace
181 
182 const char *Demangler::decodeNumber(const char *Mangled, unsigned long &Ret) {
183   // Return nullptr if trying to extract something that isn't a digit.
184   if (Mangled == nullptr || !std::isdigit(*Mangled))
185     return nullptr;
186 
187   unsigned long Val = 0;
188 
189   do {
190     unsigned long Digit = Mangled[0] - '0';
191 
192     // Check for overflow.
193     if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
194       return nullptr;
195 
196     Val = Val * 10 + Digit;
197     ++Mangled;
198   } while (std::isdigit(*Mangled));
199 
200   if (*Mangled == '\0')
201     return nullptr;
202 
203   Ret = Val;
204   return Mangled;
205 }
206 
207 const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
208   // Return nullptr if trying to extract something that isn't a digit
209   if (Mangled == nullptr || !std::isalpha(*Mangled))
210     return nullptr;
211 
212   // Any identifier or non-basic type that has been emitted to the mangled
213   // symbol before will not be emitted again, but is referenced by a special
214   // sequence encoding the relative position of the original occurrence in the
215   // mangled symbol name.
216   // Numbers in back references are encoded with base 26 by upper case letters
217   // A-Z for higher digits but lower case letters a-z for the last digit.
218   //    NumberBackRef:
219   //        [a-z]
220   //        [A-Z] NumberBackRef
221   //        ^
222   unsigned long Val = 0;
223 
224   while (std::isalpha(*Mangled)) {
225     // Check for overflow
226     if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
227       break;
228 
229     Val *= 26;
230 
231     if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
232       Val += Mangled[0] - 'a';
233       if ((long)Val <= 0)
234         break;
235       Ret = Val;
236       return Mangled + 1;
237     }
238 
239     Val += Mangled[0] - 'A';
240     ++Mangled;
241   }
242 
243   return nullptr;
244 }
245 
246 const char *Demangler::decodeBackref(const char *Mangled, const char *&Ret) {
247   assert(Mangled != nullptr && *Mangled == 'Q' && "Invalid back reference!");
248   Ret = nullptr;
249 
250   // Position of 'Q'
251   const char *Qpos = Mangled;
252   long RefPos;
253   ++Mangled;
254 
255   Mangled = decodeBackrefPos(Mangled, RefPos);
256   if (Mangled == nullptr)
257     return nullptr;
258 
259   if (RefPos > Qpos - Str)
260     return nullptr;
261 
262   // Set the position of the back reference.
263   Ret = Qpos - RefPos;
264 
265   return Mangled;
266 }
267 
268 const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled,
269                                           const char *Mangled) {
270   // An identifier back reference always points to a digit 0 to 9.
271   //    IdentifierBackRef:
272   //        Q NumberBackRef
273   //        ^
274   const char *Backref;
275   unsigned long Len;
276 
277   // Get position of the back reference
278   Mangled = decodeBackref(Mangled, Backref);
279 
280   // Must point to a simple identifier
281   Backref = decodeNumber(Backref, Len);
282   if (Backref == nullptr || strlen(Backref) < Len)
283     return nullptr;
284 
285   Backref = parseLName(Demangled, Backref, Len);
286   if (Backref == nullptr)
287     return nullptr;
288 
289   return Mangled;
290 }
291 
292 const char *Demangler::parseTypeBackref(const char *Mangled) {
293   // A type back reference always points to a letter.
294   //    TypeBackRef:
295   //        Q NumberBackRef
296   //        ^
297   const char *Backref;
298 
299   // If we appear to be moving backwards through the mangle string, then
300   // bail as this may be a recursive back reference.
301   if (Mangled - Str >= LastBackref)
302     return nullptr;
303 
304   int SaveRefPos = LastBackref;
305   LastBackref = Mangled - Str;
306 
307   // Get position of the back reference.
308   Mangled = decodeBackref(Mangled, Backref);
309 
310   // Can't decode back reference.
311   if (Backref == nullptr)
312     return nullptr;
313 
314   // TODO: Add support for function type back references.
315   Backref = parseType(Backref);
316 
317   LastBackref = SaveRefPos;
318 
319   if (Backref == nullptr)
320     return nullptr;
321 
322   return Mangled;
323 }
324 
325 bool Demangler::isSymbolName(const char *Mangled) {
326   long Ret;
327   const char *Qref = Mangled;
328 
329   if (std::isdigit(*Mangled))
330     return true;
331 
332   // TODO: Handle template instances.
333 
334   if (*Mangled != 'Q')
335     return false;
336 
337   Mangled = decodeBackrefPos(Mangled + 1, Ret);
338   if (Mangled == nullptr || Ret > Qref - Str)
339     return false;
340 
341   return std::isdigit(Qref[-Ret]);
342 }
343 
344 const char *Demangler::parseMangle(OutputBuffer *Demangled,
345                                    const char *Mangled) {
346   // A D mangled symbol is comprised of both scope and type information.
347   //    MangleName:
348   //        _D QualifiedName Type
349   //        _D QualifiedName Z
350   //        ^
351   // The caller should have guaranteed that the start pointer is at the
352   // above location.
353   // Note that type is never a function type, but only the return type of
354   // a function or the type of a variable.
355   Mangled += 2;
356 
357   Mangled = parseQualified(Demangled, Mangled);
358 
359   if (Mangled != nullptr) {
360     // Artificial symbols end with 'Z' and have no type.
361     if (*Mangled == 'Z')
362       ++Mangled;
363     else {
364       Mangled = parseType(Mangled);
365     }
366   }
367 
368   return Mangled;
369 }
370 
371 const char *Demangler::parseQualified(OutputBuffer *Demangled,
372                                       const char *Mangled) {
373   // Qualified names are identifiers separated by their encoded length.
374   // Nested functions also encode their argument types without specifying
375   // what they return.
376   //    QualifiedName:
377   //        SymbolFunctionName
378   //        SymbolFunctionName QualifiedName
379   //        ^
380   //    SymbolFunctionName:
381   //        SymbolName
382   //        SymbolName TypeFunctionNoReturn
383   //        SymbolName M TypeFunctionNoReturn
384   //        SymbolName M TypeModifiers TypeFunctionNoReturn
385   // The start pointer should be at the above location.
386 
387   // Whether it has more than one symbol
388   size_t NotFirst = false;
389   do {
390     // Skip over anonymous symbols.
391     if (*Mangled == '0') {
392       do
393         ++Mangled;
394       while (*Mangled == '0');
395 
396       continue;
397     }
398 
399     if (NotFirst)
400       *Demangled << '.';
401     NotFirst = true;
402 
403     Mangled = parseIdentifier(Demangled, Mangled);
404 
405   } while (Mangled && isSymbolName(Mangled));
406 
407   return Mangled;
408 }
409 
410 const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
411                                        const char *Mangled) {
412   unsigned long Len;
413 
414   if (Mangled == nullptr || *Mangled == '\0')
415     return nullptr;
416 
417   if (*Mangled == 'Q')
418     return parseSymbolBackref(Demangled, Mangled);
419 
420   // TODO: Parse lengthless template instances.
421 
422   const char *Endptr = decodeNumber(Mangled, Len);
423 
424   if (Endptr == nullptr || Len == 0)
425     return nullptr;
426 
427   if (strlen(Endptr) < Len)
428     return nullptr;
429 
430   Mangled = Endptr;
431 
432   // TODO: Parse template instances with a length prefix.
433 
434   // There can be multiple different declarations in the same function that
435   // have the same mangled name.  To make the mangled names unique, a fake
436   // parent in the form `__Sddd' is added to the symbol.
437   if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
438     const char *NumPtr = Mangled + 3;
439     while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr))
440       ++NumPtr;
441 
442     if (Mangled + Len == NumPtr) {
443       // Skip over the fake parent.
444       Mangled += Len;
445       return parseIdentifier(Demangled, Mangled);
446     }
447 
448     // Else demangle it as a plain identifier.
449   }
450 
451   return parseLName(Demangled, Mangled, Len);
452 }
453 
454 const char *Demangler::parseType(const char *Mangled) {
455   if (*Mangled == '\0')
456     return nullptr;
457 
458   switch (*Mangled) {
459   // TODO: Parse type qualifiers.
460   // TODO: Parse function types.
461   // TODO: Parse compound types.
462   // TODO: Parse delegate types.
463   // TODO: Parse tuple types.
464 
465   // Basic types.
466   case 'i':
467     ++Mangled;
468     // TODO: Add type name dumping
469     return Mangled;
470 
471     // TODO: Add support for the rest of the basic types.
472 
473   // Back referenced type.
474   case 'Q':
475     return parseTypeBackref(Mangled);
476 
477   default: // unhandled.
478     return nullptr;
479   }
480 }
481 
482 const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
483                                   unsigned long Len) {
484   switch (Len) {
485   case 6:
486     if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
487       // The static initializer for a given symbol.
488       Demangled->prepend("initializer for ");
489       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
490       Mangled += Len;
491       return Mangled;
492     }
493     if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
494       // The vtable symbol for a given class.
495       Demangled->prepend("vtable for ");
496       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
497       Mangled += Len;
498       return Mangled;
499     }
500     break;
501 
502   case 7:
503     if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
504       // The classinfo symbol for a given class.
505       Demangled->prepend("ClassInfo for ");
506       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
507       Mangled += Len;
508       return Mangled;
509     }
510     break;
511 
512   case 11:
513     if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
514       // The interface symbol for a given class.
515       Demangled->prepend("Interface for ");
516       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
517       Mangled += Len;
518       return Mangled;
519     }
520     break;
521 
522   case 12:
523     if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
524       // The ModuleInfo symbol for a given module.
525       Demangled->prepend("ModuleInfo for ");
526       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
527       Mangled += Len;
528       return Mangled;
529     }
530     break;
531   }
532 
533   *Demangled << StringView(Mangled, Len);
534   Mangled += Len;
535 
536   return Mangled;
537 }
538 
539 Demangler::Demangler(const char *Mangled)
540     : Str(Mangled), LastBackref(strlen(Mangled)) {}
541 
542 const char *Demangler::parseMangle(OutputBuffer *Demangled) {
543   return parseMangle(Demangled, this->Str);
544 }
545 
546 char *llvm::dlangDemangle(const char *MangledName) {
547   if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
548     return nullptr;
549 
550   OutputBuffer Demangled;
551   if (strcmp(MangledName, "_Dmain") == 0) {
552     Demangled << "D main";
553   } else {
554 
555     Demangler D = Demangler(MangledName);
556     MangledName = D.parseMangle(&Demangled);
557 
558     // Check that the entire symbol was successfully demangled.
559     if (MangledName == nullptr || *MangledName != '\0') {
560       std::free(Demangled.getBuffer());
561       return nullptr;
562     }
563   }
564 
565   // OutputBuffer's internal buffer is not null terminated and therefore we need
566   // to add it to comply with C null terminated strings.
567   if (Demangled.getCurrentPosition() > 0) {
568     Demangled << '\0';
569     Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
570     return Demangled.getBuffer();
571   }
572 
573   std::free(Demangled.getBuffer());
574   return nullptr;
575 }
576