xref: /freebsd/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1349cc55cSDimitry Andric //===--- DLangDemangle.cpp ------------------------------------------------===//
2349cc55cSDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6349cc55cSDimitry Andric //
7349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
8349cc55cSDimitry Andric ///
9349cc55cSDimitry Andric /// \file
10349cc55cSDimitry Andric /// This file defines a demangler for the D programming language as specified
11349cc55cSDimitry Andric /// in the ABI specification, available at:
12349cc55cSDimitry Andric /// https://dlang.org/spec/abi.html#name_mangling
13349cc55cSDimitry Andric ///
14349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
15349cc55cSDimitry Andric 
16349cc55cSDimitry Andric #include "llvm/Demangle/Demangle.h"
17*06c3fb27SDimitry Andric #include "llvm/Demangle/StringViewExtras.h"
18349cc55cSDimitry Andric #include "llvm/Demangle/Utility.h"
19349cc55cSDimitry Andric 
204824e7fdSDimitry Andric #include <cctype>
21349cc55cSDimitry Andric #include <cstring>
224824e7fdSDimitry Andric #include <limits>
23*06c3fb27SDimitry Andric #include <string_view>
24349cc55cSDimitry Andric 
25349cc55cSDimitry Andric using namespace llvm;
26349cc55cSDimitry Andric using llvm::itanium_demangle::OutputBuffer;
27*06c3fb27SDimitry Andric using llvm::itanium_demangle::starts_with;
284824e7fdSDimitry Andric 
294824e7fdSDimitry Andric namespace {
304824e7fdSDimitry Andric 
314824e7fdSDimitry Andric /// Demangle information structure.
324824e7fdSDimitry Andric struct Demangler {
334824e7fdSDimitry Andric   /// Initialize the information structure we use to pass around information.
344824e7fdSDimitry Andric   ///
354824e7fdSDimitry Andric   /// \param Mangled String to demangle.
36*06c3fb27SDimitry Andric   Demangler(std::string_view Mangled);
374824e7fdSDimitry Andric 
384824e7fdSDimitry Andric   /// Extract and demangle the mangled symbol and append it to the output
394824e7fdSDimitry Andric   /// string.
404824e7fdSDimitry Andric   ///
414824e7fdSDimitry Andric   /// \param Demangled Output buffer to write the demangled name.
424824e7fdSDimitry Andric   ///
434824e7fdSDimitry Andric   /// \return The remaining string on success or nullptr on failure.
444824e7fdSDimitry Andric   ///
454824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#name_mangling .
464824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#MangledName .
474824e7fdSDimitry Andric   const char *parseMangle(OutputBuffer *Demangled);
484824e7fdSDimitry Andric 
494824e7fdSDimitry Andric private:
504824e7fdSDimitry Andric   /// Extract and demangle a given mangled symbol and append it to the output
514824e7fdSDimitry Andric   /// string.
524824e7fdSDimitry Andric   ///
534824e7fdSDimitry Andric   /// \param Demangled output buffer to write the demangled name.
544824e7fdSDimitry Andric   /// \param Mangled mangled symbol to be demangled.
554824e7fdSDimitry Andric   ///
564824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#name_mangling .
574824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#MangledName .
58*06c3fb27SDimitry Andric   void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled);
594824e7fdSDimitry Andric 
604824e7fdSDimitry Andric   /// Extract the number from a given string.
614824e7fdSDimitry Andric   ///
624824e7fdSDimitry Andric   /// \param Mangled string to extract the number.
634824e7fdSDimitry Andric   /// \param Ret assigned result value.
644824e7fdSDimitry Andric   ///
65*06c3fb27SDimitry Andric   /// \note Ret larger than UINT_MAX is considered a failure.
664824e7fdSDimitry Andric   ///
674824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#Number .
68*06c3fb27SDimitry Andric   void decodeNumber(std::string_view &Mangled, unsigned long &Ret);
6904eeddc0SDimitry Andric 
7004eeddc0SDimitry Andric   /// Extract the back reference position from a given string.
7104eeddc0SDimitry Andric   ///
7204eeddc0SDimitry Andric   /// \param Mangled string to extract the back reference position.
7304eeddc0SDimitry Andric   /// \param Ret assigned result value.
7404eeddc0SDimitry Andric   ///
75*06c3fb27SDimitry Andric   /// \return true on success, false on error.
7604eeddc0SDimitry Andric   ///
7704eeddc0SDimitry Andric   /// \note Ret is always >= 0 on success, and unspecified on failure
7804eeddc0SDimitry Andric   ///
7904eeddc0SDimitry Andric   /// \see https://dlang.org/spec/abi.html#back_ref .
8004eeddc0SDimitry Andric   /// \see https://dlang.org/spec/abi.html#NumberBackRef .
81*06c3fb27SDimitry Andric   bool decodeBackrefPos(std::string_view &Mangled, long &Ret);
8204eeddc0SDimitry Andric 
8304eeddc0SDimitry Andric   /// Extract the symbol pointed by the back reference form a given string.
8404eeddc0SDimitry Andric   ///
8504eeddc0SDimitry Andric   /// \param Mangled string to extract the back reference position.
8604eeddc0SDimitry Andric   /// \param Ret assigned result value.
8704eeddc0SDimitry Andric   ///
88*06c3fb27SDimitry Andric   /// \return true on success, false on error.
8904eeddc0SDimitry Andric   ///
9004eeddc0SDimitry Andric   /// \see https://dlang.org/spec/abi.html#back_ref .
91*06c3fb27SDimitry Andric   bool decodeBackref(std::string_view &Mangled, std::string_view &Ret);
9204eeddc0SDimitry Andric 
9304eeddc0SDimitry Andric   /// Extract and demangle backreferenced symbol from a given mangled symbol
9404eeddc0SDimitry Andric   /// and append it to the output string.
9504eeddc0SDimitry Andric   ///
9604eeddc0SDimitry Andric   /// \param Demangled output buffer to write the demangled name.
9704eeddc0SDimitry Andric   /// \param Mangled mangled symbol to be demangled.
9804eeddc0SDimitry Andric   ///
9904eeddc0SDimitry Andric   /// \see https://dlang.org/spec/abi.html#back_ref .
10004eeddc0SDimitry Andric   /// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
101*06c3fb27SDimitry Andric   void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled);
10204eeddc0SDimitry Andric 
10304eeddc0SDimitry Andric   /// Extract and demangle backreferenced type from a given mangled symbol
10404eeddc0SDimitry Andric   /// and append it to the output string.
10504eeddc0SDimitry Andric   ///
10604eeddc0SDimitry Andric   /// \param Mangled mangled symbol to be demangled.
10704eeddc0SDimitry Andric   ///
10804eeddc0SDimitry Andric   /// \see https://dlang.org/spec/abi.html#back_ref .
10904eeddc0SDimitry Andric   /// \see https://dlang.org/spec/abi.html#TypeBackRef .
110*06c3fb27SDimitry Andric   void parseTypeBackref(std::string_view &Mangled);
1114824e7fdSDimitry Andric 
1124824e7fdSDimitry Andric   /// Check whether it is the beginning of a symbol name.
1134824e7fdSDimitry Andric   ///
1144824e7fdSDimitry Andric   /// \param Mangled string to extract the symbol name.
1154824e7fdSDimitry Andric   ///
1164824e7fdSDimitry Andric   /// \return true on success, false otherwise.
1174824e7fdSDimitry Andric   ///
1184824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#SymbolName .
119*06c3fb27SDimitry Andric   bool isSymbolName(std::string_view Mangled);
1204824e7fdSDimitry Andric 
1214824e7fdSDimitry Andric   /// Extract and demangle an identifier from a given mangled symbol append it
1224824e7fdSDimitry Andric   /// to the output string.
1234824e7fdSDimitry Andric   ///
1244824e7fdSDimitry Andric   /// \param Demangled Output buffer to write the demangled name.
1254824e7fdSDimitry Andric   /// \param Mangled Mangled symbol to be demangled.
1264824e7fdSDimitry Andric   ///
1274824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#SymbolName .
128*06c3fb27SDimitry Andric   void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled);
1294824e7fdSDimitry Andric 
1304824e7fdSDimitry Andric   /// Extract and demangle the plain identifier from a given mangled symbol and
1314824e7fdSDimitry Andric   /// prepend/append it to the output string, with a special treatment for some
1324824e7fdSDimitry Andric   /// magic compiler generated symbols.
1334824e7fdSDimitry Andric   ///
1344824e7fdSDimitry Andric   /// \param Demangled Output buffer to write the demangled name.
1354824e7fdSDimitry Andric   /// \param Mangled Mangled symbol to be demangled.
1364824e7fdSDimitry Andric   /// \param Len Length of the mangled symbol name.
1374824e7fdSDimitry Andric   ///
1384824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#LName .
139*06c3fb27SDimitry Andric   void parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
1404824e7fdSDimitry Andric                   unsigned long Len);
1414824e7fdSDimitry Andric 
1424824e7fdSDimitry Andric   /// Extract and demangle the qualified symbol from a given mangled symbol
1434824e7fdSDimitry Andric   /// append it to the output string.
1444824e7fdSDimitry Andric   ///
1454824e7fdSDimitry Andric   /// \param Demangled Output buffer to write the demangled name.
1464824e7fdSDimitry Andric   /// \param Mangled Mangled symbol to be demangled.
1474824e7fdSDimitry Andric   ///
1484824e7fdSDimitry Andric   /// \see https://dlang.org/spec/abi.html#QualifiedName .
149*06c3fb27SDimitry Andric   void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled);
1504824e7fdSDimitry Andric 
15104eeddc0SDimitry Andric   /// Extract and demangle a type from a given mangled symbol append it to
15204eeddc0SDimitry Andric   /// the output string.
15304eeddc0SDimitry Andric   ///
15404eeddc0SDimitry Andric   /// \param Mangled mangled symbol to be demangled.
15504eeddc0SDimitry Andric   ///
156*06c3fb27SDimitry Andric   /// \return true on success, false on error.
15704eeddc0SDimitry Andric   ///
15804eeddc0SDimitry Andric   /// \see https://dlang.org/spec/abi.html#Type .
159*06c3fb27SDimitry Andric   bool parseType(std::string_view &Mangled);
16004eeddc0SDimitry Andric 
161*06c3fb27SDimitry Andric   /// An immutable view of the string we are demangling.
162*06c3fb27SDimitry Andric   const std::string_view Str;
16304eeddc0SDimitry Andric   /// The index of the last back reference.
16404eeddc0SDimitry Andric   int LastBackref;
1654824e7fdSDimitry Andric };
1664824e7fdSDimitry Andric 
1674824e7fdSDimitry Andric } // namespace
1684824e7fdSDimitry Andric 
decodeNumber(std::string_view & Mangled,unsigned long & Ret)169*06c3fb27SDimitry Andric void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) {
170*06c3fb27SDimitry Andric   // Clear Mangled if trying to extract something that isn't a digit.
171*06c3fb27SDimitry Andric   if (Mangled.empty()) {
172*06c3fb27SDimitry Andric     Mangled = {};
173*06c3fb27SDimitry Andric     return;
174*06c3fb27SDimitry Andric   }
175*06c3fb27SDimitry Andric 
176*06c3fb27SDimitry Andric   if (!std::isdigit(Mangled.front())) {
177*06c3fb27SDimitry Andric     Mangled = {};
178*06c3fb27SDimitry Andric     return;
179*06c3fb27SDimitry Andric   }
1804824e7fdSDimitry Andric 
1814824e7fdSDimitry Andric   unsigned long Val = 0;
1824824e7fdSDimitry Andric 
1834824e7fdSDimitry Andric   do {
1844824e7fdSDimitry Andric     unsigned long Digit = Mangled[0] - '0';
1854824e7fdSDimitry Andric 
1864824e7fdSDimitry Andric     // Check for overflow.
187*06c3fb27SDimitry Andric     if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) {
188*06c3fb27SDimitry Andric       Mangled = {};
189*06c3fb27SDimitry Andric       return;
19004eeddc0SDimitry Andric     }
19104eeddc0SDimitry Andric 
192*06c3fb27SDimitry Andric     Val = Val * 10 + Digit;
193*06c3fb27SDimitry Andric     Mangled.remove_prefix(1);
194*06c3fb27SDimitry Andric   } while (!Mangled.empty() && std::isdigit(Mangled.front()));
19504eeddc0SDimitry Andric 
196*06c3fb27SDimitry Andric   if (Mangled.empty()) {
197*06c3fb27SDimitry Andric     Mangled = {};
198*06c3fb27SDimitry Andric     return;
199*06c3fb27SDimitry Andric   }
200*06c3fb27SDimitry Andric 
201*06c3fb27SDimitry Andric   Ret = Val;
202*06c3fb27SDimitry Andric }
203*06c3fb27SDimitry Andric 
decodeBackrefPos(std::string_view & Mangled,long & Ret)204*06c3fb27SDimitry Andric bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) {
205*06c3fb27SDimitry Andric   // Return nullptr if trying to extract something that isn't a digit
206*06c3fb27SDimitry Andric   if (Mangled.empty()) {
207*06c3fb27SDimitry Andric     Mangled = {};
208*06c3fb27SDimitry Andric     return false;
209*06c3fb27SDimitry Andric   }
21004eeddc0SDimitry Andric   // Any identifier or non-basic type that has been emitted to the mangled
21104eeddc0SDimitry Andric   // symbol before will not be emitted again, but is referenced by a special
21204eeddc0SDimitry Andric   // sequence encoding the relative position of the original occurrence in the
21304eeddc0SDimitry Andric   // mangled symbol name.
21404eeddc0SDimitry Andric   // Numbers in back references are encoded with base 26 by upper case letters
21504eeddc0SDimitry Andric   // A-Z for higher digits but lower case letters a-z for the last digit.
21604eeddc0SDimitry Andric   //    NumberBackRef:
21704eeddc0SDimitry Andric   //        [a-z]
21804eeddc0SDimitry Andric   //        [A-Z] NumberBackRef
21904eeddc0SDimitry Andric   //        ^
22004eeddc0SDimitry Andric   unsigned long Val = 0;
22104eeddc0SDimitry Andric 
222*06c3fb27SDimitry Andric   while (!Mangled.empty() && std::isalpha(Mangled.front())) {
22304eeddc0SDimitry Andric     // Check for overflow
22404eeddc0SDimitry Andric     if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
22504eeddc0SDimitry Andric       break;
22604eeddc0SDimitry Andric 
22704eeddc0SDimitry Andric     Val *= 26;
22804eeddc0SDimitry Andric 
22904eeddc0SDimitry Andric     if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {
23004eeddc0SDimitry Andric       Val += Mangled[0] - 'a';
23104eeddc0SDimitry Andric       if ((long)Val <= 0)
23204eeddc0SDimitry Andric         break;
23304eeddc0SDimitry Andric       Ret = Val;
234*06c3fb27SDimitry Andric       Mangled.remove_prefix(1);
235*06c3fb27SDimitry Andric       return true;
23604eeddc0SDimitry Andric     }
23704eeddc0SDimitry Andric 
23804eeddc0SDimitry Andric     Val += Mangled[0] - 'A';
239*06c3fb27SDimitry Andric     Mangled.remove_prefix(1);
24004eeddc0SDimitry Andric   }
24104eeddc0SDimitry Andric 
242*06c3fb27SDimitry Andric   Mangled = {};
243*06c3fb27SDimitry Andric   return false;
24404eeddc0SDimitry Andric }
24504eeddc0SDimitry Andric 
decodeBackref(std::string_view & Mangled,std::string_view & Ret)246*06c3fb27SDimitry Andric bool Demangler::decodeBackref(std::string_view &Mangled,
247*06c3fb27SDimitry Andric                               std::string_view &Ret) {
248*06c3fb27SDimitry Andric   assert(!Mangled.empty() && Mangled.front() == 'Q' &&
249*06c3fb27SDimitry Andric          "Invalid back reference!");
250*06c3fb27SDimitry Andric   Ret = {};
25104eeddc0SDimitry Andric 
25204eeddc0SDimitry Andric   // Position of 'Q'
253*06c3fb27SDimitry Andric   const char *Qpos = Mangled.data();
25404eeddc0SDimitry Andric   long RefPos;
255*06c3fb27SDimitry Andric   Mangled.remove_prefix(1);
25604eeddc0SDimitry Andric 
257*06c3fb27SDimitry Andric   if (!decodeBackrefPos(Mangled, RefPos)) {
258*06c3fb27SDimitry Andric     Mangled = {};
259*06c3fb27SDimitry Andric     return false;
260*06c3fb27SDimitry Andric   }
26104eeddc0SDimitry Andric 
262*06c3fb27SDimitry Andric   if (RefPos > Qpos - Str.data()) {
263*06c3fb27SDimitry Andric     Mangled = {};
264*06c3fb27SDimitry Andric     return false;
265*06c3fb27SDimitry Andric   }
26604eeddc0SDimitry Andric 
26704eeddc0SDimitry Andric   // Set the position of the back reference.
26804eeddc0SDimitry Andric   Ret = Qpos - RefPos;
26904eeddc0SDimitry Andric 
270*06c3fb27SDimitry Andric   return true;
27104eeddc0SDimitry Andric }
27204eeddc0SDimitry Andric 
parseSymbolBackref(OutputBuffer * Demangled,std::string_view & Mangled)273*06c3fb27SDimitry Andric void Demangler::parseSymbolBackref(OutputBuffer *Demangled,
274*06c3fb27SDimitry Andric                                    std::string_view &Mangled) {
27504eeddc0SDimitry Andric   // An identifier back reference always points to a digit 0 to 9.
27604eeddc0SDimitry Andric   //    IdentifierBackRef:
27704eeddc0SDimitry Andric   //        Q NumberBackRef
27804eeddc0SDimitry Andric   //        ^
27904eeddc0SDimitry Andric   unsigned long Len;
28004eeddc0SDimitry Andric 
28104eeddc0SDimitry Andric   // Get position of the back reference
282*06c3fb27SDimitry Andric   std::string_view Backref;
283*06c3fb27SDimitry Andric   if (!decodeBackref(Mangled, Backref)) {
284*06c3fb27SDimitry Andric     Mangled = {};
285*06c3fb27SDimitry Andric     return;
28604eeddc0SDimitry Andric   }
28704eeddc0SDimitry Andric 
288*06c3fb27SDimitry Andric   // Must point to a simple identifier
289*06c3fb27SDimitry Andric   decodeNumber(Backref, Len);
290*06c3fb27SDimitry Andric   if (Backref.empty() || Backref.length() < Len) {
291*06c3fb27SDimitry Andric     Mangled = {};
292*06c3fb27SDimitry Andric     return;
293*06c3fb27SDimitry Andric   }
294*06c3fb27SDimitry Andric 
295*06c3fb27SDimitry Andric   parseLName(Demangled, Backref, Len);
296*06c3fb27SDimitry Andric   if (Backref.empty())
297*06c3fb27SDimitry Andric     Mangled = {};
298*06c3fb27SDimitry Andric }
299*06c3fb27SDimitry Andric 
parseTypeBackref(std::string_view & Mangled)300*06c3fb27SDimitry Andric void Demangler::parseTypeBackref(std::string_view &Mangled) {
30104eeddc0SDimitry Andric   // A type back reference always points to a letter.
30204eeddc0SDimitry Andric   //    TypeBackRef:
30304eeddc0SDimitry Andric   //        Q NumberBackRef
30404eeddc0SDimitry Andric   //        ^
30504eeddc0SDimitry Andric 
30604eeddc0SDimitry Andric   // If we appear to be moving backwards through the mangle string, then
30704eeddc0SDimitry Andric   // bail as this may be a recursive back reference.
308*06c3fb27SDimitry Andric   if (Mangled.data() - Str.data() >= LastBackref) {
309*06c3fb27SDimitry Andric     Mangled = {};
310*06c3fb27SDimitry Andric     return;
311*06c3fb27SDimitry Andric   }
31204eeddc0SDimitry Andric 
31304eeddc0SDimitry Andric   int SaveRefPos = LastBackref;
314*06c3fb27SDimitry Andric   LastBackref = Mangled.data() - Str.data();
31504eeddc0SDimitry Andric 
31604eeddc0SDimitry Andric   // Get position of the back reference.
317*06c3fb27SDimitry Andric   std::string_view Backref;
318*06c3fb27SDimitry Andric   if (!decodeBackref(Mangled, Backref)) {
319*06c3fb27SDimitry Andric     Mangled = {};
320*06c3fb27SDimitry Andric     return;
321*06c3fb27SDimitry Andric   }
32204eeddc0SDimitry Andric 
32304eeddc0SDimitry Andric   // Can't decode back reference.
324*06c3fb27SDimitry Andric   if (Backref.empty()) {
325*06c3fb27SDimitry Andric     Mangled = {};
326*06c3fb27SDimitry Andric     return;
327*06c3fb27SDimitry Andric   }
32804eeddc0SDimitry Andric 
32904eeddc0SDimitry Andric   // TODO: Add support for function type back references.
330*06c3fb27SDimitry Andric   if (!parseType(Backref))
331*06c3fb27SDimitry Andric     Mangled = {};
33204eeddc0SDimitry Andric 
33304eeddc0SDimitry Andric   LastBackref = SaveRefPos;
33404eeddc0SDimitry Andric 
335*06c3fb27SDimitry Andric   if (Backref.empty())
336*06c3fb27SDimitry Andric     Mangled = {};
3374824e7fdSDimitry Andric }
3384824e7fdSDimitry Andric 
isSymbolName(std::string_view Mangled)339*06c3fb27SDimitry Andric bool Demangler::isSymbolName(std::string_view Mangled) {
34004eeddc0SDimitry Andric   long Ret;
341*06c3fb27SDimitry Andric   const char *Qref = Mangled.data();
34204eeddc0SDimitry Andric 
343*06c3fb27SDimitry Andric   if (std::isdigit(Mangled.front()))
3444824e7fdSDimitry Andric     return true;
3454824e7fdSDimitry Andric 
34604eeddc0SDimitry Andric   // TODO: Handle template instances.
34704eeddc0SDimitry Andric 
348*06c3fb27SDimitry Andric   if (Mangled.front() != 'Q')
3494824e7fdSDimitry Andric     return false;
35004eeddc0SDimitry Andric 
351*06c3fb27SDimitry Andric   Mangled.remove_prefix(1);
352*06c3fb27SDimitry Andric   bool Valid = decodeBackrefPos(Mangled, Ret);
353*06c3fb27SDimitry Andric   if (!Valid || Ret > Qref - Str.data())
35404eeddc0SDimitry Andric     return false;
35504eeddc0SDimitry Andric 
35604eeddc0SDimitry Andric   return std::isdigit(Qref[-Ret]);
3574824e7fdSDimitry Andric }
3584824e7fdSDimitry Andric 
parseMangle(OutputBuffer * Demangled,std::string_view & Mangled)359*06c3fb27SDimitry Andric void Demangler::parseMangle(OutputBuffer *Demangled,
360*06c3fb27SDimitry Andric                             std::string_view &Mangled) {
3614824e7fdSDimitry Andric   // A D mangled symbol is comprised of both scope and type information.
3624824e7fdSDimitry Andric   //    MangleName:
3634824e7fdSDimitry Andric   //        _D QualifiedName Type
3644824e7fdSDimitry Andric   //        _D QualifiedName Z
3654824e7fdSDimitry Andric   //        ^
3664824e7fdSDimitry Andric   // The caller should have guaranteed that the start pointer is at the
3674824e7fdSDimitry Andric   // above location.
3684824e7fdSDimitry Andric   // Note that type is never a function type, but only the return type of
3694824e7fdSDimitry Andric   // a function or the type of a variable.
370*06c3fb27SDimitry Andric   Mangled.remove_prefix(2);
3714824e7fdSDimitry Andric 
372*06c3fb27SDimitry Andric   parseQualified(Demangled, Mangled);
3734824e7fdSDimitry Andric 
374*06c3fb27SDimitry Andric   if (Mangled.empty()) {
375*06c3fb27SDimitry Andric     Mangled = {};
376*06c3fb27SDimitry Andric     return;
377*06c3fb27SDimitry Andric   }
378*06c3fb27SDimitry Andric 
3794824e7fdSDimitry Andric   // Artificial symbols end with 'Z' and have no type.
380*06c3fb27SDimitry Andric   if (Mangled.front() == 'Z') {
381*06c3fb27SDimitry Andric     Mangled.remove_prefix(1);
382*06c3fb27SDimitry Andric   } else if (!parseType(Mangled))
383*06c3fb27SDimitry Andric     Mangled = {};
3844824e7fdSDimitry Andric }
3854824e7fdSDimitry Andric 
parseQualified(OutputBuffer * Demangled,std::string_view & Mangled)386*06c3fb27SDimitry Andric void Demangler::parseQualified(OutputBuffer *Demangled,
387*06c3fb27SDimitry Andric                                std::string_view &Mangled) {
3884824e7fdSDimitry Andric   // Qualified names are identifiers separated by their encoded length.
3894824e7fdSDimitry Andric   // Nested functions also encode their argument types without specifying
3904824e7fdSDimitry Andric   // what they return.
3914824e7fdSDimitry Andric   //    QualifiedName:
3924824e7fdSDimitry Andric   //        SymbolFunctionName
3934824e7fdSDimitry Andric   //        SymbolFunctionName QualifiedName
3944824e7fdSDimitry Andric   //        ^
3954824e7fdSDimitry Andric   //    SymbolFunctionName:
3964824e7fdSDimitry Andric   //        SymbolName
3974824e7fdSDimitry Andric   //        SymbolName TypeFunctionNoReturn
3984824e7fdSDimitry Andric   //        SymbolName M TypeFunctionNoReturn
3994824e7fdSDimitry Andric   //        SymbolName M TypeModifiers TypeFunctionNoReturn
4004824e7fdSDimitry Andric   // The start pointer should be at the above location.
4014824e7fdSDimitry Andric 
4024824e7fdSDimitry Andric   // Whether it has more than one symbol
4034824e7fdSDimitry Andric   size_t NotFirst = false;
4044824e7fdSDimitry Andric   do {
4054824e7fdSDimitry Andric     // Skip over anonymous symbols.
406*06c3fb27SDimitry Andric     if (!Mangled.empty() && Mangled.front() == '0') {
4074824e7fdSDimitry Andric       do
408*06c3fb27SDimitry Andric         Mangled.remove_prefix(1);
409*06c3fb27SDimitry Andric       while (!Mangled.empty() && Mangled.front() == '0');
4104824e7fdSDimitry Andric 
4114824e7fdSDimitry Andric       continue;
4124824e7fdSDimitry Andric     }
4134824e7fdSDimitry Andric 
4144824e7fdSDimitry Andric     if (NotFirst)
4154824e7fdSDimitry Andric       *Demangled << '.';
4164824e7fdSDimitry Andric     NotFirst = true;
4174824e7fdSDimitry Andric 
418*06c3fb27SDimitry Andric     parseIdentifier(Demangled, Mangled);
419*06c3fb27SDimitry Andric   } while (!Mangled.empty() && isSymbolName(Mangled));
4204824e7fdSDimitry Andric }
4214824e7fdSDimitry Andric 
parseIdentifier(OutputBuffer * Demangled,std::string_view & Mangled)422*06c3fb27SDimitry Andric void Demangler::parseIdentifier(OutputBuffer *Demangled,
423*06c3fb27SDimitry Andric                                 std::string_view &Mangled) {
424*06c3fb27SDimitry Andric   if (Mangled.empty()) {
425*06c3fb27SDimitry Andric     Mangled = {};
426*06c3fb27SDimitry Andric     return;
427*06c3fb27SDimitry Andric   }
4284824e7fdSDimitry Andric 
429*06c3fb27SDimitry Andric   if (Mangled.front() == 'Q')
43004eeddc0SDimitry Andric     return parseSymbolBackref(Demangled, Mangled);
4314824e7fdSDimitry Andric 
43204eeddc0SDimitry Andric   // TODO: Parse lengthless template instances.
43304eeddc0SDimitry Andric 
434*06c3fb27SDimitry Andric   unsigned long Len;
435*06c3fb27SDimitry Andric   decodeNumber(Mangled, Len);
4364824e7fdSDimitry Andric 
437*06c3fb27SDimitry Andric   if (Mangled.empty()) {
438*06c3fb27SDimitry Andric     Mangled = {};
439*06c3fb27SDimitry Andric     return;
440*06c3fb27SDimitry Andric   }
441*06c3fb27SDimitry Andric   if (!Len || Mangled.length() < Len) {
442*06c3fb27SDimitry Andric     Mangled = {};
443*06c3fb27SDimitry Andric     return;
444*06c3fb27SDimitry Andric   }
4454824e7fdSDimitry Andric 
4464824e7fdSDimitry Andric   // TODO: Parse template instances with a length prefix.
4474824e7fdSDimitry Andric 
4480eae32dcSDimitry Andric   // There can be multiple different declarations in the same function that
4490eae32dcSDimitry Andric   // have the same mangled name.  To make the mangled names unique, a fake
4500eae32dcSDimitry Andric   // parent in the form `__Sddd' is added to the symbol.
451*06c3fb27SDimitry Andric   if (Len >= 4 && starts_with(Mangled, "__S")) {
452*06c3fb27SDimitry Andric     const size_t SuffixLen = Mangled.length() - Len;
453*06c3fb27SDimitry Andric     std::string_view P = Mangled.substr(3);
454*06c3fb27SDimitry Andric     while (P.length() > SuffixLen && std::isdigit(P.front()))
455*06c3fb27SDimitry Andric       P.remove_prefix(1);
456*06c3fb27SDimitry Andric     if (P.length() == SuffixLen) {
4570eae32dcSDimitry Andric       // Skip over the fake parent.
458*06c3fb27SDimitry Andric       Mangled.remove_prefix(Len);
4590eae32dcSDimitry Andric       return parseIdentifier(Demangled, Mangled);
4600eae32dcSDimitry Andric     }
4610eae32dcSDimitry Andric 
4620eae32dcSDimitry Andric     // Else demangle it as a plain identifier.
4630eae32dcSDimitry Andric   }
4640eae32dcSDimitry Andric 
465*06c3fb27SDimitry Andric   parseLName(Demangled, Mangled, Len);
4664824e7fdSDimitry Andric }
4674824e7fdSDimitry Andric 
parseType(std::string_view & Mangled)468*06c3fb27SDimitry Andric bool Demangler::parseType(std::string_view &Mangled) {
469*06c3fb27SDimitry Andric   if (Mangled.empty()) {
470*06c3fb27SDimitry Andric     Mangled = {};
471*06c3fb27SDimitry Andric     return false;
472*06c3fb27SDimitry Andric   }
47304eeddc0SDimitry Andric 
474*06c3fb27SDimitry Andric   switch (Mangled.front()) {
47504eeddc0SDimitry Andric   // TODO: Parse type qualifiers.
47604eeddc0SDimitry Andric   // TODO: Parse function types.
47704eeddc0SDimitry Andric   // TODO: Parse compound types.
47804eeddc0SDimitry Andric   // TODO: Parse delegate types.
47904eeddc0SDimitry Andric   // TODO: Parse tuple types.
48004eeddc0SDimitry Andric 
48104eeddc0SDimitry Andric   // Basic types.
48204eeddc0SDimitry Andric   case 'i':
483*06c3fb27SDimitry Andric     Mangled.remove_prefix(1);
48404eeddc0SDimitry Andric     // TODO: Add type name dumping
485*06c3fb27SDimitry Andric     return true;
48604eeddc0SDimitry Andric 
48704eeddc0SDimitry Andric     // TODO: Add support for the rest of the basic types.
48804eeddc0SDimitry Andric 
48904eeddc0SDimitry Andric   // Back referenced type.
490*06c3fb27SDimitry Andric   case 'Q': {
491*06c3fb27SDimitry Andric     parseTypeBackref(Mangled);
492*06c3fb27SDimitry Andric     return true;
493*06c3fb27SDimitry Andric   }
49404eeddc0SDimitry Andric 
49504eeddc0SDimitry Andric   default: // unhandled.
496*06c3fb27SDimitry Andric     Mangled = {};
497*06c3fb27SDimitry Andric     return false;
49804eeddc0SDimitry Andric   }
49904eeddc0SDimitry Andric }
50004eeddc0SDimitry Andric 
parseLName(OutputBuffer * Demangled,std::string_view & Mangled,unsigned long Len)501*06c3fb27SDimitry Andric void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
5024824e7fdSDimitry Andric                            unsigned long Len) {
5030eae32dcSDimitry Andric   switch (Len) {
5040eae32dcSDimitry Andric   case 6:
505*06c3fb27SDimitry Andric     if (starts_with(Mangled, "__initZ")) {
5060eae32dcSDimitry Andric       // The static initializer for a given symbol.
5070eae32dcSDimitry Andric       Demangled->prepend("initializer for ");
5080eae32dcSDimitry Andric       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
509*06c3fb27SDimitry Andric       Mangled.remove_prefix(Len);
510*06c3fb27SDimitry Andric       return;
5110eae32dcSDimitry Andric     }
512*06c3fb27SDimitry Andric     if (starts_with(Mangled, "__vtblZ")) {
5130eae32dcSDimitry Andric       // The vtable symbol for a given class.
5140eae32dcSDimitry Andric       Demangled->prepend("vtable for ");
5150eae32dcSDimitry Andric       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
516*06c3fb27SDimitry Andric       Mangled.remove_prefix(Len);
517*06c3fb27SDimitry Andric       return;
5180eae32dcSDimitry Andric     }
5190eae32dcSDimitry Andric     break;
5200eae32dcSDimitry Andric 
5210eae32dcSDimitry Andric   case 7:
522*06c3fb27SDimitry Andric     if (starts_with(Mangled, "__ClassZ")) {
5230eae32dcSDimitry Andric       // The classinfo symbol for a given class.
5240eae32dcSDimitry Andric       Demangled->prepend("ClassInfo for ");
5250eae32dcSDimitry Andric       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
526*06c3fb27SDimitry Andric       Mangled.remove_prefix(Len);
527*06c3fb27SDimitry Andric       return;
5280eae32dcSDimitry Andric     }
5290eae32dcSDimitry Andric     break;
5300eae32dcSDimitry Andric 
5310eae32dcSDimitry Andric   case 11:
532*06c3fb27SDimitry Andric     if (starts_with(Mangled, "__InterfaceZ")) {
5330eae32dcSDimitry Andric       // The interface symbol for a given class.
5340eae32dcSDimitry Andric       Demangled->prepend("Interface for ");
5350eae32dcSDimitry Andric       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
536*06c3fb27SDimitry Andric       Mangled.remove_prefix(Len);
537*06c3fb27SDimitry Andric       return;
5380eae32dcSDimitry Andric     }
5390eae32dcSDimitry Andric     break;
5400eae32dcSDimitry Andric 
5410eae32dcSDimitry Andric   case 12:
542*06c3fb27SDimitry Andric     if (starts_with(Mangled, "__ModuleInfoZ")) {
5430eae32dcSDimitry Andric       // The ModuleInfo symbol for a given module.
5440eae32dcSDimitry Andric       Demangled->prepend("ModuleInfo for ");
5450eae32dcSDimitry Andric       Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
546*06c3fb27SDimitry Andric       Mangled.remove_prefix(Len);
547*06c3fb27SDimitry Andric       return;
5480eae32dcSDimitry Andric     }
5490eae32dcSDimitry Andric     break;
5500eae32dcSDimitry Andric   }
5510eae32dcSDimitry Andric 
552*06c3fb27SDimitry Andric   *Demangled << Mangled.substr(0, Len);
553*06c3fb27SDimitry Andric   Mangled.remove_prefix(Len);
5544824e7fdSDimitry Andric }
5554824e7fdSDimitry Andric 
Demangler(std::string_view Mangled)556*06c3fb27SDimitry Andric Demangler::Demangler(std::string_view Mangled)
557*06c3fb27SDimitry Andric     : Str(Mangled), LastBackref(Mangled.length()) {}
5584824e7fdSDimitry Andric 
parseMangle(OutputBuffer * Demangled)5594824e7fdSDimitry Andric const char *Demangler::parseMangle(OutputBuffer *Demangled) {
560*06c3fb27SDimitry Andric   std::string_view M(this->Str);
561*06c3fb27SDimitry Andric   parseMangle(Demangled, M);
562*06c3fb27SDimitry Andric   return M.data();
5634824e7fdSDimitry Andric }
564349cc55cSDimitry Andric 
dlangDemangle(std::string_view MangledName)565*06c3fb27SDimitry Andric char *llvm::dlangDemangle(std::string_view MangledName) {
566*06c3fb27SDimitry Andric   if (MangledName.empty() || !starts_with(MangledName, "_D"))
567349cc55cSDimitry Andric     return nullptr;
568349cc55cSDimitry Andric 
569349cc55cSDimitry Andric   OutputBuffer Demangled;
570*06c3fb27SDimitry Andric   if (MangledName == "_Dmain") {
571349cc55cSDimitry Andric     Demangled << "D main";
5724824e7fdSDimitry Andric   } else {
5734824e7fdSDimitry Andric 
574*06c3fb27SDimitry Andric     Demangler D(MangledName);
575*06c3fb27SDimitry Andric     const char *M = D.parseMangle(&Demangled);
5764824e7fdSDimitry Andric 
5774824e7fdSDimitry Andric     // Check that the entire symbol was successfully demangled.
578*06c3fb27SDimitry Andric     if (M == nullptr || *M != '\0') {
5794824e7fdSDimitry Andric       std::free(Demangled.getBuffer());
5804824e7fdSDimitry Andric       return nullptr;
5814824e7fdSDimitry Andric     }
5824824e7fdSDimitry Andric   }
583349cc55cSDimitry Andric 
584349cc55cSDimitry Andric   // OutputBuffer's internal buffer is not null terminated and therefore we need
585349cc55cSDimitry Andric   // to add it to comply with C null terminated strings.
586349cc55cSDimitry Andric   if (Demangled.getCurrentPosition() > 0) {
587349cc55cSDimitry Andric     Demangled << '\0';
588349cc55cSDimitry Andric     Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
589349cc55cSDimitry Andric     return Demangled.getBuffer();
590349cc55cSDimitry Andric   }
591349cc55cSDimitry Andric 
5924824e7fdSDimitry Andric   std::free(Demangled.getBuffer());
593349cc55cSDimitry Andric   return nullptr;
594349cc55cSDimitry Andric }
595