106c3fb27SDimitry Andric //===----------------- ItaniumManglingCanonicalizer.cpp -------------------===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric 906c3fb27SDimitry Andric #include "llvm/ProfileData/ItaniumManglingCanonicalizer.h" 1006c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 1106c3fb27SDimitry Andric #include "llvm/ADT/FoldingSet.h" 1206c3fb27SDimitry Andric #include "llvm/ADT/StringRef.h" 1306c3fb27SDimitry Andric #include "llvm/Demangle/ItaniumDemangle.h" 1406c3fb27SDimitry Andric #include "llvm/Support/Allocator.h" 1506c3fb27SDimitry Andric 1606c3fb27SDimitry Andric using namespace llvm; 1706c3fb27SDimitry Andric using llvm::itanium_demangle::ForwardTemplateReference; 1806c3fb27SDimitry Andric using llvm::itanium_demangle::Node; 1906c3fb27SDimitry Andric using llvm::itanium_demangle::NodeKind; 2006c3fb27SDimitry Andric 2106c3fb27SDimitry Andric namespace { 2206c3fb27SDimitry Andric struct FoldingSetNodeIDBuilder { 2306c3fb27SDimitry Andric llvm::FoldingSetNodeID &ID; 2406c3fb27SDimitry Andric void operator()(const Node *P) { ID.AddPointer(P); } 2506c3fb27SDimitry Andric void operator()(std::string_view Str) { 2606c3fb27SDimitry Andric if (Str.empty()) 2706c3fb27SDimitry Andric ID.AddString({}); 2806c3fb27SDimitry Andric else 2906c3fb27SDimitry Andric ID.AddString(llvm::StringRef(&*Str.begin(), Str.size())); 3006c3fb27SDimitry Andric } 3106c3fb27SDimitry Andric template <typename T> 3206c3fb27SDimitry Andric std::enable_if_t<std::is_integral_v<T> || std::is_enum_v<T>> operator()(T V) { 3306c3fb27SDimitry Andric ID.AddInteger((unsigned long long)V); 3406c3fb27SDimitry Andric } 3506c3fb27SDimitry Andric void operator()(itanium_demangle::NodeArray A) { 3606c3fb27SDimitry Andric ID.AddInteger(A.size()); 3706c3fb27SDimitry Andric for (const Node *N : A) 3806c3fb27SDimitry Andric (*this)(N); 3906c3fb27SDimitry Andric } 4006c3fb27SDimitry Andric }; 4106c3fb27SDimitry Andric 4206c3fb27SDimitry Andric template<typename ...T> 4306c3fb27SDimitry Andric void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) { 4406c3fb27SDimitry Andric FoldingSetNodeIDBuilder Builder = {ID}; 4506c3fb27SDimitry Andric Builder(K); 4606c3fb27SDimitry Andric int VisitInOrder[] = { 4706c3fb27SDimitry Andric (Builder(V), 0) ..., 4806c3fb27SDimitry Andric 0 // Avoid empty array if there are no arguments. 4906c3fb27SDimitry Andric }; 5006c3fb27SDimitry Andric (void)VisitInOrder; 5106c3fb27SDimitry Andric } 5206c3fb27SDimitry Andric 5306c3fb27SDimitry Andric // FIXME: Convert this to a generic lambda when possible. 5406c3fb27SDimitry Andric template<typename NodeT> struct ProfileSpecificNode { 5506c3fb27SDimitry Andric FoldingSetNodeID &ID; 5606c3fb27SDimitry Andric template<typename ...T> void operator()(T ...V) { 5706c3fb27SDimitry Andric profileCtor(ID, NodeKind<NodeT>::Kind, V...); 5806c3fb27SDimitry Andric } 5906c3fb27SDimitry Andric }; 6006c3fb27SDimitry Andric 6106c3fb27SDimitry Andric struct ProfileNode { 6206c3fb27SDimitry Andric FoldingSetNodeID &ID; 6306c3fb27SDimitry Andric template<typename NodeT> void operator()(const NodeT *N) { 6406c3fb27SDimitry Andric N->match(ProfileSpecificNode<NodeT>{ID}); 6506c3fb27SDimitry Andric } 6606c3fb27SDimitry Andric }; 6706c3fb27SDimitry Andric 6806c3fb27SDimitry Andric template<> void ProfileNode::operator()(const ForwardTemplateReference *N) { 6906c3fb27SDimitry Andric llvm_unreachable("should never canonicalize a ForwardTemplateReference"); 7006c3fb27SDimitry Andric } 7106c3fb27SDimitry Andric 7206c3fb27SDimitry Andric void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) { 7306c3fb27SDimitry Andric N->visit(ProfileNode{ID}); 7406c3fb27SDimitry Andric } 7506c3fb27SDimitry Andric 7606c3fb27SDimitry Andric class FoldingNodeAllocator { 7706c3fb27SDimitry Andric class alignas(alignof(Node *)) NodeHeader : public llvm::FoldingSetNode { 7806c3fb27SDimitry Andric public: 7906c3fb27SDimitry Andric // 'Node' in this context names the injected-class-name of the base class. 8006c3fb27SDimitry Andric itanium_demangle::Node *getNode() { 8106c3fb27SDimitry Andric return reinterpret_cast<itanium_demangle::Node *>(this + 1); 8206c3fb27SDimitry Andric } 8306c3fb27SDimitry Andric void Profile(llvm::FoldingSetNodeID &ID) { profileNode(ID, getNode()); } 8406c3fb27SDimitry Andric }; 8506c3fb27SDimitry Andric 8606c3fb27SDimitry Andric BumpPtrAllocator RawAlloc; 8706c3fb27SDimitry Andric llvm::FoldingSet<NodeHeader> Nodes; 8806c3fb27SDimitry Andric 8906c3fb27SDimitry Andric public: 9006c3fb27SDimitry Andric void reset() {} 9106c3fb27SDimitry Andric 9206c3fb27SDimitry Andric template <typename T, typename... Args> 9306c3fb27SDimitry Andric std::pair<Node *, bool> getOrCreateNode(bool CreateNewNodes, Args &&... As) { 9406c3fb27SDimitry Andric // FIXME: Don't canonicalize forward template references for now, because 9506c3fb27SDimitry Andric // they contain state (the resolved template node) that's not known at their 9606c3fb27SDimitry Andric // point of creation. 9706c3fb27SDimitry Andric if (std::is_same<T, ForwardTemplateReference>::value) { 9806c3fb27SDimitry Andric // Note that we don't use if-constexpr here and so we must still write 9906c3fb27SDimitry Andric // this code in a generic form. 10006c3fb27SDimitry Andric return {new (RawAlloc.Allocate(sizeof(T), alignof(T))) 10106c3fb27SDimitry Andric T(std::forward<Args>(As)...), 10206c3fb27SDimitry Andric true}; 10306c3fb27SDimitry Andric } 10406c3fb27SDimitry Andric 10506c3fb27SDimitry Andric llvm::FoldingSetNodeID ID; 10606c3fb27SDimitry Andric profileCtor(ID, NodeKind<T>::Kind, As...); 10706c3fb27SDimitry Andric 10806c3fb27SDimitry Andric void *InsertPos; 10906c3fb27SDimitry Andric if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos)) 11006c3fb27SDimitry Andric return {static_cast<T*>(Existing->getNode()), false}; 11106c3fb27SDimitry Andric 11206c3fb27SDimitry Andric if (!CreateNewNodes) 11306c3fb27SDimitry Andric return {nullptr, true}; 11406c3fb27SDimitry Andric 11506c3fb27SDimitry Andric static_assert(alignof(T) <= alignof(NodeHeader), 11606c3fb27SDimitry Andric "underaligned node header for specific node kind"); 11706c3fb27SDimitry Andric void *Storage = 11806c3fb27SDimitry Andric RawAlloc.Allocate(sizeof(NodeHeader) + sizeof(T), alignof(NodeHeader)); 11906c3fb27SDimitry Andric NodeHeader *New = new (Storage) NodeHeader; 12006c3fb27SDimitry Andric T *Result = new (New->getNode()) T(std::forward<Args>(As)...); 12106c3fb27SDimitry Andric Nodes.InsertNode(New, InsertPos); 12206c3fb27SDimitry Andric return {Result, true}; 12306c3fb27SDimitry Andric } 12406c3fb27SDimitry Andric 12506c3fb27SDimitry Andric template<typename T, typename... Args> 12606c3fb27SDimitry Andric Node *makeNode(Args &&...As) { 12706c3fb27SDimitry Andric return getOrCreateNode<T>(true, std::forward<Args>(As)...).first; 12806c3fb27SDimitry Andric } 12906c3fb27SDimitry Andric 13006c3fb27SDimitry Andric void *allocateNodeArray(size_t sz) { 13106c3fb27SDimitry Andric return RawAlloc.Allocate(sizeof(Node *) * sz, alignof(Node *)); 13206c3fb27SDimitry Andric } 13306c3fb27SDimitry Andric }; 13406c3fb27SDimitry Andric 13506c3fb27SDimitry Andric class CanonicalizerAllocator : public FoldingNodeAllocator { 13606c3fb27SDimitry Andric Node *MostRecentlyCreated = nullptr; 13706c3fb27SDimitry Andric Node *TrackedNode = nullptr; 13806c3fb27SDimitry Andric bool TrackedNodeIsUsed = false; 13906c3fb27SDimitry Andric bool CreateNewNodes = true; 14006c3fb27SDimitry Andric llvm::SmallDenseMap<Node*, Node*, 32> Remappings; 14106c3fb27SDimitry Andric 14206c3fb27SDimitry Andric template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) { 14306c3fb27SDimitry Andric std::pair<Node *, bool> Result = 14406c3fb27SDimitry Andric getOrCreateNode<T>(CreateNewNodes, std::forward<Args>(As)...); 14506c3fb27SDimitry Andric if (Result.second) { 14606c3fb27SDimitry Andric // Node is new. Make a note of that. 14706c3fb27SDimitry Andric MostRecentlyCreated = Result.first; 14806c3fb27SDimitry Andric } else if (Result.first) { 14906c3fb27SDimitry Andric // Node is pre-existing; check if it's in our remapping table. 15006c3fb27SDimitry Andric if (auto *N = Remappings.lookup(Result.first)) { 15106c3fb27SDimitry Andric Result.first = N; 152*5f757f3fSDimitry Andric assert(!Remappings.contains(Result.first) && 15306c3fb27SDimitry Andric "should never need multiple remap steps"); 15406c3fb27SDimitry Andric } 15506c3fb27SDimitry Andric if (Result.first == TrackedNode) 15606c3fb27SDimitry Andric TrackedNodeIsUsed = true; 15706c3fb27SDimitry Andric } 15806c3fb27SDimitry Andric return Result.first; 15906c3fb27SDimitry Andric } 16006c3fb27SDimitry Andric 16106c3fb27SDimitry Andric /// Helper to allow makeNode to be partially-specialized on T. 16206c3fb27SDimitry Andric template<typename T> struct MakeNodeImpl { 16306c3fb27SDimitry Andric CanonicalizerAllocator &Self; 16406c3fb27SDimitry Andric template<typename ...Args> Node *make(Args &&...As) { 16506c3fb27SDimitry Andric return Self.makeNodeSimple<T>(std::forward<Args>(As)...); 16606c3fb27SDimitry Andric } 16706c3fb27SDimitry Andric }; 16806c3fb27SDimitry Andric 16906c3fb27SDimitry Andric public: 17006c3fb27SDimitry Andric template<typename T, typename ...Args> Node *makeNode(Args &&...As) { 17106c3fb27SDimitry Andric return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...); 17206c3fb27SDimitry Andric } 17306c3fb27SDimitry Andric 17406c3fb27SDimitry Andric void reset() { MostRecentlyCreated = nullptr; } 17506c3fb27SDimitry Andric 17606c3fb27SDimitry Andric void setCreateNewNodes(bool CNN) { CreateNewNodes = CNN; } 17706c3fb27SDimitry Andric 17806c3fb27SDimitry Andric void addRemapping(Node *A, Node *B) { 17906c3fb27SDimitry Andric // Note, we don't need to check whether B is also remapped, because if it 18006c3fb27SDimitry Andric // was we would have already remapped it when building it. 18106c3fb27SDimitry Andric Remappings.insert(std::make_pair(A, B)); 18206c3fb27SDimitry Andric } 18306c3fb27SDimitry Andric 18406c3fb27SDimitry Andric bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; } 18506c3fb27SDimitry Andric 18606c3fb27SDimitry Andric void trackUsesOf(Node *N) { 18706c3fb27SDimitry Andric TrackedNode = N; 18806c3fb27SDimitry Andric TrackedNodeIsUsed = false; 18906c3fb27SDimitry Andric } 19006c3fb27SDimitry Andric bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; } 19106c3fb27SDimitry Andric }; 19206c3fb27SDimitry Andric 19306c3fb27SDimitry Andric // FIXME: Also expand built-in substitutions? 19406c3fb27SDimitry Andric 19506c3fb27SDimitry Andric using CanonicalizingDemangler = 19606c3fb27SDimitry Andric itanium_demangle::ManglingParser<CanonicalizerAllocator>; 19706c3fb27SDimitry Andric } // namespace 19806c3fb27SDimitry Andric 19906c3fb27SDimitry Andric struct ItaniumManglingCanonicalizer::Impl { 20006c3fb27SDimitry Andric CanonicalizingDemangler Demangler = {nullptr, nullptr}; 20106c3fb27SDimitry Andric }; 20206c3fb27SDimitry Andric 20306c3fb27SDimitry Andric ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {} 20406c3fb27SDimitry Andric ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; } 20506c3fb27SDimitry Andric 20606c3fb27SDimitry Andric ItaniumManglingCanonicalizer::EquivalenceError 20706c3fb27SDimitry Andric ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First, 20806c3fb27SDimitry Andric StringRef Second) { 20906c3fb27SDimitry Andric auto &Alloc = P->Demangler.ASTAllocator; 21006c3fb27SDimitry Andric Alloc.setCreateNewNodes(true); 21106c3fb27SDimitry Andric 21206c3fb27SDimitry Andric auto Parse = [&](StringRef Str) { 21306c3fb27SDimitry Andric P->Demangler.reset(Str.begin(), Str.end()); 21406c3fb27SDimitry Andric Node *N = nullptr; 21506c3fb27SDimitry Andric switch (Kind) { 21606c3fb27SDimitry Andric // A <name>, with minor extensions to allow arbitrary namespace and 21706c3fb27SDimitry Andric // template names that can't easily be written as <name>s. 21806c3fb27SDimitry Andric case FragmentKind::Name: 21906c3fb27SDimitry Andric // Very special case: allow "St" as a shorthand for "3std". It's not 22006c3fb27SDimitry Andric // valid as a <name> mangling, but is nonetheless the most natural 22106c3fb27SDimitry Andric // way to name the 'std' namespace. 22206c3fb27SDimitry Andric if (Str.size() == 2 && P->Demangler.consumeIf("St")) 22306c3fb27SDimitry Andric N = P->Demangler.make<itanium_demangle::NameType>("std"); 22406c3fb27SDimitry Andric // We permit substitutions to name templates without their template 22506c3fb27SDimitry Andric // arguments. This mostly just falls out, as almost all template names 22606c3fb27SDimitry Andric // are valid as <name>s, but we also want to parse <substitution>s as 22706c3fb27SDimitry Andric // <name>s, even though they're not. 228*5f757f3fSDimitry Andric else if (Str.starts_with("S")) 22906c3fb27SDimitry Andric // Parse the substitution and optional following template arguments. 23006c3fb27SDimitry Andric N = P->Demangler.parseType(); 23106c3fb27SDimitry Andric else 23206c3fb27SDimitry Andric N = P->Demangler.parseName(); 23306c3fb27SDimitry Andric break; 23406c3fb27SDimitry Andric 23506c3fb27SDimitry Andric // A <type>. 23606c3fb27SDimitry Andric case FragmentKind::Type: 23706c3fb27SDimitry Andric N = P->Demangler.parseType(); 23806c3fb27SDimitry Andric break; 23906c3fb27SDimitry Andric 24006c3fb27SDimitry Andric // An <encoding>. 24106c3fb27SDimitry Andric case FragmentKind::Encoding: 24206c3fb27SDimitry Andric N = P->Demangler.parseEncoding(); 24306c3fb27SDimitry Andric break; 24406c3fb27SDimitry Andric } 24506c3fb27SDimitry Andric 24606c3fb27SDimitry Andric // If we have trailing junk, the mangling is invalid. 24706c3fb27SDimitry Andric if (P->Demangler.numLeft() != 0) 24806c3fb27SDimitry Andric N = nullptr; 24906c3fb27SDimitry Andric 25006c3fb27SDimitry Andric // If any node was created after N, then we cannot safely remap it because 25106c3fb27SDimitry Andric // it might already be in use by another node. 25206c3fb27SDimitry Andric return std::make_pair(N, Alloc.isMostRecentlyCreated(N)); 25306c3fb27SDimitry Andric }; 25406c3fb27SDimitry Andric 25506c3fb27SDimitry Andric Node *FirstNode, *SecondNode; 25606c3fb27SDimitry Andric bool FirstIsNew, SecondIsNew; 25706c3fb27SDimitry Andric 25806c3fb27SDimitry Andric std::tie(FirstNode, FirstIsNew) = Parse(First); 25906c3fb27SDimitry Andric if (!FirstNode) 26006c3fb27SDimitry Andric return EquivalenceError::InvalidFirstMangling; 26106c3fb27SDimitry Andric 26206c3fb27SDimitry Andric Alloc.trackUsesOf(FirstNode); 26306c3fb27SDimitry Andric std::tie(SecondNode, SecondIsNew) = Parse(Second); 26406c3fb27SDimitry Andric if (!SecondNode) 26506c3fb27SDimitry Andric return EquivalenceError::InvalidSecondMangling; 26606c3fb27SDimitry Andric 26706c3fb27SDimitry Andric // If they're already equivalent, there's nothing to do. 26806c3fb27SDimitry Andric if (FirstNode == SecondNode) 26906c3fb27SDimitry Andric return EquivalenceError::Success; 27006c3fb27SDimitry Andric 27106c3fb27SDimitry Andric if (FirstIsNew && !Alloc.trackedNodeIsUsed()) 27206c3fb27SDimitry Andric Alloc.addRemapping(FirstNode, SecondNode); 27306c3fb27SDimitry Andric else if (SecondIsNew) 27406c3fb27SDimitry Andric Alloc.addRemapping(SecondNode, FirstNode); 27506c3fb27SDimitry Andric else 27606c3fb27SDimitry Andric return EquivalenceError::ManglingAlreadyUsed; 27706c3fb27SDimitry Andric 27806c3fb27SDimitry Andric return EquivalenceError::Success; 27906c3fb27SDimitry Andric } 28006c3fb27SDimitry Andric 28106c3fb27SDimitry Andric static ItaniumManglingCanonicalizer::Key 28206c3fb27SDimitry Andric parseMaybeMangledName(CanonicalizingDemangler &Demangler, StringRef Mangling, 28306c3fb27SDimitry Andric bool CreateNewNodes) { 28406c3fb27SDimitry Andric Demangler.ASTAllocator.setCreateNewNodes(CreateNewNodes); 28506c3fb27SDimitry Andric Demangler.reset(Mangling.begin(), Mangling.end()); 28606c3fb27SDimitry Andric // Attempt demangling only for names that look like C++ mangled names. 28706c3fb27SDimitry Andric // Otherwise, treat them as extern "C" names. We permit the latter to 28806c3fb27SDimitry Andric // be remapped by (eg) 28906c3fb27SDimitry Andric // encoding 6memcpy 7memmove 29006c3fb27SDimitry Andric // consistent with how they are encoded as local-names inside a C++ mangling. 29106c3fb27SDimitry Andric Node *N; 292*5f757f3fSDimitry Andric if (Mangling.starts_with("_Z") || Mangling.starts_with("__Z") || 293*5f757f3fSDimitry Andric Mangling.starts_with("___Z") || Mangling.starts_with("____Z")) 29406c3fb27SDimitry Andric N = Demangler.parse(); 29506c3fb27SDimitry Andric else 29606c3fb27SDimitry Andric N = Demangler.make<itanium_demangle::NameType>( 29706c3fb27SDimitry Andric std::string_view(Mangling.data(), Mangling.size())); 29806c3fb27SDimitry Andric return reinterpret_cast<ItaniumManglingCanonicalizer::Key>(N); 29906c3fb27SDimitry Andric } 30006c3fb27SDimitry Andric 30106c3fb27SDimitry Andric ItaniumManglingCanonicalizer::Key 30206c3fb27SDimitry Andric ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) { 30306c3fb27SDimitry Andric return parseMaybeMangledName(P->Demangler, Mangling, true); 30406c3fb27SDimitry Andric } 30506c3fb27SDimitry Andric 30606c3fb27SDimitry Andric ItaniumManglingCanonicalizer::Key 30706c3fb27SDimitry Andric ItaniumManglingCanonicalizer::lookup(StringRef Mangling) { 30806c3fb27SDimitry Andric return parseMaybeMangledName(P->Demangler, Mangling, false); 30906c3fb27SDimitry Andric } 310