xref: /freebsd/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- TypeHashing.h ---------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
10 #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
11 
12 #include "llvm/ADT/ArrayRef.h"
13 #include "llvm/ADT/Hashing.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/Support/Compiler.h"
16 
17 #include "llvm/DebugInfo/CodeView/CVRecord.h"
18 #include "llvm/DebugInfo/CodeView/TypeCollection.h"
19 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
20 
21 #include "llvm/Support/FormatProviders.h"
22 
23 #include <type_traits>
24 
25 namespace llvm {
26 class raw_ostream;
27 namespace codeview {
28 
29 /// A locally hashed type represents a straightforward hash code of a serialized
30 /// record.  The record is simply serialized, and then the bytes are hashed by
31 /// a standard algorithm.  This is sufficient for the case of de-duplicating
32 /// records within a single sequence of types, because if two records both have
33 /// a back-reference to the same type in the same stream, they will both have
34 /// the same numeric value for the TypeIndex of the back reference.
35 struct LocallyHashedType {
36   hash_code Hash;
37   ArrayRef<uint8_t> RecordData;
38 
39   /// Given a type, compute its local hash.
40   LLVM_ABI static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData);
41 
42   /// Given a sequence of types, compute all of the local hashes.
43   template <typename Range>
hashTypesLocallyHashedType44   static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
45     std::vector<LocallyHashedType> Hashes;
46     Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
47     for (const auto &R : Records)
48       Hashes.push_back(hashType(R));
49 
50     return Hashes;
51   }
52 
53   static std::vector<LocallyHashedType>
hashTypeCollectionLocallyHashedType54   hashTypeCollection(TypeCollection &Types) {
55     std::vector<LocallyHashedType> Hashes;
56     Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
57       Hashes.push_back(hashType(Type.RecordData));
58     });
59     return Hashes;
60   }
61 };
62 
63 enum class GlobalTypeHashAlg : uint16_t {
64   SHA1 = 0, // standard 20-byte SHA1 hash
65   SHA1_8,   // last 8-bytes of standard SHA1 hash
66   BLAKE3,   // truncated 8-bytes BLAKE3
67 };
68 
69 /// A globally hashed type represents a hash value that is sufficient to
70 /// uniquely identify a record across multiple type streams or type sequences.
71 /// This works by, for any given record A which references B, replacing the
72 /// TypeIndex that refers to B with a previously-computed global hash for B.  As
73 /// this is a recursive algorithm (e.g. the global hash of B also depends on the
74 /// global hashes of the types that B refers to), a global hash can uniquely
75 /// identify that A occurs in another stream that has a completely
76 /// different graph structure.  Although the hash itself is slower to compute,
77 /// probing is much faster with a globally hashed type, because the hash itself
78 /// is considered "as good as" the original type.  Since type records can be
79 /// quite large, this makes the equality comparison of the hash much faster than
80 /// equality comparison of a full record.
81 struct GloballyHashedType {
82   GloballyHashedType() = default;
GloballyHashedTypeGloballyHashedType83   GloballyHashedType(StringRef H)
84       : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
GloballyHashedTypeGloballyHashedType85   GloballyHashedType(ArrayRef<uint8_t> H) {
86     assert(H.size() == 8);
87     ::memcpy(Hash.data(), H.data(), 8);
88   }
89   std::array<uint8_t, 8> Hash;
90 
emptyGloballyHashedType91   bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
92 
93   friend inline bool operator==(const GloballyHashedType &L,
94                                 const GloballyHashedType &R) {
95     return L.Hash == R.Hash;
96   }
97 
98   friend inline bool operator!=(const GloballyHashedType &L,
99                                 const GloballyHashedType &R) {
100     return !(L.Hash == R.Hash);
101   }
102 
103   /// Given a sequence of bytes representing a record, compute a global hash for
104   /// this record.  Due to the nature of global hashes incorporating the hashes
105   /// of referenced records, this function requires a list of types and ids
106   /// that RecordData might reference, indexable by TypeIndex.
107   LLVM_ABI static GloballyHashedType
108   hashType(ArrayRef<uint8_t> RecordData,
109            ArrayRef<GloballyHashedType> PreviousTypes,
110            ArrayRef<GloballyHashedType> PreviousIds);
111 
112   /// Given a sequence of bytes representing a record, compute a global hash for
113   /// this record.  Due to the nature of global hashes incorporating the hashes
114   /// of referenced records, this function requires a list of types and ids
115   /// that RecordData might reference, indexable by TypeIndex.
hashTypeGloballyHashedType116   static GloballyHashedType hashType(CVType Type,
117                                      ArrayRef<GloballyHashedType> PreviousTypes,
118                                      ArrayRef<GloballyHashedType> PreviousIds) {
119     return hashType(Type.RecordData, PreviousTypes, PreviousIds);
120   }
121 
122   /// Given a sequence of combined type and ID records, compute global hashes
123   /// for each of them, returning the results in a vector of hashed types.
124   template <typename Range>
hashTypesGloballyHashedType125   static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
126     std::vector<GloballyHashedType> Hashes;
127     bool UnresolvedRecords = false;
128     for (const auto &R : Records) {
129       GloballyHashedType H = hashType(R, Hashes, Hashes);
130       if (H.empty())
131         UnresolvedRecords = true;
132       Hashes.push_back(H);
133     }
134 
135     // In some rare cases, there might be records with forward references in the
136     // stream. Several passes might be needed to fully hash each record in the
137     // Type stream. However this occurs on very small OBJs generated by MASM,
138     // with a dozen records at most. Therefore this codepath isn't
139     // time-critical, as it isn't taken in 99% of cases.
140     while (UnresolvedRecords) {
141       UnresolvedRecords = false;
142       auto HashIt = Hashes.begin();
143       for (const auto &R : Records) {
144         if (HashIt->empty()) {
145           GloballyHashedType H = hashType(R, Hashes, Hashes);
146           if (H.empty())
147             UnresolvedRecords = true;
148           else
149             *HashIt = H;
150         }
151         ++HashIt;
152       }
153     }
154 
155     return Hashes;
156   }
157 
158   /// Given a sequence of combined type and ID records, compute global hashes
159   /// for each of them, returning the results in a vector of hashed types.
160   template <typename Range>
161   static std::vector<GloballyHashedType>
hashIdsGloballyHashedType162   hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) {
163     std::vector<GloballyHashedType> IdHashes;
164     for (const auto &R : Records)
165       IdHashes.push_back(hashType(R, TypeHashes, IdHashes));
166 
167     return IdHashes;
168   }
169 
170   static std::vector<GloballyHashedType>
hashTypeCollectionGloballyHashedType171   hashTypeCollection(TypeCollection &Types) {
172     std::vector<GloballyHashedType> Hashes;
173     Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
174       Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes));
175     });
176     return Hashes;
177   }
178 };
179 static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
180               "GloballyHashedType must be trivially copyable so that we can "
181               "reinterpret_cast arrays of hash data to arrays of "
182               "GloballyHashedType");
183 } // namespace codeview
184 
185 template <> struct DenseMapInfo<codeview::LocallyHashedType> {
186   LLVM_ABI static codeview::LocallyHashedType Empty;
187   LLVM_ABI static codeview::LocallyHashedType Tombstone;
188 
189   static codeview::LocallyHashedType getEmptyKey() { return Empty; }
190 
191   static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }
192 
193   static unsigned getHashValue(codeview::LocallyHashedType Val) {
194     return Val.Hash;
195   }
196 
197   static bool isEqual(codeview::LocallyHashedType LHS,
198                       codeview::LocallyHashedType RHS) {
199     if (LHS.Hash != RHS.Hash)
200       return false;
201     return LHS.RecordData == RHS.RecordData;
202   }
203 };
204 
205 template <> struct DenseMapInfo<codeview::GloballyHashedType> {
206   LLVM_ABI static codeview::GloballyHashedType Empty;
207   LLVM_ABI static codeview::GloballyHashedType Tombstone;
208 
209   static codeview::GloballyHashedType getEmptyKey() { return Empty; }
210 
211   static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }
212 
213   static unsigned getHashValue(codeview::GloballyHashedType Val) {
214     return *reinterpret_cast<const unsigned *>(Val.Hash.data());
215   }
216 
217   static bool isEqual(codeview::GloballyHashedType LHS,
218                       codeview::GloballyHashedType RHS) {
219     return LHS == RHS;
220   }
221 };
222 
223 template <> struct format_provider<codeview::LocallyHashedType> {
224 public:
225   static void format(const codeview::LocallyHashedType &V,
226                      llvm::raw_ostream &Stream, StringRef Style) {
227     write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8);
228   }
229 };
230 
231 template <> struct format_provider<codeview::GloballyHashedType> {
232 public:
233   static void format(const codeview::GloballyHashedType &V,
234                      llvm::raw_ostream &Stream, StringRef Style) {
235     for (uint8_t B : V.Hash) {
236       write_hex(Stream, B, HexPrintStyle::Upper, 2);
237     }
238   }
239 };
240 
241 } // namespace llvm
242 
243 #endif
244