1 //===- TypeHashing.h ---------------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H 10 #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H 11 12 #include "llvm/ADT/ArrayRef.h" 13 #include "llvm/ADT/Hashing.h" 14 #include "llvm/ADT/StringRef.h" 15 #include "llvm/Support/Compiler.h" 16 17 #include "llvm/DebugInfo/CodeView/CVRecord.h" 18 #include "llvm/DebugInfo/CodeView/TypeCollection.h" 19 #include "llvm/DebugInfo/CodeView/TypeIndex.h" 20 21 #include "llvm/Support/FormatProviders.h" 22 23 #include <type_traits> 24 25 namespace llvm { 26 class raw_ostream; 27 namespace codeview { 28 29 /// A locally hashed type represents a straightforward hash code of a serialized 30 /// record. The record is simply serialized, and then the bytes are hashed by 31 /// a standard algorithm. This is sufficient for the case of de-duplicating 32 /// records within a single sequence of types, because if two records both have 33 /// a back-reference to the same type in the same stream, they will both have 34 /// the same numeric value for the TypeIndex of the back reference. 35 struct LocallyHashedType { 36 hash_code Hash; 37 ArrayRef<uint8_t> RecordData; 38 39 /// Given a type, compute its local hash. 40 LLVM_ABI static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData); 41 42 /// Given a sequence of types, compute all of the local hashes. 43 template <typename Range> hashTypesLocallyHashedType44 static std::vector<LocallyHashedType> hashTypes(Range &&Records) { 45 std::vector<LocallyHashedType> Hashes; 46 Hashes.reserve(std::distance(std::begin(Records), std::end(Records))); 47 for (const auto &R : Records) 48 Hashes.push_back(hashType(R)); 49 50 return Hashes; 51 } 52 53 static std::vector<LocallyHashedType> hashTypeCollectionLocallyHashedType54 hashTypeCollection(TypeCollection &Types) { 55 std::vector<LocallyHashedType> Hashes; 56 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { 57 Hashes.push_back(hashType(Type.RecordData)); 58 }); 59 return Hashes; 60 } 61 }; 62 63 enum class GlobalTypeHashAlg : uint16_t { 64 SHA1 = 0, // standard 20-byte SHA1 hash 65 SHA1_8, // last 8-bytes of standard SHA1 hash 66 BLAKE3, // truncated 8-bytes BLAKE3 67 }; 68 69 /// A globally hashed type represents a hash value that is sufficient to 70 /// uniquely identify a record across multiple type streams or type sequences. 71 /// This works by, for any given record A which references B, replacing the 72 /// TypeIndex that refers to B with a previously-computed global hash for B. As 73 /// this is a recursive algorithm (e.g. the global hash of B also depends on the 74 /// global hashes of the types that B refers to), a global hash can uniquely 75 /// identify that A occurs in another stream that has a completely 76 /// different graph structure. Although the hash itself is slower to compute, 77 /// probing is much faster with a globally hashed type, because the hash itself 78 /// is considered "as good as" the original type. Since type records can be 79 /// quite large, this makes the equality comparison of the hash much faster than 80 /// equality comparison of a full record. 81 struct GloballyHashedType { 82 GloballyHashedType() = default; GloballyHashedTypeGloballyHashedType83 GloballyHashedType(StringRef H) 84 : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {} GloballyHashedTypeGloballyHashedType85 GloballyHashedType(ArrayRef<uint8_t> H) { 86 assert(H.size() == 8); 87 ::memcpy(Hash.data(), H.data(), 8); 88 } 89 std::array<uint8_t, 8> Hash; 90 emptyGloballyHashedType91 bool empty() const { return *(const uint64_t*)Hash.data() == 0; } 92 93 friend inline bool operator==(const GloballyHashedType &L, 94 const GloballyHashedType &R) { 95 return L.Hash == R.Hash; 96 } 97 98 friend inline bool operator!=(const GloballyHashedType &L, 99 const GloballyHashedType &R) { 100 return !(L.Hash == R.Hash); 101 } 102 103 /// Given a sequence of bytes representing a record, compute a global hash for 104 /// this record. Due to the nature of global hashes incorporating the hashes 105 /// of referenced records, this function requires a list of types and ids 106 /// that RecordData might reference, indexable by TypeIndex. 107 LLVM_ABI static GloballyHashedType 108 hashType(ArrayRef<uint8_t> RecordData, 109 ArrayRef<GloballyHashedType> PreviousTypes, 110 ArrayRef<GloballyHashedType> PreviousIds); 111 112 /// Given a sequence of bytes representing a record, compute a global hash for 113 /// this record. Due to the nature of global hashes incorporating the hashes 114 /// of referenced records, this function requires a list of types and ids 115 /// that RecordData might reference, indexable by TypeIndex. hashTypeGloballyHashedType116 static GloballyHashedType hashType(CVType Type, 117 ArrayRef<GloballyHashedType> PreviousTypes, 118 ArrayRef<GloballyHashedType> PreviousIds) { 119 return hashType(Type.RecordData, PreviousTypes, PreviousIds); 120 } 121 122 /// Given a sequence of combined type and ID records, compute global hashes 123 /// for each of them, returning the results in a vector of hashed types. 124 template <typename Range> hashTypesGloballyHashedType125 static std::vector<GloballyHashedType> hashTypes(Range &&Records) { 126 std::vector<GloballyHashedType> Hashes; 127 bool UnresolvedRecords = false; 128 for (const auto &R : Records) { 129 GloballyHashedType H = hashType(R, Hashes, Hashes); 130 if (H.empty()) 131 UnresolvedRecords = true; 132 Hashes.push_back(H); 133 } 134 135 // In some rare cases, there might be records with forward references in the 136 // stream. Several passes might be needed to fully hash each record in the 137 // Type stream. However this occurs on very small OBJs generated by MASM, 138 // with a dozen records at most. Therefore this codepath isn't 139 // time-critical, as it isn't taken in 99% of cases. 140 while (UnresolvedRecords) { 141 UnresolvedRecords = false; 142 auto HashIt = Hashes.begin(); 143 for (const auto &R : Records) { 144 if (HashIt->empty()) { 145 GloballyHashedType H = hashType(R, Hashes, Hashes); 146 if (H.empty()) 147 UnresolvedRecords = true; 148 else 149 *HashIt = H; 150 } 151 ++HashIt; 152 } 153 } 154 155 return Hashes; 156 } 157 158 /// Given a sequence of combined type and ID records, compute global hashes 159 /// for each of them, returning the results in a vector of hashed types. 160 template <typename Range> 161 static std::vector<GloballyHashedType> hashIdsGloballyHashedType162 hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) { 163 std::vector<GloballyHashedType> IdHashes; 164 for (const auto &R : Records) 165 IdHashes.push_back(hashType(R, TypeHashes, IdHashes)); 166 167 return IdHashes; 168 } 169 170 static std::vector<GloballyHashedType> hashTypeCollectionGloballyHashedType171 hashTypeCollection(TypeCollection &Types) { 172 std::vector<GloballyHashedType> Hashes; 173 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { 174 Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes)); 175 }); 176 return Hashes; 177 } 178 }; 179 static_assert(std::is_trivially_copyable<GloballyHashedType>::value, 180 "GloballyHashedType must be trivially copyable so that we can " 181 "reinterpret_cast arrays of hash data to arrays of " 182 "GloballyHashedType"); 183 } // namespace codeview 184 185 template <> struct DenseMapInfo<codeview::LocallyHashedType> { 186 LLVM_ABI static codeview::LocallyHashedType Empty; 187 LLVM_ABI static codeview::LocallyHashedType Tombstone; 188 189 static codeview::LocallyHashedType getEmptyKey() { return Empty; } 190 191 static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; } 192 193 static unsigned getHashValue(codeview::LocallyHashedType Val) { 194 return Val.Hash; 195 } 196 197 static bool isEqual(codeview::LocallyHashedType LHS, 198 codeview::LocallyHashedType RHS) { 199 if (LHS.Hash != RHS.Hash) 200 return false; 201 return LHS.RecordData == RHS.RecordData; 202 } 203 }; 204 205 template <> struct DenseMapInfo<codeview::GloballyHashedType> { 206 LLVM_ABI static codeview::GloballyHashedType Empty; 207 LLVM_ABI static codeview::GloballyHashedType Tombstone; 208 209 static codeview::GloballyHashedType getEmptyKey() { return Empty; } 210 211 static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; } 212 213 static unsigned getHashValue(codeview::GloballyHashedType Val) { 214 return *reinterpret_cast<const unsigned *>(Val.Hash.data()); 215 } 216 217 static bool isEqual(codeview::GloballyHashedType LHS, 218 codeview::GloballyHashedType RHS) { 219 return LHS == RHS; 220 } 221 }; 222 223 template <> struct format_provider<codeview::LocallyHashedType> { 224 public: 225 static void format(const codeview::LocallyHashedType &V, 226 llvm::raw_ostream &Stream, StringRef Style) { 227 write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8); 228 } 229 }; 230 231 template <> struct format_provider<codeview::GloballyHashedType> { 232 public: 233 static void format(const codeview::GloballyHashedType &V, 234 llvm::raw_ostream &Stream, StringRef Style) { 235 for (uint8_t B : V.Hash) { 236 write_hex(Stream, B, HexPrintStyle::Upper, 2); 237 } 238 } 239 }; 240 241 } // namespace llvm 242 243 #endif 244