1 //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/IR/StructuralHash.h" 10 #include "llvm/ADT/Hashing.h" 11 #include "llvm/IR/Function.h" 12 #include "llvm/IR/GlobalVariable.h" 13 #include "llvm/IR/InstrTypes.h" 14 #include "llvm/IR/Instructions.h" 15 #include "llvm/IR/IntrinsicInst.h" 16 #include "llvm/IR/Module.h" 17 18 using namespace llvm; 19 20 namespace { 21 22 // Basic hashing mechanism to detect structural change to the IR, used to verify 23 // pass return status consistency with actual change. In addition to being used 24 // by the MergeFunctions pass. 25 26 class StructuralHashImpl { 27 uint64_t Hash; 28 29 void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } 30 31 // This will produce different values on 32-bit and 64-bit systens as 32 // hash_combine returns a size_t. However, this is only used for 33 // detailed hashing which, in-tree, only needs to distinguish between 34 // differences in functions. 35 template <typename T> void hashArbitaryType(const T &V) { 36 hash(hash_combine(V)); 37 } 38 39 void hashType(Type *ValueType) { 40 hash(ValueType->getTypeID()); 41 if (ValueType->isIntegerTy()) 42 hash(ValueType->getIntegerBitWidth()); 43 } 44 45 public: 46 StructuralHashImpl() : Hash(4) {} 47 48 void updateOperand(Value *Operand) { 49 hashType(Operand->getType()); 50 51 // The cases enumerated below are not exhaustive and are only aimed to 52 // get decent coverage over the function. 53 if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) { 54 hashArbitaryType(ConstInt->getValue()); 55 } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) { 56 hashArbitaryType(ConstFP->getValue()); 57 } else if (Argument *Arg = dyn_cast<Argument>(Operand)) { 58 hash(Arg->getArgNo()); 59 } else if (Function *Func = dyn_cast<Function>(Operand)) { 60 // Hashing the name will be deterministic as LLVM's hashing infrastructure 61 // has explicit support for hashing strings and will not simply hash 62 // the pointer. 63 hashArbitaryType(Func->getName()); 64 } 65 } 66 67 void updateInstruction(const Instruction &Inst, bool DetailedHash) { 68 hash(Inst.getOpcode()); 69 70 if (!DetailedHash) 71 return; 72 73 hashType(Inst.getType()); 74 75 // Handle additional properties of specific instructions that cause 76 // semantic differences in the IR. 77 if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst)) 78 hash(ComparisonInstruction->getPredicate()); 79 80 for (const auto &Op : Inst.operands()) 81 updateOperand(Op); 82 } 83 84 // A function hash is calculated by considering only the number of arguments 85 // and whether a function is varargs, the order of basic blocks (given by the 86 // successors of each basic block in depth first order), and the order of 87 // opcodes of each instruction within each of these basic blocks. This mirrors 88 // the strategy FunctionComparator::compare() uses to compare functions by 89 // walking the BBs in depth first order and comparing each instruction in 90 // sequence. Because this hash currently does not look at the operands, it is 91 // insensitive to things such as the target of calls and the constants used in 92 // the function, which makes it useful when possibly merging functions which 93 // are the same modulo constants and call targets. 94 // 95 // Note that different users of StructuralHash will want different behavior 96 // out of it (i.e., MergeFunctions will want something different from PM 97 // expensive checks for pass modification status). When modifying this 98 // function, most changes should be gated behind an option and enabled 99 // selectively. 100 void update(const Function &F, bool DetailedHash) { 101 // Declarations don't affect analyses. 102 if (F.isDeclaration()) 103 return; 104 105 hash(0x62642d6b6b2d6b72); // Function header 106 107 hash(F.isVarArg()); 108 hash(F.arg_size()); 109 110 SmallVector<const BasicBlock *, 8> BBs; 111 SmallPtrSet<const BasicBlock *, 16> VisitedBBs; 112 113 // Walk the blocks in the same order as 114 // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the 115 // function "structure." (BB and opcode sequence) 116 BBs.push_back(&F.getEntryBlock()); 117 VisitedBBs.insert(BBs[0]); 118 while (!BBs.empty()) { 119 const BasicBlock *BB = BBs.pop_back_val(); 120 121 // This random value acts as a block header, as otherwise the partition of 122 // opcodes into BBs wouldn't affect the hash, only the order of the 123 // opcodes 124 hash(45798); 125 for (auto &Inst : *BB) 126 updateInstruction(Inst, DetailedHash); 127 128 for (const BasicBlock *Succ : successors(BB)) 129 if (VisitedBBs.insert(Succ).second) 130 BBs.push_back(Succ); 131 } 132 } 133 134 void update(const GlobalVariable &GV) { 135 // Declarations and used/compiler.used don't affect analyses. 136 // Since there are several `llvm.*` metadata, like `llvm.embedded.object`, 137 // we ignore anything with the `.llvm` prefix 138 if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) 139 return; 140 hash(23456); // Global header 141 hash(GV.getValueType()->getTypeID()); 142 } 143 144 void update(const Module &M, bool DetailedHash) { 145 for (const GlobalVariable &GV : M.globals()) 146 update(GV); 147 for (const Function &F : M) 148 update(F, DetailedHash); 149 } 150 151 uint64_t getHash() const { return Hash; } 152 }; 153 154 } // namespace 155 156 IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { 157 StructuralHashImpl H; 158 H.update(F, DetailedHash); 159 return H.getHash(); 160 } 161 162 IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { 163 StructuralHashImpl H; 164 H.update(M, DetailedHash); 165 return H.getHash(); 166 } 167