106c3fb27SDimitry Andric //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric 9e8d8bef9SDimitry Andric #include "llvm/IR/StructuralHash.h" 10*5f757f3fSDimitry Andric #include "llvm/ADT/Hashing.h" 11e8d8bef9SDimitry Andric #include "llvm/IR/Function.h" 1206c3fb27SDimitry Andric #include "llvm/IR/GlobalVariable.h" 13*5f757f3fSDimitry Andric #include "llvm/IR/InstrTypes.h" 14*5f757f3fSDimitry Andric #include "llvm/IR/Instructions.h" 15*5f757f3fSDimitry Andric #include "llvm/IR/IntrinsicInst.h" 16e8d8bef9SDimitry Andric #include "llvm/IR/Module.h" 17e8d8bef9SDimitry Andric 18e8d8bef9SDimitry Andric using namespace llvm; 19e8d8bef9SDimitry Andric 2006c3fb27SDimitry Andric namespace { 21e8d8bef9SDimitry Andric 22e8d8bef9SDimitry Andric // Basic hashing mechanism to detect structural change to the IR, used to verify 23*5f757f3fSDimitry Andric // pass return status consistency with actual change. In addition to being used 24*5f757f3fSDimitry Andric // by the MergeFunctions pass. 25e8d8bef9SDimitry Andric 2606c3fb27SDimitry Andric class StructuralHashImpl { 27*5f757f3fSDimitry Andric uint64_t Hash; 28e8d8bef9SDimitry Andric 29*5f757f3fSDimitry Andric void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } 30*5f757f3fSDimitry Andric 31*5f757f3fSDimitry Andric // This will produce different values on 32-bit and 64-bit systens as 32*5f757f3fSDimitry Andric // hash_combine returns a size_t. However, this is only used for 33*5f757f3fSDimitry Andric // detailed hashing which, in-tree, only needs to distinguish between 34*5f757f3fSDimitry Andric // differences in functions. 35*5f757f3fSDimitry Andric template <typename T> void hashArbitaryType(const T &V) { 36*5f757f3fSDimitry Andric hash(hash_combine(V)); 37*5f757f3fSDimitry Andric } 38*5f757f3fSDimitry Andric 39*5f757f3fSDimitry Andric void hashType(Type *ValueType) { 40*5f757f3fSDimitry Andric hash(ValueType->getTypeID()); 41*5f757f3fSDimitry Andric if (ValueType->isIntegerTy()) 42*5f757f3fSDimitry Andric hash(ValueType->getIntegerBitWidth()); 43*5f757f3fSDimitry Andric } 44e8d8bef9SDimitry Andric 45e8d8bef9SDimitry Andric public: 4606c3fb27SDimitry Andric StructuralHashImpl() : Hash(4) {} 47e8d8bef9SDimitry Andric 48*5f757f3fSDimitry Andric void updateOperand(Value *Operand) { 49*5f757f3fSDimitry Andric hashType(Operand->getType()); 50*5f757f3fSDimitry Andric 51*5f757f3fSDimitry Andric // The cases enumerated below are not exhaustive and are only aimed to 52*5f757f3fSDimitry Andric // get decent coverage over the function. 53*5f757f3fSDimitry Andric if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) { 54*5f757f3fSDimitry Andric hashArbitaryType(ConstInt->getValue()); 55*5f757f3fSDimitry Andric } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) { 56*5f757f3fSDimitry Andric hashArbitaryType(ConstFP->getValue()); 57*5f757f3fSDimitry Andric } else if (Argument *Arg = dyn_cast<Argument>(Operand)) { 58*5f757f3fSDimitry Andric hash(Arg->getArgNo()); 59*5f757f3fSDimitry Andric } else if (Function *Func = dyn_cast<Function>(Operand)) { 60*5f757f3fSDimitry Andric // Hashing the name will be deterministic as LLVM's hashing infrastructure 61*5f757f3fSDimitry Andric // has explicit support for hashing strings and will not simply hash 62*5f757f3fSDimitry Andric // the pointer. 63*5f757f3fSDimitry Andric hashArbitaryType(Func->getName()); 64*5f757f3fSDimitry Andric } 65*5f757f3fSDimitry Andric } 66*5f757f3fSDimitry Andric 67*5f757f3fSDimitry Andric void updateInstruction(const Instruction &Inst, bool DetailedHash) { 68*5f757f3fSDimitry Andric hash(Inst.getOpcode()); 69*5f757f3fSDimitry Andric 70*5f757f3fSDimitry Andric if (!DetailedHash) 71*5f757f3fSDimitry Andric return; 72*5f757f3fSDimitry Andric 73*5f757f3fSDimitry Andric hashType(Inst.getType()); 74*5f757f3fSDimitry Andric 75*5f757f3fSDimitry Andric // Handle additional properties of specific instructions that cause 76*5f757f3fSDimitry Andric // semantic differences in the IR. 77*5f757f3fSDimitry Andric if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst)) 78*5f757f3fSDimitry Andric hash(ComparisonInstruction->getPredicate()); 79*5f757f3fSDimitry Andric 80*5f757f3fSDimitry Andric for (const auto &Op : Inst.operands()) 81*5f757f3fSDimitry Andric updateOperand(Op); 82*5f757f3fSDimitry Andric } 83*5f757f3fSDimitry Andric 84*5f757f3fSDimitry Andric // A function hash is calculated by considering only the number of arguments 85*5f757f3fSDimitry Andric // and whether a function is varargs, the order of basic blocks (given by the 86*5f757f3fSDimitry Andric // successors of each basic block in depth first order), and the order of 87*5f757f3fSDimitry Andric // opcodes of each instruction within each of these basic blocks. This mirrors 88*5f757f3fSDimitry Andric // the strategy FunctionComparator::compare() uses to compare functions by 89*5f757f3fSDimitry Andric // walking the BBs in depth first order and comparing each instruction in 90*5f757f3fSDimitry Andric // sequence. Because this hash currently does not look at the operands, it is 91*5f757f3fSDimitry Andric // insensitive to things such as the target of calls and the constants used in 92*5f757f3fSDimitry Andric // the function, which makes it useful when possibly merging functions which 93*5f757f3fSDimitry Andric // are the same modulo constants and call targets. 94*5f757f3fSDimitry Andric // 95*5f757f3fSDimitry Andric // Note that different users of StructuralHash will want different behavior 96*5f757f3fSDimitry Andric // out of it (i.e., MergeFunctions will want something different from PM 97*5f757f3fSDimitry Andric // expensive checks for pass modification status). When modifying this 98*5f757f3fSDimitry Andric // function, most changes should be gated behind an option and enabled 99*5f757f3fSDimitry Andric // selectively. 100*5f757f3fSDimitry Andric void update(const Function &F, bool DetailedHash) { 10106c3fb27SDimitry Andric // Declarations don't affect analyses. 10206c3fb27SDimitry Andric if (F.isDeclaration()) 103e8d8bef9SDimitry Andric return; 104e8d8bef9SDimitry Andric 105*5f757f3fSDimitry Andric hash(0x62642d6b6b2d6b72); // Function header 10606c3fb27SDimitry Andric 10706c3fb27SDimitry Andric hash(F.isVarArg()); 10806c3fb27SDimitry Andric hash(F.arg_size()); 109e8d8bef9SDimitry Andric 110e8d8bef9SDimitry Andric SmallVector<const BasicBlock *, 8> BBs; 111e8d8bef9SDimitry Andric SmallPtrSet<const BasicBlock *, 16> VisitedBBs; 112e8d8bef9SDimitry Andric 113*5f757f3fSDimitry Andric // Walk the blocks in the same order as 114*5f757f3fSDimitry Andric // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the 115*5f757f3fSDimitry Andric // function "structure." (BB and opcode sequence) 116e8d8bef9SDimitry Andric BBs.push_back(&F.getEntryBlock()); 117e8d8bef9SDimitry Andric VisitedBBs.insert(BBs[0]); 118e8d8bef9SDimitry Andric while (!BBs.empty()) { 119e8d8bef9SDimitry Andric const BasicBlock *BB = BBs.pop_back_val(); 120*5f757f3fSDimitry Andric 121*5f757f3fSDimitry Andric // This random value acts as a block header, as otherwise the partition of 122*5f757f3fSDimitry Andric // opcodes into BBs wouldn't affect the hash, only the order of the 123*5f757f3fSDimitry Andric // opcodes 124*5f757f3fSDimitry Andric hash(45798); 125e8d8bef9SDimitry Andric for (auto &Inst : *BB) 126*5f757f3fSDimitry Andric updateInstruction(Inst, DetailedHash); 127e8d8bef9SDimitry Andric 128e8d8bef9SDimitry Andric const Instruction *Term = BB->getTerminator(); 129e8d8bef9SDimitry Andric for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 130e8d8bef9SDimitry Andric if (!VisitedBBs.insert(Term->getSuccessor(i)).second) 131e8d8bef9SDimitry Andric continue; 132e8d8bef9SDimitry Andric BBs.push_back(Term->getSuccessor(i)); 133e8d8bef9SDimitry Andric } 134e8d8bef9SDimitry Andric } 135e8d8bef9SDimitry Andric } 136e8d8bef9SDimitry Andric 13706c3fb27SDimitry Andric void update(const GlobalVariable &GV) { 13806c3fb27SDimitry Andric // Declarations and used/compiler.used don't affect analyses. 13906c3fb27SDimitry Andric // Since there are several `llvm.*` metadata, like `llvm.embedded.object`, 14006c3fb27SDimitry Andric // we ignore anything with the `.llvm` prefix 14106c3fb27SDimitry Andric if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) 14206c3fb27SDimitry Andric return; 14306c3fb27SDimitry Andric hash(23456); // Global header 14406c3fb27SDimitry Andric hash(GV.getValueType()->getTypeID()); 14506c3fb27SDimitry Andric } 14606c3fb27SDimitry Andric 147*5f757f3fSDimitry Andric void update(const Module &M, bool DetailedHash) { 14806c3fb27SDimitry Andric for (const GlobalVariable &GV : M.globals()) 14906c3fb27SDimitry Andric update(GV); 150e8d8bef9SDimitry Andric for (const Function &F : M) 151*5f757f3fSDimitry Andric update(F, DetailedHash); 152e8d8bef9SDimitry Andric } 153e8d8bef9SDimitry Andric 154e8d8bef9SDimitry Andric uint64_t getHash() const { return Hash; } 155e8d8bef9SDimitry Andric }; 156e8d8bef9SDimitry Andric 15706c3fb27SDimitry Andric } // namespace 158e8d8bef9SDimitry Andric 159*5f757f3fSDimitry Andric IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { 16006c3fb27SDimitry Andric StructuralHashImpl H; 161*5f757f3fSDimitry Andric H.update(F, DetailedHash); 162e8d8bef9SDimitry Andric return H.getHash(); 163e8d8bef9SDimitry Andric } 164e8d8bef9SDimitry Andric 165*5f757f3fSDimitry Andric IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { 16606c3fb27SDimitry Andric StructuralHashImpl H; 167*5f757f3fSDimitry Andric H.update(M, DetailedHash); 168e8d8bef9SDimitry Andric return H.getHash(); 169e8d8bef9SDimitry Andric } 170