xref: /freebsd/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
106c3fb27SDimitry Andric //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric 
9e8d8bef9SDimitry Andric #include "llvm/IR/StructuralHash.h"
10*5f757f3fSDimitry Andric #include "llvm/ADT/Hashing.h"
11e8d8bef9SDimitry Andric #include "llvm/IR/Function.h"
1206c3fb27SDimitry Andric #include "llvm/IR/GlobalVariable.h"
13*5f757f3fSDimitry Andric #include "llvm/IR/InstrTypes.h"
14*5f757f3fSDimitry Andric #include "llvm/IR/Instructions.h"
15*5f757f3fSDimitry Andric #include "llvm/IR/IntrinsicInst.h"
16e8d8bef9SDimitry Andric #include "llvm/IR/Module.h"
17e8d8bef9SDimitry Andric 
18e8d8bef9SDimitry Andric using namespace llvm;
19e8d8bef9SDimitry Andric 
2006c3fb27SDimitry Andric namespace {
21e8d8bef9SDimitry Andric 
22e8d8bef9SDimitry Andric // Basic hashing mechanism to detect structural change to the IR, used to verify
23*5f757f3fSDimitry Andric // pass return status consistency with actual change. In addition to being used
24*5f757f3fSDimitry Andric // by the MergeFunctions pass.
25e8d8bef9SDimitry Andric 
2606c3fb27SDimitry Andric class StructuralHashImpl {
27*5f757f3fSDimitry Andric   uint64_t Hash;
28e8d8bef9SDimitry Andric 
29*5f757f3fSDimitry Andric   void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
30*5f757f3fSDimitry Andric 
31*5f757f3fSDimitry Andric   // This will produce different values on 32-bit and 64-bit systens as
32*5f757f3fSDimitry Andric   // hash_combine returns a size_t. However, this is only used for
33*5f757f3fSDimitry Andric   // detailed hashing which, in-tree, only needs to distinguish between
34*5f757f3fSDimitry Andric   // differences in functions.
35*5f757f3fSDimitry Andric   template <typename T> void hashArbitaryType(const T &V) {
36*5f757f3fSDimitry Andric     hash(hash_combine(V));
37*5f757f3fSDimitry Andric   }
38*5f757f3fSDimitry Andric 
39*5f757f3fSDimitry Andric   void hashType(Type *ValueType) {
40*5f757f3fSDimitry Andric     hash(ValueType->getTypeID());
41*5f757f3fSDimitry Andric     if (ValueType->isIntegerTy())
42*5f757f3fSDimitry Andric       hash(ValueType->getIntegerBitWidth());
43*5f757f3fSDimitry Andric   }
44e8d8bef9SDimitry Andric 
45e8d8bef9SDimitry Andric public:
4606c3fb27SDimitry Andric   StructuralHashImpl() : Hash(4) {}
47e8d8bef9SDimitry Andric 
48*5f757f3fSDimitry Andric   void updateOperand(Value *Operand) {
49*5f757f3fSDimitry Andric     hashType(Operand->getType());
50*5f757f3fSDimitry Andric 
51*5f757f3fSDimitry Andric     // The cases enumerated below are not exhaustive and are only aimed to
52*5f757f3fSDimitry Andric     // get decent coverage over the function.
53*5f757f3fSDimitry Andric     if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
54*5f757f3fSDimitry Andric       hashArbitaryType(ConstInt->getValue());
55*5f757f3fSDimitry Andric     } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
56*5f757f3fSDimitry Andric       hashArbitaryType(ConstFP->getValue());
57*5f757f3fSDimitry Andric     } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
58*5f757f3fSDimitry Andric       hash(Arg->getArgNo());
59*5f757f3fSDimitry Andric     } else if (Function *Func = dyn_cast<Function>(Operand)) {
60*5f757f3fSDimitry Andric       // Hashing the name will be deterministic as LLVM's hashing infrastructure
61*5f757f3fSDimitry Andric       // has explicit support for hashing strings and will not simply hash
62*5f757f3fSDimitry Andric       // the pointer.
63*5f757f3fSDimitry Andric       hashArbitaryType(Func->getName());
64*5f757f3fSDimitry Andric     }
65*5f757f3fSDimitry Andric   }
66*5f757f3fSDimitry Andric 
67*5f757f3fSDimitry Andric   void updateInstruction(const Instruction &Inst, bool DetailedHash) {
68*5f757f3fSDimitry Andric     hash(Inst.getOpcode());
69*5f757f3fSDimitry Andric 
70*5f757f3fSDimitry Andric     if (!DetailedHash)
71*5f757f3fSDimitry Andric       return;
72*5f757f3fSDimitry Andric 
73*5f757f3fSDimitry Andric     hashType(Inst.getType());
74*5f757f3fSDimitry Andric 
75*5f757f3fSDimitry Andric     // Handle additional properties of specific instructions that cause
76*5f757f3fSDimitry Andric     // semantic differences in the IR.
77*5f757f3fSDimitry Andric     if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
78*5f757f3fSDimitry Andric       hash(ComparisonInstruction->getPredicate());
79*5f757f3fSDimitry Andric 
80*5f757f3fSDimitry Andric     for (const auto &Op : Inst.operands())
81*5f757f3fSDimitry Andric       updateOperand(Op);
82*5f757f3fSDimitry Andric   }
83*5f757f3fSDimitry Andric 
84*5f757f3fSDimitry Andric   // A function hash is calculated by considering only the number of arguments
85*5f757f3fSDimitry Andric   // and whether a function is varargs, the order of basic blocks (given by the
86*5f757f3fSDimitry Andric   // successors of each basic block in depth first order), and the order of
87*5f757f3fSDimitry Andric   // opcodes of each instruction within each of these basic blocks. This mirrors
88*5f757f3fSDimitry Andric   // the strategy FunctionComparator::compare() uses to compare functions by
89*5f757f3fSDimitry Andric   // walking the BBs in depth first order and comparing each instruction in
90*5f757f3fSDimitry Andric   // sequence. Because this hash currently does not look at the operands, it is
91*5f757f3fSDimitry Andric   // insensitive to things such as the target of calls and the constants used in
92*5f757f3fSDimitry Andric   // the function, which makes it useful when possibly merging functions which
93*5f757f3fSDimitry Andric   // are the same modulo constants and call targets.
94*5f757f3fSDimitry Andric   //
95*5f757f3fSDimitry Andric   // Note that different users of StructuralHash will want different behavior
96*5f757f3fSDimitry Andric   // out of it (i.e., MergeFunctions will want something different from PM
97*5f757f3fSDimitry Andric   // expensive checks for pass modification status). When modifying this
98*5f757f3fSDimitry Andric   // function, most changes should be gated behind an option and enabled
99*5f757f3fSDimitry Andric   // selectively.
100*5f757f3fSDimitry Andric   void update(const Function &F, bool DetailedHash) {
10106c3fb27SDimitry Andric     // Declarations don't affect analyses.
10206c3fb27SDimitry Andric     if (F.isDeclaration())
103e8d8bef9SDimitry Andric       return;
104e8d8bef9SDimitry Andric 
105*5f757f3fSDimitry Andric     hash(0x62642d6b6b2d6b72); // Function header
10606c3fb27SDimitry Andric 
10706c3fb27SDimitry Andric     hash(F.isVarArg());
10806c3fb27SDimitry Andric     hash(F.arg_size());
109e8d8bef9SDimitry Andric 
110e8d8bef9SDimitry Andric     SmallVector<const BasicBlock *, 8> BBs;
111e8d8bef9SDimitry Andric     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
112e8d8bef9SDimitry Andric 
113*5f757f3fSDimitry Andric     // Walk the blocks in the same order as
114*5f757f3fSDimitry Andric     // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
115*5f757f3fSDimitry Andric     // function "structure." (BB and opcode sequence)
116e8d8bef9SDimitry Andric     BBs.push_back(&F.getEntryBlock());
117e8d8bef9SDimitry Andric     VisitedBBs.insert(BBs[0]);
118e8d8bef9SDimitry Andric     while (!BBs.empty()) {
119e8d8bef9SDimitry Andric       const BasicBlock *BB = BBs.pop_back_val();
120*5f757f3fSDimitry Andric 
121*5f757f3fSDimitry Andric       // This random value acts as a block header, as otherwise the partition of
122*5f757f3fSDimitry Andric       // opcodes into BBs wouldn't affect the hash, only the order of the
123*5f757f3fSDimitry Andric       // opcodes
124*5f757f3fSDimitry Andric       hash(45798);
125e8d8bef9SDimitry Andric       for (auto &Inst : *BB)
126*5f757f3fSDimitry Andric         updateInstruction(Inst, DetailedHash);
127e8d8bef9SDimitry Andric 
128e8d8bef9SDimitry Andric       const Instruction *Term = BB->getTerminator();
129e8d8bef9SDimitry Andric       for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
130e8d8bef9SDimitry Andric         if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
131e8d8bef9SDimitry Andric           continue;
132e8d8bef9SDimitry Andric         BBs.push_back(Term->getSuccessor(i));
133e8d8bef9SDimitry Andric       }
134e8d8bef9SDimitry Andric     }
135e8d8bef9SDimitry Andric   }
136e8d8bef9SDimitry Andric 
13706c3fb27SDimitry Andric   void update(const GlobalVariable &GV) {
13806c3fb27SDimitry Andric     // Declarations and used/compiler.used don't affect analyses.
13906c3fb27SDimitry Andric     // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
14006c3fb27SDimitry Andric     // we ignore anything with the `.llvm` prefix
14106c3fb27SDimitry Andric     if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
14206c3fb27SDimitry Andric       return;
14306c3fb27SDimitry Andric     hash(23456); // Global header
14406c3fb27SDimitry Andric     hash(GV.getValueType()->getTypeID());
14506c3fb27SDimitry Andric   }
14606c3fb27SDimitry Andric 
147*5f757f3fSDimitry Andric   void update(const Module &M, bool DetailedHash) {
14806c3fb27SDimitry Andric     for (const GlobalVariable &GV : M.globals())
14906c3fb27SDimitry Andric       update(GV);
150e8d8bef9SDimitry Andric     for (const Function &F : M)
151*5f757f3fSDimitry Andric       update(F, DetailedHash);
152e8d8bef9SDimitry Andric   }
153e8d8bef9SDimitry Andric 
154e8d8bef9SDimitry Andric   uint64_t getHash() const { return Hash; }
155e8d8bef9SDimitry Andric };
156e8d8bef9SDimitry Andric 
15706c3fb27SDimitry Andric } // namespace
158e8d8bef9SDimitry Andric 
159*5f757f3fSDimitry Andric IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
16006c3fb27SDimitry Andric   StructuralHashImpl H;
161*5f757f3fSDimitry Andric   H.update(F, DetailedHash);
162e8d8bef9SDimitry Andric   return H.getHash();
163e8d8bef9SDimitry Andric }
164e8d8bef9SDimitry Andric 
165*5f757f3fSDimitry Andric IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
16606c3fb27SDimitry Andric   StructuralHashImpl H;
167*5f757f3fSDimitry Andric   H.update(M, DetailedHash);
168e8d8bef9SDimitry Andric   return H.getHash();
169e8d8bef9SDimitry Andric }
170