xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs global value numbering to eliminate fully redundant
10 // instructions.  It also performs simple dead load elimination.
11 //
12 // Note that this pass does the value numbering itself; it does not use the
13 // ValueNumbering analysis passes.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Transforms/Scalar/GVN.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/DepthFirstIterator.h"
20 #include "llvm/ADT/Hashing.h"
21 #include "llvm/ADT/MapVector.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/Analysis/AliasAnalysis.h"
29 #include "llvm/Analysis/AssumeBundleQueries.h"
30 #include "llvm/Analysis/AssumptionCache.h"
31 #include "llvm/Analysis/CFG.h"
32 #include "llvm/Analysis/DomTreeUpdater.h"
33 #include "llvm/Analysis/GlobalsModRef.h"
34 #include "llvm/Analysis/InstructionPrecedenceTracking.h"
35 #include "llvm/Analysis/InstructionSimplify.h"
36 #include "llvm/Analysis/Loads.h"
37 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/Analysis/MemoryBuiltins.h"
39 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
40 #include "llvm/Analysis/MemorySSA.h"
41 #include "llvm/Analysis/MemorySSAUpdater.h"
42 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
43 #include "llvm/Analysis/PHITransAddr.h"
44 #include "llvm/Analysis/TargetLibraryInfo.h"
45 #include "llvm/Analysis/ValueTracking.h"
46 #include "llvm/IR/Attributes.h"
47 #include "llvm/IR/BasicBlock.h"
48 #include "llvm/IR/Constant.h"
49 #include "llvm/IR/Constants.h"
50 #include "llvm/IR/DebugLoc.h"
51 #include "llvm/IR/Dominators.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/LLVMContext.h"
58 #include "llvm/IR/Metadata.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/IR/PatternMatch.h"
62 #include "llvm/IR/Type.h"
63 #include "llvm/IR/Use.h"
64 #include "llvm/IR/Value.h"
65 #include "llvm/InitializePasses.h"
66 #include "llvm/Pass.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Compiler.h"
70 #include "llvm/Support/Debug.h"
71 #include "llvm/Support/raw_ostream.h"
72 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
73 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
74 #include "llvm/Transforms/Utils/Local.h"
75 #include "llvm/Transforms/Utils/SSAUpdater.h"
76 #include "llvm/Transforms/Utils/VNCoercion.h"
77 #include <algorithm>
78 #include <cassert>
79 #include <cstdint>
80 #include <optional>
81 #include <utility>
82 
83 using namespace llvm;
84 using namespace llvm::gvn;
85 using namespace llvm::VNCoercion;
86 using namespace PatternMatch;
87 
88 #define DEBUG_TYPE "gvn"
89 
90 STATISTIC(NumGVNInstr, "Number of instructions deleted");
91 STATISTIC(NumGVNLoad, "Number of loads deleted");
92 STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
93 STATISTIC(NumGVNBlocks, "Number of blocks merged");
94 STATISTIC(NumGVNSimpl, "Number of instructions simplified");
95 STATISTIC(NumGVNEqProp, "Number of equalities propagated");
96 STATISTIC(NumPRELoad, "Number of loads PRE'd");
97 STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
98 STATISTIC(NumPRELoadMoved2CEPred,
99           "Number of loads moved to predecessor of a critical edge in PRE");
100 
101 STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
102           "Number of blocks speculated as available in "
103           "IsValueFullyAvailableInBlock(), max");
104 STATISTIC(MaxBBSpeculationCutoffReachedTimes,
105           "Number of times we we reached gvn-max-block-speculations cut-off "
106           "preventing further exploration");
107 
108 static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
109 static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
110 static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
111                                             cl::init(true));
112 static cl::opt<bool>
113 GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
114                                 cl::init(false));
115 static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
116 
117 static cl::opt<uint32_t> MaxNumDeps(
118     "gvn-max-num-deps", cl::Hidden, cl::init(100),
119     cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
120 
121 // This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
122 static cl::opt<uint32_t> MaxBBSpeculations(
123     "gvn-max-block-speculations", cl::Hidden, cl::init(600),
124     cl::desc("Max number of blocks we're willing to speculate on (and recurse "
125              "into) when deducing if a value is fully available or not in GVN "
126              "(default = 600)"));
127 
128 static cl::opt<uint32_t> MaxNumVisitedInsts(
129     "gvn-max-num-visited-insts", cl::Hidden, cl::init(100),
130     cl::desc("Max number of visited instructions when trying to find "
131              "dominating value of select dependency (default = 100)"));
132 
133 static cl::opt<uint32_t> MaxNumInsnsPerBlock(
134     "gvn-max-num-insns", cl::Hidden, cl::init(100),
135     cl::desc("Max number of instructions to scan in each basic block in GVN "
136              "(default = 100)"));
137 
138 struct llvm::GVNPass::Expression {
139   uint32_t opcode;
140   bool commutative = false;
141   // The type is not necessarily the result type of the expression, it may be
142   // any additional type needed to disambiguate the expression.
143   Type *type = nullptr;
144   SmallVector<uint32_t, 4> varargs;
145 
146   Expression(uint32_t o = ~2U) : opcode(o) {}
147 
148   bool operator==(const Expression &other) const {
149     if (opcode != other.opcode)
150       return false;
151     if (opcode == ~0U || opcode == ~1U)
152       return true;
153     if (type != other.type)
154       return false;
155     if (varargs != other.varargs)
156       return false;
157     return true;
158   }
159 
160   friend hash_code hash_value(const Expression &Value) {
161     return hash_combine(
162         Value.opcode, Value.type,
163         hash_combine_range(Value.varargs.begin(), Value.varargs.end()));
164   }
165 };
166 
167 namespace llvm {
168 
169 template <> struct DenseMapInfo<GVNPass::Expression> {
170   static inline GVNPass::Expression getEmptyKey() { return ~0U; }
171   static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
172 
173   static unsigned getHashValue(const GVNPass::Expression &e) {
174     using llvm::hash_value;
175 
176     return static_cast<unsigned>(hash_value(e));
177   }
178 
179   static bool isEqual(const GVNPass::Expression &LHS,
180                       const GVNPass::Expression &RHS) {
181     return LHS == RHS;
182   }
183 };
184 
185 } // end namespace llvm
186 
187 /// Represents a particular available value that we know how to materialize.
188 /// Materialization of an AvailableValue never fails.  An AvailableValue is
189 /// implicitly associated with a rematerialization point which is the
190 /// location of the instruction from which it was formed.
191 struct llvm::gvn::AvailableValue {
192   enum class ValType {
193     SimpleVal, // A simple offsetted value that is accessed.
194     LoadVal,   // A value produced by a load.
195     MemIntrin, // A memory intrinsic which is loaded from.
196     UndefVal,  // An UndefValue representing a value from a dead block (which
197                // is not yet physically removed from the CFG).
198     SelectVal, // A pointer select which is loaded from and for which the load
199                // can be replaced by a value select.
200   };
201 
202   /// Val - The value that is live out of the block.
203   Value *Val;
204   /// Kind of the live-out value.
205   ValType Kind;
206 
207   /// Offset - The byte offset in Val that is interesting for the load query.
208   unsigned Offset = 0;
209   /// V1, V2 - The dominating non-clobbered values of SelectVal.
210   Value *V1 = nullptr, *V2 = nullptr;
211 
212   static AvailableValue get(Value *V, unsigned Offset = 0) {
213     AvailableValue Res;
214     Res.Val = V;
215     Res.Kind = ValType::SimpleVal;
216     Res.Offset = Offset;
217     return Res;
218   }
219 
220   static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
221     AvailableValue Res;
222     Res.Val = MI;
223     Res.Kind = ValType::MemIntrin;
224     Res.Offset = Offset;
225     return Res;
226   }
227 
228   static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
229     AvailableValue Res;
230     Res.Val = Load;
231     Res.Kind = ValType::LoadVal;
232     Res.Offset = Offset;
233     return Res;
234   }
235 
236   static AvailableValue getUndef() {
237     AvailableValue Res;
238     Res.Val = nullptr;
239     Res.Kind = ValType::UndefVal;
240     Res.Offset = 0;
241     return Res;
242   }
243 
244   static AvailableValue getSelect(SelectInst *Sel, Value *V1, Value *V2) {
245     AvailableValue Res;
246     Res.Val = Sel;
247     Res.Kind = ValType::SelectVal;
248     Res.Offset = 0;
249     Res.V1 = V1;
250     Res.V2 = V2;
251     return Res;
252   }
253 
254   bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
255   bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
256   bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
257   bool isUndefValue() const { return Kind == ValType::UndefVal; }
258   bool isSelectValue() const { return Kind == ValType::SelectVal; }
259 
260   Value *getSimpleValue() const {
261     assert(isSimpleValue() && "Wrong accessor");
262     return Val;
263   }
264 
265   LoadInst *getCoercedLoadValue() const {
266     assert(isCoercedLoadValue() && "Wrong accessor");
267     return cast<LoadInst>(Val);
268   }
269 
270   MemIntrinsic *getMemIntrinValue() const {
271     assert(isMemIntrinValue() && "Wrong accessor");
272     return cast<MemIntrinsic>(Val);
273   }
274 
275   SelectInst *getSelectValue() const {
276     assert(isSelectValue() && "Wrong accessor");
277     return cast<SelectInst>(Val);
278   }
279 
280   /// Emit code at the specified insertion point to adjust the value defined
281   /// here to the specified type. This handles various coercion cases.
282   Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt,
283                                   GVNPass &gvn) const;
284 };
285 
286 /// Represents an AvailableValue which can be rematerialized at the end of
287 /// the associated BasicBlock.
288 struct llvm::gvn::AvailableValueInBlock {
289   /// BB - The basic block in question.
290   BasicBlock *BB = nullptr;
291 
292   /// AV - The actual available value
293   AvailableValue AV;
294 
295   static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
296     AvailableValueInBlock Res;
297     Res.BB = BB;
298     Res.AV = std::move(AV);
299     return Res;
300   }
301 
302   static AvailableValueInBlock get(BasicBlock *BB, Value *V,
303                                    unsigned Offset = 0) {
304     return get(BB, AvailableValue::get(V, Offset));
305   }
306 
307   static AvailableValueInBlock getUndef(BasicBlock *BB) {
308     return get(BB, AvailableValue::getUndef());
309   }
310 
311   static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel,
312                                          Value *V1, Value *V2) {
313     return get(BB, AvailableValue::getSelect(Sel, V1, V2));
314   }
315 
316   /// Emit code at the end of this block to adjust the value defined here to
317   /// the specified type. This handles various coercion cases.
318   Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const {
319     return AV.MaterializeAdjustedValue(Load, BB->getTerminator(), gvn);
320   }
321 };
322 
323 //===----------------------------------------------------------------------===//
324 //                     ValueTable Internal Functions
325 //===----------------------------------------------------------------------===//
326 
327 GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
328   Expression e;
329   e.type = I->getType();
330   e.opcode = I->getOpcode();
331   if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) {
332     // gc.relocate is 'special' call: its second and third operands are
333     // not real values, but indices into statepoint's argument list.
334     // Use the referred-to values for purposes of identity.
335     e.varargs.push_back(lookupOrAdd(GCR->getOperand(0)));
336     e.varargs.push_back(lookupOrAdd(GCR->getBasePtr()));
337     e.varargs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
338   } else {
339     for (Use &Op : I->operands())
340       e.varargs.push_back(lookupOrAdd(Op));
341   }
342   if (I->isCommutative()) {
343     // Ensure that commutative instructions that only differ by a permutation
344     // of their operands get the same value number by sorting the operand value
345     // numbers.  Since the commutative operands are the first two, it is more
346     // efficient to sort by hand rather than using, say, std::sort.
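    // For example, 'add i32 %a, %b' and 'add i32 %b, %a' end up with identical
    // operand lists after this swap, so both receive the same value number.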
347     assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
348     if (e.varargs[0] > e.varargs[1])
349       std::swap(e.varargs[0], e.varargs[1]);
350     e.commutative = true;
351   }
352 
353   if (auto *C = dyn_cast<CmpInst>(I)) {
354     // Sort the operand value numbers so x<y and y>x get the same value number.
355     CmpInst::Predicate Predicate = C->getPredicate();
356     if (e.varargs[0] > e.varargs[1]) {
357       std::swap(e.varargs[0], e.varargs[1]);
358       Predicate = CmpInst::getSwappedPredicate(Predicate);
359     }
360     e.opcode = (C->getOpcode() << 8) | Predicate;
361     e.commutative = true;
362   } else if (auto *E = dyn_cast<InsertValueInst>(I)) {
363     e.varargs.append(E->idx_begin(), E->idx_end());
364   } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
365     ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
366     e.varargs.append(ShuffleMask.begin(), ShuffleMask.end());
367   }
368 
369   return e;
370 }
371 
372 GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
373     unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
374   assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
375          "Not a comparison!");
376   Expression e;
377   e.type = CmpInst::makeCmpResultType(LHS->getType());
378   e.varargs.push_back(lookupOrAdd(LHS));
379   e.varargs.push_back(lookupOrAdd(RHS));
380 
381   // Sort the operand value numbers so x<y and y>x get the same value number.
382   if (e.varargs[0] > e.varargs[1]) {
383     std::swap(e.varargs[0], e.varargs[1]);
384     Predicate = CmpInst::getSwappedPredicate(Predicate);
385   }
386   e.opcode = (Opcode << 8) | Predicate;
387   e.commutative = true;
388   return e;
389 }
390 
391 GVNPass::Expression
392 GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
393   assert(EI && "Not an ExtractValueInst?");
394   Expression e;
395   e.type = EI->getType();
396   e.opcode = 0;
397 
398   WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
399   if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
400     // EI is an extract from one of our with.overflow intrinsics. Synthesize
401     // a semantically equivalent expression instead of an extract value
402     // expression.
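    // E.g., extracting index 0 of 'llvm.sadd.with.overflow(%a, %b)' is numbered
    // as if it were the plain 'add %a, %b' that it computes.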
403     e.opcode = WO->getBinaryOp();
404     e.varargs.push_back(lookupOrAdd(WO->getLHS()));
405     e.varargs.push_back(lookupOrAdd(WO->getRHS()));
406     return e;
407   }
408 
409   // Not a recognised intrinsic. Fall back to producing an extract value
410   // expression.
411   e.opcode = EI->getOpcode();
412   for (Use &Op : EI->operands())
413     e.varargs.push_back(lookupOrAdd(Op));
414 
415   append_range(e.varargs, EI->indices());
416 
417   return e;
418 }
419 
420 GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
421   Expression E;
422   Type *PtrTy = GEP->getType()->getScalarType();
423   const DataLayout &DL = GEP->getDataLayout();
424   unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
425   MapVector<Value *, APInt> VariableOffsets;
426   APInt ConstantOffset(BitWidth, 0);
427   if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
428     // Convert into offset representation, to recognize equivalent address
429     // calculations that use different type encoding.
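    // E.g., 'getelementptr i32, ptr %p, i64 1' and 'getelementptr i8, ptr %p, i64 4'
    // both reduce to %p plus a constant offset of 4 bytes and therefore receive
    // the same value number.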
430     LLVMContext &Context = GEP->getContext();
431     E.opcode = GEP->getOpcode();
432     E.type = nullptr;
433     E.varargs.push_back(lookupOrAdd(GEP->getPointerOperand()));
434     for (const auto &Pair : VariableOffsets) {
435       E.varargs.push_back(lookupOrAdd(Pair.first));
436       E.varargs.push_back(lookupOrAdd(ConstantInt::get(Context, Pair.second)));
437     }
438     if (!ConstantOffset.isZero())
439       E.varargs.push_back(
440           lookupOrAdd(ConstantInt::get(Context, ConstantOffset)));
441   } else {
442     // If converting to offset representation fails (for scalable vectors),
443     // fall back to type-based implementation:
444     E.opcode = GEP->getOpcode();
445     E.type = GEP->getSourceElementType();
446     for (Use &Op : GEP->operands())
447       E.varargs.push_back(lookupOrAdd(Op));
448   }
449   return E;
450 }
451 
452 //===----------------------------------------------------------------------===//
453 //                     ValueTable External Functions
454 //===----------------------------------------------------------------------===//
455 
456 GVNPass::ValueTable::ValueTable() = default;
457 GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
458 GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
459 GVNPass::ValueTable::~ValueTable() = default;
460 GVNPass::ValueTable &
461 GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
462 
463 /// add - Insert a value into the table with a specified value number.
464 void GVNPass::ValueTable::add(Value *V, uint32_t num) {
465   valueNumbering.insert(std::make_pair(V, num));
466   if (PHINode *PN = dyn_cast<PHINode>(V))
467     NumberingPhi[num] = PN;
468 }
469 
470 uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
471   // FIXME: Currently the calls which may access the thread id may
472   // be considered as not accessing the memory. But this is
473   // problematic for coroutines, since coroutines may resume in a
474 // different thread. So we disable the optimization here for
475   // correctness. However, it may block many other correct
476   // optimizations. Revert this one when we detect the memory
477   // accessing kind more precisely.
478   if (C->getFunction()->isPresplitCoroutine()) {
479     valueNumbering[C] = nextValueNumber;
480     return nextValueNumber++;
481   }
482 
483   // Do not combine convergent calls since they implicitly depend on the set of
484   // threads that is currently executing, and they might be in different basic
485   // blocks.
486   if (C->isConvergent()) {
487     valueNumbering[C] = nextValueNumber;
488     return nextValueNumber++;
489   }
490 
491   if (AA->doesNotAccessMemory(C)) {
492     Expression exp = createExpr(C);
493     uint32_t e = assignExpNewValueNum(exp).first;
494     valueNumbering[C] = e;
495     return e;
496   }
497 
498   if (MD && AA->onlyReadsMemory(C)) {
499     Expression exp = createExpr(C);
500     auto ValNum = assignExpNewValueNum(exp);
501     if (ValNum.second) {
502       valueNumbering[C] = ValNum.first;
503       return ValNum.first;
504     }
505 
506     MemDepResult local_dep = MD->getDependency(C);
507 
508     if (!local_dep.isDef() && !local_dep.isNonLocal()) {
509       valueNumbering[C] =  nextValueNumber;
510       return nextValueNumber++;
511     }
512 
513     if (local_dep.isDef()) {
514       // For masked load/store intrinsics, the local_dep may actually be
515       // a normal load or store instruction.
516       CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
517 
518       if (!local_cdep || local_cdep->arg_size() != C->arg_size()) {
519         valueNumbering[C] = nextValueNumber;
520         return nextValueNumber++;
521       }
522 
523       for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
524         uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
525         uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
526         if (c_vn != cd_vn) {
527           valueNumbering[C] = nextValueNumber;
528           return nextValueNumber++;
529         }
530       }
531 
532       uint32_t v = lookupOrAdd(local_cdep);
533       valueNumbering[C] = v;
534       return v;
535     }
536 
537     // Non-local case.
538     const MemoryDependenceResults::NonLocalDepInfo &deps =
539         MD->getNonLocalCallDependency(C);
540     // FIXME: Move the checking logic to MemDep!
541     CallInst* cdep = nullptr;
542 
543     // Check to see if we have a single dominating call instruction that is
544     // identical to C.
545     for (const NonLocalDepEntry &I : deps) {
546       if (I.getResult().isNonLocal())
547         continue;
548 
549       // We don't handle non-definitions.  If we already have a call, reject
550       // instruction dependencies.
551       if (!I.getResult().isDef() || cdep != nullptr) {
552         cdep = nullptr;
553         break;
554       }
555 
556       CallInst *NonLocalDepCall = dyn_cast<CallInst>(I.getResult().getInst());
557       // FIXME: All duplicated with non-local case.
558       if (NonLocalDepCall && DT->properlyDominates(I.getBB(), C->getParent())) {
559         cdep = NonLocalDepCall;
560         continue;
561       }
562 
563       cdep = nullptr;
564       break;
565     }
566 
567     if (!cdep) {
568       valueNumbering[C] = nextValueNumber;
569       return nextValueNumber++;
570     }
571 
572     if (cdep->arg_size() != C->arg_size()) {
573       valueNumbering[C] = nextValueNumber;
574       return nextValueNumber++;
575     }
576     for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
577       uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
578       uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
579       if (c_vn != cd_vn) {
580         valueNumbering[C] = nextValueNumber;
581         return nextValueNumber++;
582       }
583     }
584 
585     uint32_t v = lookupOrAdd(cdep);
586     valueNumbering[C] = v;
587     return v;
588   }
589 
590   valueNumbering[C] = nextValueNumber;
591   return nextValueNumber++;
592 }
593 
594 /// Returns true if a value number exists for the specified value.
595 bool GVNPass::ValueTable::exists(Value *V) const {
596   return valueNumbering.contains(V);
597 }
598 
599 /// lookupOrAdd - Returns the value number for the specified value, assigning
600 /// it a new number if it did not have one before.
601 uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
602   DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
603   if (VI != valueNumbering.end())
604     return VI->second;
605 
606   auto *I = dyn_cast<Instruction>(V);
607   if (!I) {
608     valueNumbering[V] = nextValueNumber;
609     return nextValueNumber++;
610   }
611 
612   Expression exp;
613   switch (I->getOpcode()) {
614     case Instruction::Call:
615       return lookupOrAddCall(cast<CallInst>(I));
616     case Instruction::FNeg:
617     case Instruction::Add:
618     case Instruction::FAdd:
619     case Instruction::Sub:
620     case Instruction::FSub:
621     case Instruction::Mul:
622     case Instruction::FMul:
623     case Instruction::UDiv:
624     case Instruction::SDiv:
625     case Instruction::FDiv:
626     case Instruction::URem:
627     case Instruction::SRem:
628     case Instruction::FRem:
629     case Instruction::Shl:
630     case Instruction::LShr:
631     case Instruction::AShr:
632     case Instruction::And:
633     case Instruction::Or:
634     case Instruction::Xor:
635     case Instruction::ICmp:
636     case Instruction::FCmp:
637     case Instruction::Trunc:
638     case Instruction::ZExt:
639     case Instruction::SExt:
640     case Instruction::FPToUI:
641     case Instruction::FPToSI:
642     case Instruction::UIToFP:
643     case Instruction::SIToFP:
644     case Instruction::FPTrunc:
645     case Instruction::FPExt:
646     case Instruction::PtrToInt:
647     case Instruction::IntToPtr:
648     case Instruction::AddrSpaceCast:
649     case Instruction::BitCast:
650     case Instruction::Select:
651     case Instruction::Freeze:
652     case Instruction::ExtractElement:
653     case Instruction::InsertElement:
654     case Instruction::ShuffleVector:
655     case Instruction::InsertValue:
656       exp = createExpr(I);
657       break;
658     case Instruction::GetElementPtr:
659       exp = createGEPExpr(cast<GetElementPtrInst>(I));
660       break;
661     case Instruction::ExtractValue:
662       exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
663       break;
664     case Instruction::PHI:
665       valueNumbering[V] = nextValueNumber;
666       NumberingPhi[nextValueNumber] = cast<PHINode>(V);
667       return nextValueNumber++;
668     default:
669       valueNumbering[V] = nextValueNumber;
670       return nextValueNumber++;
671   }
672 
673   uint32_t e = assignExpNewValueNum(exp).first;
674   valueNumbering[V] = e;
675   return e;
676 }
677 
678 /// Returns the value number of the specified value. Fails if
679 /// the value has not yet been numbered.
680 uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
681   DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
682   if (Verify) {
683     assert(VI != valueNumbering.end() && "Value not numbered?");
684     return VI->second;
685   }
686   return (VI != valueNumbering.end()) ? VI->second : 0;
687 }
688 
689 /// Returns the value number of the given comparison,
690 /// assigning it a new number if it did not have one before.  Useful when
691 /// we deduced the result of a comparison, but don't immediately have an
692 /// instruction realizing that comparison to hand.
693 uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
694                                              CmpInst::Predicate Predicate,
695                                              Value *LHS, Value *RHS) {
696   Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
697   return assignExpNewValueNum(exp).first;
698 }
699 
700 /// Remove all entries from the ValueTable.
701 void GVNPass::ValueTable::clear() {
702   valueNumbering.clear();
703   expressionNumbering.clear();
704   NumberingPhi.clear();
705   PhiTranslateTable.clear();
706   nextValueNumber = 1;
707   Expressions.clear();
708   ExprIdx.clear();
709   nextExprNumber = 0;
710 }
711 
712 /// Remove a value from the value numbering.
713 void GVNPass::ValueTable::erase(Value *V) {
714   uint32_t Num = valueNumbering.lookup(V);
715   valueNumbering.erase(V);
716   // If V is a PHINode, V <--> value number is a one-to-one mapping.
717   if (isa<PHINode>(V))
718     NumberingPhi.erase(Num);
719 }
720 
721 /// verifyRemoved - Verify that the value is removed from all internal data
722 /// structures.
723 void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
724   assert(!valueNumbering.contains(V) &&
725          "Inst still occurs in value numbering map!");
726 }
727 
728 //===----------------------------------------------------------------------===//
729 //                     LeaderMap External Functions
730 //===----------------------------------------------------------------------===//
731 
732 /// Push a new Value to the LeaderTable onto the list for its value number.
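/// The first leader for a value number is stored inline in the map entry; any
/// additional leaders are chained through nodes from the pass's bump-pointer
/// allocator.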
733 void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
734   LeaderListNode &Curr = NumToLeaders[N];
735   if (!Curr.Entry.Val) {
736     Curr.Entry.Val = V;
737     Curr.Entry.BB = BB;
738     return;
739   }
740 
741   LeaderListNode *Node = TableAllocator.Allocate<LeaderListNode>();
742   Node->Entry.Val = V;
743   Node->Entry.BB = BB;
744   Node->Next = Curr.Next;
745   Curr.Next = Node;
746 }
747 
748 /// Scan the list of values corresponding to a given
749 /// value number, and remove the given instruction if encountered.
750 void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
751                                const BasicBlock *BB) {
752   LeaderListNode *Prev = nullptr;
753   LeaderListNode *Curr = &NumToLeaders[N];
754 
755   while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
756     Prev = Curr;
757     Curr = Curr->Next;
758   }
759 
760   if (!Curr)
761     return;
762 
763   if (Prev) {
764     Prev->Next = Curr->Next;
765   } else {
766     if (!Curr->Next) {
767       Curr->Entry.Val = nullptr;
768       Curr->Entry.BB = nullptr;
769     } else {
770       LeaderListNode *Next = Curr->Next;
771       Curr->Entry.Val = Next->Entry.Val;
772       Curr->Entry.BB = Next->Entry.BB;
773       Curr->Next = Next->Next;
774     }
775   }
776 }
777 
778 void GVNPass::LeaderMap::verifyRemoved(const Value *V) const {
779   // Walk through the value number scope to make sure the instruction isn't
780   // ferreted away in it.
781   for (const auto &I : NumToLeaders) {
782     (void)I;
783     assert(I.second.Entry.Val != V && "Inst still in value numbering scope!");
784     assert(
785         std::none_of(leader_iterator(&I.second), leader_iterator(nullptr),
786                      [=](const LeaderTableEntry &E) { return E.Val == V; }) &&
787         "Inst still in value numbering scope!");
788   }
789 }
790 
791 //===----------------------------------------------------------------------===//
792 //                                GVN Pass
793 //===----------------------------------------------------------------------===//
794 
795 bool GVNPass::isPREEnabled() const {
796   return Options.AllowPRE.value_or(GVNEnablePRE);
797 }
798 
799 bool GVNPass::isLoadPREEnabled() const {
800   return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE);
801 }
802 
803 bool GVNPass::isLoadInLoopPREEnabled() const {
804   return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE);
805 }
806 
807 bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
808   return Options.AllowLoadPRESplitBackedge.value_or(
809       GVNEnableSplitBackedgeInLoadPRE);
810 }
811 
812 bool GVNPass::isMemDepEnabled() const {
813   return Options.AllowMemDep.value_or(GVNEnableMemDep);
814 }
815 
816 PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
817   // FIXME: The order of evaluation of these 'getResult' calls is very
818   // significant! Re-ordering these variables will cause GVN when run alone to
819   // be less effective! We should fix memdep and basic-aa to not exhibit this
820   // behavior, but until then don't change the order here.
821   auto &AC = AM.getResult<AssumptionAnalysis>(F);
822   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
823   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
824   auto &AA = AM.getResult<AAManager>(F);
825   auto *MemDep =
826       isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
827   auto &LI = AM.getResult<LoopAnalysis>(F);
828   auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
829   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
830   bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
831                          MSSA ? &MSSA->getMSSA() : nullptr);
832   if (!Changed)
833     return PreservedAnalyses::all();
834   PreservedAnalyses PA;
835   PA.preserve<DominatorTreeAnalysis>();
836   PA.preserve<TargetLibraryAnalysis>();
837   if (MSSA)
838     PA.preserve<MemorySSAAnalysis>();
839   PA.preserve<LoopAnalysis>();
840   return PA;
841 }
842 
843 void GVNPass::printPipeline(
844     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
845   static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
846       OS, MapClassName2PassName);
847 
848   OS << '<';
849   if (Options.AllowPRE != std::nullopt)
850     OS << (*Options.AllowPRE ? "" : "no-") << "pre;";
851   if (Options.AllowLoadPRE != std::nullopt)
852     OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;";
853   if (Options.AllowLoadPRESplitBackedge != std::nullopt)
854     OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-")
855        << "split-backedge-load-pre;";
856   if (Options.AllowMemDep != std::nullopt)
857     OS << (*Options.AllowMemDep ? "" : "no-") << "memdep";
858   OS << '>';
859 }
860 
861 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
862 LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &d) const {
863   errs() << "{\n";
864   for (auto &I : d) {
865     errs() << I.first << "\n";
866     I.second->dump();
867   }
868   errs() << "}\n";
869 }
870 #endif
871 
872 enum class AvailabilityState : char {
873   /// We know the block *is not* fully available. This is a fixpoint.
874   Unavailable = 0,
875   /// We know the block *is* fully available. This is a fixpoint.
876   Available = 1,
877   /// We do not know whether the block is fully available or not,
878   /// but we are currently speculating that it will be.
879   /// If it would have turned out that the block was, in fact, not fully
880   /// available, this would have been cleaned up into an Unavailable.
881   SpeculativelyAvailable = 2,
882 };
883 
884 /// Return true if we can prove that the value
885 /// we're analyzing is fully available in the specified block.  As we go, keep
886 /// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
887 /// map is actually a tri-state map with the following values:
888 ///   0) we know the block *is not* fully available.
889 ///   1) we know the block *is* fully available.
890 ///   2) we do not know whether the block is fully available or not, but we are
891 ///      currently speculating that it will be.
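/// These three states mirror the AvailabilityState enum defined above.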
892 static bool IsValueFullyAvailableInBlock(
893     BasicBlock *BB,
894     DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
895   SmallVector<BasicBlock *, 32> Worklist;
896   std::optional<BasicBlock *> UnavailableBB;
897 
898   // The number of times we didn't find an entry for a block in a map and
899   // optimistically inserted an entry marking block as speculatively available.
900   unsigned NumNewNewSpeculativelyAvailableBBs = 0;
901 
902 #ifndef NDEBUG
903   SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
904   SmallVector<BasicBlock *, 32> AvailableBBs;
905 #endif
906 
907   Worklist.emplace_back(BB);
908   while (!Worklist.empty()) {
909     BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
910     // Optimistically assume that the block is Speculatively Available and check
911     // to see if we already know about this block in one lookup.
912     std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
913         FullyAvailableBlocks.try_emplace(
914             CurrBB, AvailabilityState::SpeculativelyAvailable);
915     AvailabilityState &State = IV.first->second;
916 
917     // Did the entry already exist for this block?
918     if (!IV.second) {
919       if (State == AvailabilityState::Unavailable) {
920         UnavailableBB = CurrBB;
921         break; // Backpropagate unavailability info.
922       }
923 
924 #ifndef NDEBUG
925       AvailableBBs.emplace_back(CurrBB);
926 #endif
927       continue; // Don't recurse further, but continue processing worklist.
928     }
929 
930     // No entry found for block.
931     ++NumNewNewSpeculativelyAvailableBBs;
932     bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
933 
934     // If we have exhausted our budget, mark this block as unavailable.
935     // Also, if this block has no predecessors, the value isn't live-in here.
936     if (OutOfBudget || pred_empty(CurrBB)) {
937       MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
938       State = AvailabilityState::Unavailable;
939       UnavailableBB = CurrBB;
940       break; // Backpropagate unavailability info.
941     }
942 
943     // Tentatively consider this block as speculatively available.
944 #ifndef NDEBUG
945     NewSpeculativelyAvailableBBs.insert(CurrBB);
946 #endif
947     // And further recurse into block's predecessors, in depth-first order!
948     Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
949   }
950 
951 #if LLVM_ENABLE_STATS
952   IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
953       NumNewNewSpeculativelyAvailableBBs);
954 #endif
955 
956   // If the block isn't marked as fixpoint yet
957   // (the Unavailable and Available states are fixpoints)
958   auto MarkAsFixpointAndEnqueueSuccessors =
959       [&](BasicBlock *BB, AvailabilityState FixpointState) {
960         auto It = FullyAvailableBlocks.find(BB);
961         if (It == FullyAvailableBlocks.end())
962           return; // Never queried this block, leave as-is.
963         switch (AvailabilityState &State = It->second) {
964         case AvailabilityState::Unavailable:
965         case AvailabilityState::Available:
966           return; // Don't backpropagate further, continue processing worklist.
967         case AvailabilityState::SpeculativelyAvailable: // Fix it!
968           State = FixpointState;
969 #ifndef NDEBUG
970           assert(NewSpeculativelyAvailableBBs.erase(BB) &&
971                  "Found a speculatively available successor leftover?");
972 #endif
973           // Queue successors for further processing.
974           Worklist.append(succ_begin(BB), succ_end(BB));
975           return;
976         }
977       };
978 
979   if (UnavailableBB) {
980     // Okay, we have encountered an unavailable block.
981     // Mark speculatively available blocks reachable from UnavailableBB as
982     // unavailable as well. Paths are terminated when they reach blocks not in
983     // FullyAvailableBlocks or they are not marked as speculatively available.
984     Worklist.clear();
985     Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
986     while (!Worklist.empty())
987       MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
988                                          AvailabilityState::Unavailable);
989   }
990 
991 #ifndef NDEBUG
992   Worklist.clear();
993   for (BasicBlock *AvailableBB : AvailableBBs)
994     Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
995   while (!Worklist.empty())
996     MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
997                                        AvailabilityState::Available);
998 
999   assert(NewSpeculativelyAvailableBBs.empty() &&
1000          "Must have fixed all the new speculatively available blocks.");
1001 #endif
1002 
1003   return !UnavailableBB;
1004 }
1005 
1006 /// If the specified OldValue exists in ValuesPerBlock, replace its value with
1007 /// NewValue.
1008 static void replaceValuesPerBlockEntry(
1009     SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
1010     Value *NewValue) {
1011   for (AvailableValueInBlock &V : ValuesPerBlock) {
1012     if (V.AV.Val == OldValue)
1013       V.AV.Val = NewValue;
1014     if (V.AV.isSelectValue()) {
1015       if (V.AV.V1 == OldValue)
1016         V.AV.V1 = NewValue;
1017       if (V.AV.V2 == OldValue)
1018         V.AV.V2 = NewValue;
1019     }
1020   }
1021 }
1022 
1023 /// Given a set of loads specified by ValuesPerBlock,
1024 /// construct SSA form, allowing us to eliminate Load.  This returns the value
1025 /// that should be used at Load's definition site.
1026 static Value *
1027 ConstructSSAForLoadSet(LoadInst *Load,
1028                        SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1029                        GVNPass &gvn) {
1030   // Check for the fully redundant, dominating load case.  In this case, we can
1031   // just use the dominating value directly.
1032   if (ValuesPerBlock.size() == 1 &&
1033       gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
1034                                                Load->getParent())) {
1035     assert(!ValuesPerBlock[0].AV.isUndefValue() &&
1036            "Dead BB dominate this block");
1037     return ValuesPerBlock[0].MaterializeAdjustedValue(Load, gvn);
1038   }
1039 
1040   // Otherwise, we have to construct SSA form.
1041   SmallVector<PHINode*, 8> NewPHIs;
1042   SSAUpdater SSAUpdate(&NewPHIs);
1043   SSAUpdate.Initialize(Load->getType(), Load->getName());
1044 
1045   for (const AvailableValueInBlock &AV : ValuesPerBlock) {
1046     BasicBlock *BB = AV.BB;
1047 
1048     if (AV.AV.isUndefValue())
1049       continue;
1050 
1051     if (SSAUpdate.HasValueForBlock(BB))
1052       continue;
1053 
1054     // If the value is the load that we will be eliminating, and the block it's
1055     // available in is the block that the load is in, then don't add it as
1056     // SSAUpdater will resolve the value to the relevant phi which may let it
1057     // avoid phi construction entirely if there's actually only one value.
1058     if (BB == Load->getParent() &&
1059         ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
1060          (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
1061       continue;
1062 
1063     SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(Load, gvn));
1064   }
1065 
1066   // Perform PHI construction.
1067   return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
1068 }
1069 
1070 Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
1071                                                 Instruction *InsertPt,
1072                                                 GVNPass &gvn) const {
1073   Value *Res;
1074   Type *LoadTy = Load->getType();
1075   const DataLayout &DL = Load->getDataLayout();
1076   if (isSimpleValue()) {
1077     Res = getSimpleValue();
1078     if (Res->getType() != LoadTy) {
1079       Res = getValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
1080 
1081       LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
1082                         << "  " << *getSimpleValue() << '\n'
1083                         << *Res << '\n'
1084                         << "\n\n\n");
1085     }
1086   } else if (isCoercedLoadValue()) {
1087     LoadInst *CoercedLoad = getCoercedLoadValue();
1088     if (CoercedLoad->getType() == LoadTy && Offset == 0) {
1089       Res = CoercedLoad;
1090       combineMetadataForCSE(CoercedLoad, Load, false);
1091     } else {
1092       Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL);
1093       // We are adding a new user for this load, for which the original
1094       // metadata may not hold. Additionally, the new load may have a different
1095       // size and type, so their metadata cannot be combined in any
1096       // straightforward way.
1097       // Drop all metadata that is not known to cause immediate UB on violation,
1098       // unless the load has !noundef, in which case all metadata violations
1099       // will be promoted to UB.
1100       // TODO: We can combine noalias/alias.scope metadata here, because it is
1101       // independent of the load type.
1102       if (!CoercedLoad->hasMetadata(LLVMContext::MD_noundef))
1103         CoercedLoad->dropUnknownNonDebugMetadata(
1104             {LLVMContext::MD_dereferenceable,
1105              LLVMContext::MD_dereferenceable_or_null,
1106              LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group});
1107       LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
1108                         << "  " << *getCoercedLoadValue() << '\n'
1109                         << *Res << '\n'
1110                         << "\n\n\n");
1111     }
1112   } else if (isMemIntrinValue()) {
1113     Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
1114                                  InsertPt, DL);
1115     LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1116                       << "  " << *getMemIntrinValue() << '\n'
1117                       << *Res << '\n'
1118                       << "\n\n\n");
1119   } else if (isSelectValue()) {
1120     // Introduce a new value select for a load from an eligible pointer select.
1121     SelectInst *Sel = getSelectValue();
1122     assert(V1 && V2 && "both value operands of the select must be present");
1123     Res =
1124         SelectInst::Create(Sel->getCondition(), V1, V2, "", Sel->getIterator());
1125   } else {
1126     llvm_unreachable("Should not materialize value from dead block");
1127   }
1128   assert(Res && "failed to materialize?");
1129   return Res;
1130 }
1131 
1132 static bool isLifetimeStart(const Instruction *Inst) {
1133   if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1134     return II->getIntrinsicID() == Intrinsic::lifetime_start;
1135   return false;
1136 }
1137 
1138 /// Assuming To can be reached from both From and Between, does Between lie on
1139 /// every path from From to To?
1140 static bool liesBetween(const Instruction *From, Instruction *Between,
1141                         const Instruction *To, DominatorTree *DT) {
1142   if (From->getParent() == Between->getParent())
1143     return DT->dominates(From, Between);
1144   SmallSet<BasicBlock *, 1> Exclusion;
1145   Exclusion.insert(Between->getParent());
1146   return !isPotentiallyReachable(From, To, &Exclusion, DT);
1147 }
1148 
1149 /// Try to locate the three instructions involved in a missed
1150 /// load-elimination case that is due to an intervening store.
1151 static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
1152                                    DominatorTree *DT,
1153                                    OptimizationRemarkEmitter *ORE) {
1154   using namespace ore;
1155 
1156   Instruction *OtherAccess = nullptr;
1157 
1158   OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
1159   R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
1160     << setExtraArgs();
1161 
1162   for (auto *U : Load->getPointerOperand()->users()) {
1163     if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1164       auto *I = cast<Instruction>(U);
1165       if (I->getFunction() == Load->getFunction() && DT->dominates(I, Load)) {
1166         // Use the most immediately dominating value
1167         if (OtherAccess) {
1168           if (DT->dominates(OtherAccess, I))
1169             OtherAccess = I;
1170           else
1171             assert(U == OtherAccess || DT->dominates(I, OtherAccess));
1172         } else
1173           OtherAccess = I;
1174       }
1175     }
1176   }
1177 
1178   if (!OtherAccess) {
1179     // There is no dominating use; check if we can find the closest non-dominating
1180     // use that lies between any other potentially available use and Load.
1181     for (auto *U : Load->getPointerOperand()->users()) {
1182       if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1183         auto *I = cast<Instruction>(U);
1184         if (I->getFunction() == Load->getFunction() &&
1185             isPotentiallyReachable(I, Load, nullptr, DT)) {
1186           if (OtherAccess) {
1187             if (liesBetween(OtherAccess, I, Load, DT)) {
1188               OtherAccess = I;
1189             } else if (!liesBetween(I, OtherAccess, Load, DT)) {
1190               // These uses are both partially available at Load were it not for
1191               // the clobber, but neither lies strictly after the other.
1192               OtherAccess = nullptr;
1193               break;
1194             } // else: keep current OtherAccess since it lies between U and Load
1195           } else {
1196             OtherAccess = I;
1197           }
1198         }
1199       }
1200     }
1201   }
1202 
1203   if (OtherAccess)
1204     R << " in favor of " << NV("OtherAccess", OtherAccess);
1205 
1206   R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
1207 
1208   ORE->emit(R);
1209 }
1210 
1211 // Find a non-clobbered value for memory location Loc in the extended basic block
1212 // (chain of basic blocks with single predecessors) starting at instruction From.
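// The scan walks backwards from From through unique predecessors and gives up
// once MaxNumVisitedInsts instructions have been visited or an instruction that
// may modify Loc is encountered.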
1213 static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy,
1214                                   Instruction *From, AAResults *AA) {
1215   uint32_t NumVisitedInsts = 0;
1216   BasicBlock *FromBB = From->getParent();
1217   BatchAAResults BatchAA(*AA);
1218   for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
1219     for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
1220          Inst != nullptr; Inst = Inst->getPrevNonDebugInstruction()) {
1221       // Stop the search if limit is reached.
1222       if (++NumVisitedInsts > MaxNumVisitedInsts)
1223         return nullptr;
1224       if (isModSet(BatchAA.getModRefInfo(Inst, Loc)))
1225         return nullptr;
1226       if (auto *LI = dyn_cast<LoadInst>(Inst))
1227         if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy)
1228           return LI;
1229     }
1230   return nullptr;
1231 }
1232 
1233 std::optional<AvailableValue>
1234 GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
1235                                  Value *Address) {
1236   assert(Load->isUnordered() && "rules below are incorrect for ordered access");
1237   assert(DepInfo.isLocal() && "expected a local dependence");
1238 
1239   Instruction *DepInst = DepInfo.getInst();
1240 
1241   const DataLayout &DL = Load->getDataLayout();
1242   if (DepInfo.isClobber()) {
1243     // If the dependence is to a store that writes to a superset of the bits
1244     // read by the load, we can extract the bits we need for the load from the
1245     // stored value.
1246     if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1247       // Can't forward from non-atomic to atomic without violating memory model.
1248       if (Address && Load->isAtomic() <= DepSI->isAtomic()) {
1249         int Offset =
1250             analyzeLoadFromClobberingStore(Load->getType(), Address, DepSI, DL);
1251         if (Offset != -1)
1252           return AvailableValue::get(DepSI->getValueOperand(), Offset);
1253       }
1254     }
1255 
1256     // Check to see if we have something like this:
1257     //    load i32* P
1258     //    load i8* (P+1)
1259     // if we have this, replace the later with an extraction from the former.
1260     if (LoadInst *DepLoad = dyn_cast<LoadInst>(DepInst)) {
1261       // If this is a clobber and L is the first instruction in its block, then
1262       // we have the first instruction in the entry block.
1263       // Can't forward from non-atomic to atomic without violating memory model.
1264       if (DepLoad != Load && Address &&
1265           Load->isAtomic() <= DepLoad->isAtomic()) {
1266         Type *LoadType = Load->getType();
1267         int Offset = -1;
1268 
1269         // If MD reported clobber, check it was nested.
1270         if (DepInfo.isClobber() &&
1271             canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
1272           const auto ClobberOff = MD->getClobberOffset(DepLoad);
1273           // GVN cannot deal with a negative offset.
1274           Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
1275                        ? -1
1276                        : *ClobberOff;
1277         }
1278         if (Offset == -1)
1279           Offset =
1280               analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL);
1281         if (Offset != -1)
1282           return AvailableValue::getLoad(DepLoad, Offset);
1283       }
1284     }
1285 
1286     // If the clobbering value is a memset/memcpy/memmove, see if we can
1287     // forward a value on from it.
1288     if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
1289       if (Address && !Load->isAtomic()) {
1290         int Offset = analyzeLoadFromClobberingMemInst(Load->getType(), Address,
1291                                                       DepMI, DL);
1292         if (Offset != -1)
1293           return AvailableValue::getMI(DepMI, Offset);
1294       }
1295     }
1296 
1297     // Nothing known about this clobber; have to be conservative.
1298     LLVM_DEBUG(
1299         // fast print dep, using operator<< on instruction is too slow.
1300         dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1301         dbgs() << " is clobbered by " << *DepInst << '\n';);
1302     if (ORE->allowExtraAnalysis(DEBUG_TYPE))
1303       reportMayClobberedLoad(Load, DepInfo, DT, ORE);
1304 
1305     return std::nullopt;
1306   }
1307   assert(DepInfo.isDef() && "follows from above");
1308 
1309   // Loading the alloca -> undef.
1310   // Loading immediately after lifetime begin -> undef.
1311   if (isa<AllocaInst>(DepInst) || isLifetimeStart(DepInst))
1312     return AvailableValue::get(UndefValue::get(Load->getType()));
1313 
1314   if (Constant *InitVal =
1315           getInitialValueOfAllocation(DepInst, TLI, Load->getType()))
1316     return AvailableValue::get(InitVal);
1317 
1318   if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1319     // Reject loads and stores that are to the same address but are of
1320     // different types if we have to. If the stored value is convertible to
1321     // the loaded value, we can reuse it.
1322     if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), Load->getType(),
1323                                          DL))
1324       return std::nullopt;
1325 
1326     // Can't forward from non-atomic to atomic without violating memory model.
1327     if (S->isAtomic() < Load->isAtomic())
1328       return std::nullopt;
1329 
1330     return AvailableValue::get(S->getValueOperand());
1331   }
1332 
1333   if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1334     // If the types mismatch and we can't handle it, reject reuse of the load.
1335     // If the stored value is larger than or equal to the loaded value, we can
1336     // reuse it.
1337     if (!canCoerceMustAliasedValueToLoad(LD, Load->getType(), DL))
1338       return std::nullopt;
1339 
1340     // Can't forward from non-atomic to atomic without violating memory model.
1341     if (LD->isAtomic() < Load->isAtomic())
1342       return std::nullopt;
1343 
1344     return AvailableValue::getLoad(LD);
1345   }
1346 
1347   // Check if load with Addr dependent from select can be converted to select
1348   // between load values. There must be no instructions between the found
1349   // loads and DepInst that may clobber the loads.
1350   if (auto *Sel = dyn_cast<SelectInst>(DepInst)) {
1351     assert(Sel->getType() == Load->getPointerOperandType());
1352     auto Loc = MemoryLocation::get(Load);
1353     Value *V1 =
1354         findDominatingValue(Loc.getWithNewPtr(Sel->getTrueValue()),
1355                             Load->getType(), DepInst, getAliasAnalysis());
1356     if (!V1)
1357       return std::nullopt;
1358     Value *V2 =
1359         findDominatingValue(Loc.getWithNewPtr(Sel->getFalseValue()),
1360                             Load->getType(), DepInst, getAliasAnalysis());
1361     if (!V2)
1362       return std::nullopt;
1363     return AvailableValue::getSelect(Sel, V1, V2);
1364   }
1365 
1366   // Unknown def - must be conservative
1367   LLVM_DEBUG(
1368       // fast print dep, using operator<< on instruction is too slow.
1369       dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1370       dbgs() << " has unknown def " << *DepInst << '\n';);
1371   return std::nullopt;
1372 }
1373 
1374 void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
1375                                       AvailValInBlkVect &ValuesPerBlock,
1376                                       UnavailBlkVect &UnavailableBlocks) {
1377   // Filter out useless results (non-locals, etc).  Keep track of the blocks
1378   // where we have a value available in repl, also keep track of whether we see
1379   // dependencies that produce an unknown value for the load (such as a call
1380   // that could potentially clobber the load).
1381   for (const auto &Dep : Deps) {
1382     BasicBlock *DepBB = Dep.getBB();
1383     MemDepResult DepInfo = Dep.getResult();
1384 
1385     if (DeadBlocks.count(DepBB)) {
1386       // A dead dependent mem-op is disguised as a load evaluating the same
1387       // value as the load in question.
1388       ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
1389       continue;
1390     }
1391 
1392     if (!DepInfo.isLocal()) {
1393       UnavailableBlocks.push_back(DepBB);
1394       continue;
1395     }
1396 
1397     // The address being loaded in this non-local block may not be the same as
1398     // the pointer operand of the load if PHI translation occurs.  Make sure
1399     // to consider the right address.
1400     if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Dep.getAddress())) {
1401       // Subtlety: because we know this was a non-local dependency, we know
1402       // it's safe to materialize anywhere between the instruction within
1403       // DepInfo and the end of its block.
1404       ValuesPerBlock.push_back(
1405           AvailableValueInBlock::get(DepBB, std::move(*AV)));
1406     } else {
1407       UnavailableBlocks.push_back(DepBB);
1408     }
1409   }
1410 
1411   assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1412          "post condition violation");
1413 }
1414 
1415 /// Given the following code, v1 is partially available on some edges, but not
1416 /// available on the edge from PredBB. This function tries to find if there is
1417 /// another identical load in the other successor of PredBB.
1418 ///
1419 ///      v0 = load %addr
1420 ///      br %LoadBB
1421 ///
1422 ///   LoadBB:
1423 ///      v1 = load %addr
1424 ///      ...
1425 ///
1426 ///   PredBB:
1427 ///      ...
1428 ///      br %cond, label %LoadBB, label %SuccBB
1429 ///
1430 ///   SuccBB:
1431 ///      v2 = load %addr
1432 ///      ...
1433 ///
1434 LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
1435                                            LoadInst *Load) {
1436   // For simplicity we only handle the case where Pred has 2 successors.
1437   auto *Term = Pred->getTerminator();
1438   if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
1439     return nullptr;
1440   auto *SuccBB = Term->getSuccessor(0);
1441   if (SuccBB == LoadBB)
1442     SuccBB = Term->getSuccessor(1);
1443   if (!SuccBB->getSinglePredecessor())
1444     return nullptr;
1445 
1446   unsigned int NumInsts = MaxNumInsnsPerBlock;
1447   for (Instruction &Inst : *SuccBB) {
1448     if (Inst.isDebugOrPseudoInst())
1449       continue;
1450     if (--NumInsts == 0)
1451       return nullptr;
1452 
1453     if (!Inst.isIdenticalTo(Load))
1454       continue;
1455 
1456     MemDepResult Dep = MD->getDependency(&Inst);
1457     // If an identical load doesn't depend on any local instructions, it can
1458     // be safely moved to PredBB.
1459     // Also check for the implicit control flow instructions. See the comments
1460     // in PerformLoadPRE for details.
1461     if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(&Inst))
1462       return cast<LoadInst>(&Inst);
1463 
1464     // Otherwise something in the same BB clobbers the memory; we can't move
1465     // this and the later load to PredBB.
1466     return nullptr;
1467   }
1468 
1469   return nullptr;
1470 }
1471 
1472 void GVNPass::eliminatePartiallyRedundantLoad(
1473     LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1474     MapVector<BasicBlock *, Value *> &AvailableLoads,
1475     MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
1476   for (const auto &AvailableLoad : AvailableLoads) {
1477     BasicBlock *UnavailableBlock = AvailableLoad.first;
1478     Value *LoadPtr = AvailableLoad.second;
1479 
1480     auto *NewLoad = new LoadInst(
1481         Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
1482         Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
1483         UnavailableBlock->getTerminator()->getIterator());
1484     NewLoad->setDebugLoc(Load->getDebugLoc());
1485     if (MSSAU) {
1486       auto *NewAccess = MSSAU->createMemoryAccessInBB(
1487           NewLoad, nullptr, NewLoad->getParent(), MemorySSA::BeforeTerminator);
1488       if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
1489         MSSAU->insertDef(NewDef, /*RenameUses=*/true);
1490       else
1491         MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
1492     }
1493 
1494     // Transfer the old load's AA tags to the new load.
1495     AAMDNodes Tags = Load->getAAMetadata();
1496     if (Tags)
1497       NewLoad->setAAMetadata(Tags);
1498 
1499     if (auto *MD = Load->getMetadata(LLVMContext::MD_invariant_load))
1500       NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
1501     if (auto *InvGroupMD = Load->getMetadata(LLVMContext::MD_invariant_group))
1502       NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
1503     if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range))
1504       NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
1505     if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group))
1506       if (LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock))
1507         NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD);
1508 
1509     // We do not propagate the old load's debug location, because the new
1510     // load now lives in a different BB, and we want to avoid a jumpy line
1511     // table.
1512     // FIXME: How do we retain source locations without causing poor debugging
1513     // behavior?
1514 
1515     // Add the newly created load.
1516     ValuesPerBlock.push_back(
1517         AvailableValueInBlock::get(UnavailableBlock, NewLoad));
1518     MD->invalidateCachedPointerInfo(LoadPtr);
1519     LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1520 
1521     // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of the
1522     // old load instruction with the newly created load instruction.
1523     if (CriticalEdgePredAndLoad) {
1524       auto I = CriticalEdgePredAndLoad->find(UnavailableBlock);
1525       if (I != CriticalEdgePredAndLoad->end()) {
1526         ++NumPRELoadMoved2CEPred;
1527         ICF->insertInstructionTo(NewLoad, UnavailableBlock);
1528         LoadInst *OldLoad = I->second;
1529         combineMetadataForCSE(NewLoad, OldLoad, false);
1530         OldLoad->replaceAllUsesWith(NewLoad);
1531         replaceValuesPerBlockEntry(ValuesPerBlock, OldLoad, NewLoad);
1532         if (uint32_t ValNo = VN.lookup(OldLoad, false))
1533           LeaderTable.erase(ValNo, OldLoad, OldLoad->getParent());
1534         VN.erase(OldLoad);
1535         removeInstruction(OldLoad);
1536       }
1537     }
1538   }
1539 
1540   // Perform PHI construction.
1541   Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
1542   // ConstructSSAForLoadSet is responsible for combining metadata.
1543   ICF->removeUsersOf(Load);
1544   Load->replaceAllUsesWith(V);
1545   if (isa<PHINode>(V))
1546     V->takeName(Load);
1547   if (Instruction *I = dyn_cast<Instruction>(V))
1548     I->setDebugLoc(Load->getDebugLoc());
1549   if (V->getType()->isPtrOrPtrVectorTy())
1550     MD->invalidateCachedPointerInfo(V);
1551   markInstructionForDeletion(Load);
1552   ORE->emit([&]() {
1553     return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
1554            << "load eliminated by PRE";
1555   });
1556 }
1557 
1558 bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1559                              UnavailBlkVect &UnavailableBlocks) {
1560   // Okay, we have *some* definitions of the value.  This means that the value
1561   // is available in some of our (transitive) predecessors.  Lets think about
1562   // doing PRE of this load.  This will involve inserting a new load into the
1563   // predecessor when it's not available.  We could do this in general, but
1564   // prefer to not increase code size.  As such, we only do this when we know
1565   // that we only have to insert *one* load (which means we're basically moving
1566   // the load, not inserting a new one).
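  // A hedged, illustrative sketch (block and value names are invented):
  //
  //   Before:                              After load PRE:
  //     pred1: store i32 0, ptr %P           pred1: store i32 0, ptr %P
  //     pred2: (no store/load of %P)         pred2: %v.pre = load i32, ptr %P
  //     merge: %v = load i32, ptr %P         merge: %v = phi i32 [ 0, %pred1 ],
  //                                                             [ %v.pre, %pred2 ]
  //
  // Only one new load is inserted (in pred2), so the original load is in effect
  // moved rather than duplicated.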
1567 
1568   SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
1569                                         UnavailableBlocks.end());
1570 
1571   // Let's find the first basic block with more than one predecessor.  Walk
1572   // backwards through predecessors if needed.
1573   BasicBlock *LoadBB = Load->getParent();
1574   BasicBlock *TmpBB = LoadBB;
1575 
1576   // Check that there are no implicit control flow instructions above our load
1577   // in its block. If there is an instruction that doesn't always pass the
1578   // execution to the following instruction, then moving through it may become
1579   // invalid. For example:
1580   //
1581   // int arr[LEN];
1582   // int index = ???;
1583   // ...
1584   // guard(0 <= index && index < LEN);
1585   // use(arr[index]);
1586   //
1587   // It is illegal to move the array access to any point above the guard,
1588   // because if the index is out of bounds we should deoptimize rather than
1589   // access the array.
1590   // Check that there is no guard in this block above our instruction.
1591   bool MustEnsureSafetyOfSpeculativeExecution =
1592       ICF->isDominatedByICFIFromSameBlock(Load);
1593 
1594   while (TmpBB->getSinglePredecessor()) {
1595     TmpBB = TmpBB->getSinglePredecessor();
1596     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1597       return false;
1598     if (Blockers.count(TmpBB))
1599       return false;
1600 
1601     // If any of these blocks has more than one successor (i.e. if the edge we
1602     // just traversed was critical), then there are other paths through this
1603     // block along which the load may not be anticipated.  Hoisting the load
1604     // above this block would be adding the load to execution paths along
1605     // which it was not previously executed.
1606     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1607       return false;
1608 
1609     // Check that there is no implicit control flow in a block above.
1610     MustEnsureSafetyOfSpeculativeExecution =
1611         MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
1612   }
1613 
1614   assert(TmpBB);
1615   LoadBB = TmpBB;
1616 
1617   // Check to see how many predecessors have the loaded value fully
1618   // available.
1619   MapVector<BasicBlock *, Value *> PredLoads;
1620   DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
1621   for (const AvailableValueInBlock &AV : ValuesPerBlock)
1622     FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
1623   for (BasicBlock *UnavailableBB : UnavailableBlocks)
1624     FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
1625 
1626   // The edge from Pred to LoadBB is a critical edge and will be split.
1627   SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
1628   // The edge from Pred to LoadBB is a critical edge, and another successor of
1629   // Pred contains a load that can be moved to Pred. This data structure maps
1630   // the Pred to the movable load.
1631   MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
1632   for (BasicBlock *Pred : predecessors(LoadBB)) {
1633     // If any predecessor block is an EH pad that does not allow non-PHI
1634     // instructions before the terminator, we can't PRE the load.
1635     if (Pred->getTerminator()->isEHPad()) {
1636       LLVM_DEBUG(
1637           dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1638                  << Pred->getName() << "': " << *Load << '\n');
1639       return false;
1640     }
1641 
1642     if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
1643       continue;
1644     }
1645 
1646     if (Pred->getTerminator()->getNumSuccessors() != 1) {
1647       if (isa<IndirectBrInst>(Pred->getTerminator())) {
1648         LLVM_DEBUG(
1649             dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1650                    << Pred->getName() << "': " << *Load << '\n');
1651         return false;
1652       }
1653 
1654       if (LoadBB->isEHPad()) {
1655         LLVM_DEBUG(
1656             dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1657                    << Pred->getName() << "': " << *Load << '\n');
1658         return false;
1659       }
1660 
1661       // Do not split backedge as it will break the canonical loop form.
1662       if (!isLoadPRESplitBackedgeEnabled())
1663         if (DT->dominates(LoadBB, Pred)) {
1664           LLVM_DEBUG(
1665               dbgs()
1666               << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
1667               << Pred->getName() << "': " << *Load << '\n');
1668           return false;
1669         }
1670 
1671       if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
1672         CriticalEdgePredAndLoad[Pred] = LI;
1673       else
1674         CriticalEdgePredSplit.push_back(Pred);
1675     } else {
1676       // Only add the predecessors that will not be split for now.
1677       PredLoads[Pred] = nullptr;
1678     }
1679   }
1680 
1681   // Decide whether PRE is profitable for this load.
1682   unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
1683   unsigned NumUnavailablePreds = NumInsertPreds +
1684       CriticalEdgePredAndLoad.size();
1685   assert(NumUnavailablePreds != 0 &&
1686          "Fully available value should already be eliminated!");
1687   (void)NumUnavailablePreds;
1688 
1689   // If we need to insert new load in multiple predecessors, reject it.
1690   // FIXME: If we could restructure the CFG, we could make a common pred with
1691   // all the preds that don't have an available Load and insert a new load into
1692   // that one block.
1693   if (NumInsertPreds > 1)
1694       return false;
1695 
1696   // Now we know where we will insert the load. We must ensure that it is safe
1697   // to speculatively execute the load at those points.
1698   if (MustEnsureSafetyOfSpeculativeExecution) {
1699     if (CriticalEdgePredSplit.size())
1700       if (!isSafeToSpeculativelyExecute(Load, LoadBB->getFirstNonPHI(), AC, DT))
1701         return false;
1702     for (auto &PL : PredLoads)
1703       if (!isSafeToSpeculativelyExecute(Load, PL.first->getTerminator(), AC,
1704                                         DT))
1705         return false;
1706     for (auto &CEP : CriticalEdgePredAndLoad)
1707       if (!isSafeToSpeculativelyExecute(Load, CEP.first->getTerminator(), AC,
1708                                         DT))
1709         return false;
1710   }
1711 
1712   // Split critical edges, and update the unavailable predecessors accordingly.
1713   for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
1714     BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
1715     assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1716     PredLoads[NewPred] = nullptr;
1717     LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1718                       << LoadBB->getName() << '\n');
1719   }
1720 
1721   for (auto &CEP : CriticalEdgePredAndLoad)
1722     PredLoads[CEP.first] = nullptr;
1723 
1724   // Check if the load can safely be moved to all the unavailable predecessors.
1725   bool CanDoPRE = true;
1726   const DataLayout &DL = Load->getDataLayout();
1727   SmallVector<Instruction*, 8> NewInsts;
1728   for (auto &PredLoad : PredLoads) {
1729     BasicBlock *UnavailablePred = PredLoad.first;
1730 
1731     // Do PHI translation to get its value in the predecessor if necessary.  The
1732     // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1733     // We do the translation for each edge we skipped by going from Load's block
1734     // to LoadBB, otherwise we might miss pieces needing translation.
1735 
1736     // If all preds have a single successor, then we know it is safe to insert
1737     // the load on the pred (?!?), so we can insert code to materialize the
1738     // pointer if it is not available.
1739     Value *LoadPtr = Load->getPointerOperand();
1740     BasicBlock *Cur = Load->getParent();
1741     while (Cur != LoadBB) {
1742       PHITransAddr Address(LoadPtr, DL, AC);
1743       LoadPtr = Address.translateWithInsertion(Cur, Cur->getSinglePredecessor(),
1744                                                *DT, NewInsts);
1745       if (!LoadPtr) {
1746         CanDoPRE = false;
1747         break;
1748       }
1749       Cur = Cur->getSinglePredecessor();
1750     }
1751 
1752     if (LoadPtr) {
1753       PHITransAddr Address(LoadPtr, DL, AC);
1754       LoadPtr = Address.translateWithInsertion(LoadBB, UnavailablePred, *DT,
1755                                                NewInsts);
1756     }
1757     // If we couldn't find or insert a computation of this phi translated value,
1758     // we fail PRE.
1759     if (!LoadPtr) {
1760       LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1761                         << *Load->getPointerOperand() << "\n");
1762       CanDoPRE = false;
1763       break;
1764     }
1765 
1766     PredLoad.second = LoadPtr;
1767   }
1768 
1769   if (!CanDoPRE) {
1770     while (!NewInsts.empty()) {
1771       // Erase instructions generated by the failed PHI translation before
1772       // trying to number them. PHI translation might insert instructions
1773       // in basic blocks other than the current one, and we delete them
1774       // directly, as markInstructionForDeletion only allows removing from the
1775       // current basic block.
1776       NewInsts.pop_back_val()->eraseFromParent();
1777     }
1778     // HINT: Don't revert the edge-splitting, as the following transformation may
1779     // also need to split these critical edges.
1780     return !CriticalEdgePredSplit.empty();
1781   }
1782 
1783   // Okay, we can eliminate this load by inserting a reload in the predecessor
1784   // and using PHI construction to get the value in the other predecessors, do
1785   // it.
1786   LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
1787   LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
1788                                            << " INSTS: " << *NewInsts.back()
1789                                            << '\n');
1790 
1791   // Assign value numbers to the new instructions.
1792   for (Instruction *I : NewInsts) {
1793     // Instructions that have been inserted in predecessor(s) to materialize
1794     // the load address do not retain their original debug locations. Doing
1795     // so could lead to confusing (but correct) source attributions.
1796     I->updateLocationAfterHoist();
1797 
1798     // FIXME: We really _ought_ to insert these value numbers into their
1799     // parent's availability map.  However, in doing so, we risk getting into
1800     // ordering issues.  If a block hasn't been processed yet, we would be
1801     // marking a value as AVAIL-IN, which isn't what we intend.
1802     VN.lookupOrAdd(I);
1803   }
1804 
1805   eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads,
1806                                   &CriticalEdgePredAndLoad);
1807   ++NumPRELoad;
1808   return true;
1809 }
1810 
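/// Try to PRE a load in a loop header whose pointer operand is loop-invariant.
/// A hedged, illustrative sketch (names invented, control flow simplified):
///
///   preheader:
///     %v.pre = load i32, ptr %P                       ; inserted reload
///     br label %header
///   header:
///     %v = phi i32 [ %v.pre, %preheader ], [ %v.back, %latch ]
///     br i1 %rare, label %cold, label %hot
///   hot:
///     br label %latch
///   cold:                                             ; clobbers memory
///     call void @clobber()
///     %v.cold = load i32, ptr %P                      ; inserted reload
///     br label %latch
///   latch:
///     %v.back = phi i32 [ %v, %hot ], [ %v.cold, %cold ]
///     br i1 %c, label %header, label %exit
///
/// The hot path through the loop no longer reloads %P on every iteration.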
1811 bool GVNPass::performLoopLoadPRE(LoadInst *Load,
1812                                  AvailValInBlkVect &ValuesPerBlock,
1813                                  UnavailBlkVect &UnavailableBlocks) {
1814   const Loop *L = LI->getLoopFor(Load->getParent());
1815   // TODO: Generalize to other loop blocks that dominate the latch.
1816   if (!L || L->getHeader() != Load->getParent())
1817     return false;
1818 
1819   BasicBlock *Preheader = L->getLoopPreheader();
1820   BasicBlock *Latch = L->getLoopLatch();
1821   if (!Preheader || !Latch)
1822     return false;
1823 
1824   Value *LoadPtr = Load->getPointerOperand();
1825   // Must be available in preheader.
1826   if (!L->isLoopInvariant(LoadPtr))
1827     return false;
1828 
1829   // We plan to hoist the load to preheader without introducing a new fault.
1830   // In order to do it, we need to prove that we cannot side-exit the loop
1831   // once loop header is first entered before execution of the load.
1832   if (ICF->isDominatedByICFIFromSameBlock(Load))
1833     return false;
1834 
1835   BasicBlock *LoopBlock = nullptr;
1836   for (auto *Blocker : UnavailableBlocks) {
1837     // Blockers from outside the loop are handled in preheader.
1838     if (!L->contains(Blocker))
1839       continue;
1840 
1841     // Only allow one loop block. The loop header is executed no less frequently
1842     // than any loop block, and likely it is much more frequently executed. But
1843     // in case of multiple loop blocks, we need extra information (such as block
1844     // frequency info) to understand whether it is profitable to PRE into
1845     // multiple loop blocks.
1846     if (LoopBlock)
1847       return false;
1848 
1849     // Do not sink into inner loops. This may be non-profitable.
1850     if (L != LI->getLoopFor(Blocker))
1851       return false;
1852 
1853     // Blocks that dominate the latch execute on every single iteration, maybe
1854     // except the last one. So PREing into these blocks doesn't make much sense
1855     // in most cases. But the blocks that do not necessarily execute on each
1856     // iteration are sometimes much colder than the header, and this is when
1857     // PRE is potentially profitable.
1858     if (DT->dominates(Blocker, Latch))
1859       return false;
1860 
1861     // Make sure that the terminator itself doesn't clobber.
1862     if (Blocker->getTerminator()->mayWriteToMemory())
1863       return false;
1864 
1865     LoopBlock = Blocker;
1866   }
1867 
1868   if (!LoopBlock)
1869     return false;
1870 
1871   // Make sure the memory at this pointer cannot be freed, therefore we can
1872   // safely reload from it after clobber.
1873   if (LoadPtr->canBeFreed())
1874     return false;
1875 
1876   // TODO: Support critical edge splitting if blocker has more than 1 successor.
1877   MapVector<BasicBlock *, Value *> AvailableLoads;
1878   AvailableLoads[LoopBlock] = LoadPtr;
1879   AvailableLoads[Preheader] = LoadPtr;
1880 
1881   LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
1882   eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
1883                                   /*CriticalEdgePredAndLoad*/ nullptr);
1884   ++NumPRELoopLoad;
1885   return true;
1886 }
1887 
1888 static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
1889                            OptimizationRemarkEmitter *ORE) {
1890   using namespace ore;
1891 
1892   ORE->emit([&]() {
1893     return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
1894            << "load of type " << NV("Type", Load->getType()) << " eliminated"
1895            << setExtraArgs() << " in favor of "
1896            << NV("InfavorOfValue", AvailableValue);
1897   });
1898 }
1899 
1900 /// Attempt to eliminate a load whose dependencies are
1901 /// non-local by performing PHI construction.
1902 bool GVNPass::processNonLocalLoad(LoadInst *Load) {
1903   // Non-local speculations are not allowed under ASan or HWASan.
1904   if (Load->getParent()->getParent()->hasFnAttribute(
1905           Attribute::SanitizeAddress) ||
1906       Load->getParent()->getParent()->hasFnAttribute(
1907           Attribute::SanitizeHWAddress))
1908     return false;
1909 
1910   // Step 1: Find the non-local dependencies of the load.
1911   LoadDepVect Deps;
1912   MD->getNonLocalPointerDependency(Load, Deps);
1913 
1914   // If we had to process more than one hundred blocks to find the
1915   // dependencies, this load isn't worth worrying about.  Optimizing
1916   // it will be too expensive.
1917   unsigned NumDeps = Deps.size();
1918   if (NumDeps > MaxNumDeps)
1919     return false;
1920 
1921   // If we had a phi translation failure, we'll have a single entry which is a
1922   // clobber in the current block.  Reject this early.
1923   if (NumDeps == 1 &&
1924       !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
1925     LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
1926                dbgs() << " has unknown dependencies\n";);
1927     return false;
1928   }
1929 
1930   bool Changed = false;
1931   // If this load follows a GEP, see if we can PRE the indices before analyzing.
1932   if (GetElementPtrInst *GEP =
1933           dyn_cast<GetElementPtrInst>(Load->getOperand(0))) {
1934     for (Use &U : GEP->indices())
1935       if (Instruction *I = dyn_cast<Instruction>(U.get()))
1936         Changed |= performScalarPRE(I);
1937   }
1938 
1939   // Step 2: Analyze the availability of the load
1940   AvailValInBlkVect ValuesPerBlock;
1941   UnavailBlkVect UnavailableBlocks;
1942   AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);
1943 
1944   // If we have no predecessors that produce a known value for this load, exit
1945   // early.
1946   if (ValuesPerBlock.empty())
1947     return Changed;
1948 
1949   // Step 3: Eliminate full redundancy.
1950   //
1951   // If all of the instructions we depend on produce a known value for this
1952   // load, then it is fully redundant and we can use PHI insertion to compute
1953   // its value.  Insert PHIs and remove the fully redundant value now.
1954   if (UnavailableBlocks.empty()) {
1955     LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');
1956 
1957     // Perform PHI construction.
1958     Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
1959     // ConstructSSAForLoadSet is responsible for combining metadata.
1960     ICF->removeUsersOf(Load);
1961     Load->replaceAllUsesWith(V);
1962 
1963     if (isa<PHINode>(V))
1964       V->takeName(Load);
1965     if (Instruction *I = dyn_cast<Instruction>(V))
1966       // If instruction I has debug info, then we should not update it.
1967       // Also, if I has a null DebugLoc, then it is still potentially incorrect
1968       // to propagate Load's DebugLoc because Load may not post-dominate I.
1969       if (Load->getDebugLoc() && Load->getParent() == I->getParent())
1970         I->setDebugLoc(Load->getDebugLoc());
1971     if (V->getType()->isPtrOrPtrVectorTy())
1972       MD->invalidateCachedPointerInfo(V);
1973     markInstructionForDeletion(Load);
1974     ++NumGVNLoad;
1975     reportLoadElim(Load, V, ORE);
1976     return true;
1977   }
1978 
1979   // Step 4: Eliminate partial redundancy.
1980   if (!isPREEnabled() || !isLoadPREEnabled())
1981     return Changed;
1982   if (!isLoadInLoopPREEnabled() && LI->getLoopFor(Load->getParent()))
1983     return Changed;
1984 
1985   if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
1986       PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
1987     return true;
1988 
1989   return Changed;
1990 }
1991 
1992 static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
1993   if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_EQ)
1994     return true;
1995 
1996   // Floating point comparisons can be equal, but not equivalent.  Cases:
1997   // NaNs for unordered operators
1998   // +0.0 vs 0.0 for all operators
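  // A hedged illustrative example: 'fcmp oeq double %x, 0.0' is true when %x
  // is -0.0, yet substituting 0.0 for %x would flip the sign bit; similarly
  // 'fcmp ueq' is true when either operand is NaN. Equality therefore implies
  // substitutability only in the guarded cases below.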
1999   if (Cmp->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
2000       (Cmp->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
2001        Cmp->getFastMathFlags().noNaNs())) {
2002       Value *LHS = Cmp->getOperand(0);
2003       Value *RHS = Cmp->getOperand(1);
2004       // If we can prove either side non-zero, then equality must imply
2005       // equivalence.
2006       // FIXME: We should do this optimization if 'no signed zeros' is
2007       // applicable via an instruction-level fast-math-flag or some other
2008       // indicator that relaxed FP semantics are being used.
2009       if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
2010         return true;
2011       if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
2012         return true;
2013       // TODO: Handle vector floating point constants
2014   }
2015   return false;
2016 }
2017 
2018 static bool impliesEquivalanceIfFalse(CmpInst* Cmp) {
2019   if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_NE)
2020     return true;
2021 
2022   // Floating point comparisons can be equal, but not equivalent.  Cases:
2023   // NaNs for unordered operators
2024   // +0.0 vs 0.0 for all operators
2025   if ((Cmp->getPredicate() == CmpInst::Predicate::FCMP_ONE &&
2026        Cmp->getFastMathFlags().noNaNs()) ||
2027       Cmp->getPredicate() == CmpInst::Predicate::FCMP_UNE) {
2028       Value *LHS = Cmp->getOperand(0);
2029       Value *RHS = Cmp->getOperand(1);
2030       // If we can prove either side non-zero, then equality must imply
2031       // equivalence.
2032       // FIXME: We should do this optimization if 'no signed zeros' is
2033       // applicable via an instruction-level fast-math-flag or some other
2034       // indicator that relaxed FP semantics are being used.
2035       if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
2036         return true;
2037       if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
2038         return true;
2039       // TODO: Handle vector floating point constants
2040   }
2041   return false;
2042 }
2043 
2044 
2045 static bool hasUsersIn(Value *V, BasicBlock *BB) {
2046   return llvm::any_of(V->users(), [BB](User *U) {
2047     auto *I = dyn_cast<Instruction>(U);
2048     return I && I->getParent() == BB;
2049   });
2050 }
2051 
2052 bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
2053   Value *V = IntrinsicI->getArgOperand(0);
2054 
2055   if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
2056     if (Cond->isZero()) {
2057       Type *Int8Ty = Type::getInt8Ty(V->getContext());
2058       Type *PtrTy = PointerType::get(V->getContext(), 0);
2059       // Insert a new store-to-null instruction before the assume to indicate
2060       // that this code is not reachable.  FIXME: We could insert an unreachable
2061       // instruction directly because we can modify the CFG.
2062       auto *NewS =
2063           new StoreInst(PoisonValue::get(Int8Ty), Constant::getNullValue(PtrTy),
2064                         IntrinsicI->getIterator());
2065       if (MSSAU) {
2066         const MemoryUseOrDef *FirstNonDom = nullptr;
2067         const auto *AL =
2068             MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());
2069 
2070         // If there are accesses in the current basic block, find the first one
2071         // that does not come before NewS. The new memory access is inserted
2072         // after the found access or before the terminator if no such access is
2073         // found.
2074         if (AL) {
2075           for (const auto &Acc : *AL) {
2076             if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
2077               if (!Current->getMemoryInst()->comesBefore(NewS)) {
2078                 FirstNonDom = Current;
2079                 break;
2080               }
2081           }
2082         }
2083 
2084         auto *NewDef =
2085             FirstNonDom ? MSSAU->createMemoryAccessBefore(
2086                               NewS, nullptr,
2087                               const_cast<MemoryUseOrDef *>(FirstNonDom))
2088                         : MSSAU->createMemoryAccessInBB(
2089                               NewS, nullptr,
2090                               NewS->getParent(), MemorySSA::BeforeTerminator);
2091 
2092         MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
2093       }
2094     }
2095     if (isAssumeWithEmptyBundle(*IntrinsicI)) {
2096       markInstructionForDeletion(IntrinsicI);
2097       return true;
2098     }
2099     return false;
2100   }
2101 
2102   if (isa<Constant>(V)) {
2103     // If it's a constant but not false, it must evaluate to true. This means
2104     // our assume is assume(true) and thus pointless, and we don't want to do
2105     // anything more here.
2106     return false;
2107   }
2108 
2109   Constant *True = ConstantInt::getTrue(V->getContext());
2110   bool Changed = false;
2111 
2112   for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
2113     BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
2114 
2115     // This property is only true in dominated successors, propagateEquality
2116     // will check dominance for us.
2117     Changed |= propagateEquality(V, True, Edge, false);
2118   }
2119 
2120   // We can replace assume value with true, which covers cases like this:
2121   // call void @llvm.assume(i1 %cmp)
2122   // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
2123   ReplaceOperandsWithMap[V] = True;
2124 
2125   // Similarly, after assume(!NotV) we know that NotV == false.
2126   Value *NotV;
2127   if (match(V, m_Not(m_Value(NotV))))
2128     ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());
2129 
2130   // If we find an equality fact, canonicalize all dominated uses in this block
2131   // to one of the two values.  We heuristically choose the "oldest" of the
2132   // two, where age is determined by value number. (Note that propagateEquality
2133   // above handles the cross block case.)
2134   //
2135   // Key cases to cover are:
2136   // 1)
2137   // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
2138   // call void @llvm.assume(i1 %cmp)
2139   // ret float %0 ; will change it to ret float 3.000000e+00
2140   // 2)
2141   // %load = load float, float* %addr
2142   // %cmp = fcmp oeq float %load, %0
2143   // call void @llvm.assume(i1 %cmp)
2144   // ret float %load ; will change it to ret float %0
2145   if (auto *CmpI = dyn_cast<CmpInst>(V)) {
2146     if (impliesEquivalanceIfTrue(CmpI)) {
2147       Value *CmpLHS = CmpI->getOperand(0);
2148       Value *CmpRHS = CmpI->getOperand(1);
2149       // Heuristically pick the better replacement -- the choice of heuristic
2150       // isn't terribly important here, but the fact we canonicalize on some
2151       // replacement is for exposing other simplifications.
2152       // TODO: pull this out as a helper function and reuse w/existing
2153       // (slightly different) logic.
2154       if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS))
2155         std::swap(CmpLHS, CmpRHS);
2156       if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))
2157         std::swap(CmpLHS, CmpRHS);
2158       if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) ||
2159           (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) {
2160         // Move the 'oldest' value to the right-hand side, using the value
2161         // number as a proxy for age.
2162         uint32_t LVN = VN.lookupOrAdd(CmpLHS);
2163         uint32_t RVN = VN.lookupOrAdd(CmpRHS);
2164         if (LVN < RVN)
2165           std::swap(CmpLHS, CmpRHS);
2166       }
2167 
2168       // Handle the degenerate case where we either haven't pruned a dead path
2169       // or removed a trivial assume yet.
2170       if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
2171         return Changed;
2172 
2173       LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
2174                  << *CmpLHS << " with "
2175                  << *CmpRHS << " in block "
2176                  << IntrinsicI->getParent()->getName() << "\n");
2177 
2178 
2179       // Setup the replacement map - this handles uses within the same block
2180       if (hasUsersIn(CmpLHS, IntrinsicI->getParent()))
2181         ReplaceOperandsWithMap[CmpLHS] = CmpRHS;
2182 
2183       // NOTE: The non-block local cases are handled by the call to
2184       // propagateEquality above; this block is just about handling the block
2185       // local cases.  TODO: There's a bunch of logic in propagateEquality which
2186       // isn't duplicated for the block-local case; can we share it somehow?
2187     }
2188   }
2189   return Changed;
2190 }
2191 
2192 static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
2193   patchReplacementInstruction(I, Repl);
2194   I->replaceAllUsesWith(Repl);
2195 }
2196 
2197 /// Attempt to eliminate a load, first by eliminating it
2198 /// locally, and then attempting non-local elimination if that fails.
2199 bool GVNPass::processLoad(LoadInst *L) {
2200   if (!MD)
2201     return false;
2202 
2203   // This code hasn't been audited for ordered or volatile memory access
2204   if (!L->isUnordered())
2205     return false;
2206 
2207   if (L->use_empty()) {
2208     markInstructionForDeletion(L);
2209     return true;
2210   }
2211 
2212   // ... to a pointer that has been loaded from before...
2213   MemDepResult Dep = MD->getDependency(L);
2214 
2215   // If it is defined in another block, try harder.
2216   if (Dep.isNonLocal())
2217     return processNonLocalLoad(L);
2218 
2219   // Only handle the local case below
2220   if (!Dep.isLocal()) {
2221     // This might be a NonFuncLocal or an Unknown
2222     LLVM_DEBUG(
2223         // fast print dep, using operator<< on instruction is too slow.
2224         dbgs() << "GVN: load "; L->printAsOperand(dbgs());
2225         dbgs() << " has unknown dependence\n";);
2226     return false;
2227   }
2228 
2229   auto AV = AnalyzeLoadAvailability(L, Dep, L->getPointerOperand());
2230   if (!AV)
2231     return false;
2232 
2233   Value *AvailableValue = AV->MaterializeAdjustedValue(L, L, *this);
2234 
2235   // MaterializeAdjustedValue is responsible for combining metadata.
2236   ICF->removeUsersOf(L);
2237   L->replaceAllUsesWith(AvailableValue);
2238   markInstructionForDeletion(L);
2239   if (MSSAU)
2240     MSSAU->removeMemoryAccess(L);
2241   ++NumGVNLoad;
2242   reportLoadElim(L, AvailableValue, ORE);
2243   // Tell MDA to reexamine the reused pointer since we might have more
2244   // information after forwarding it.
2245   if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
2246     MD->invalidateCachedPointerInfo(AvailableValue);
2247   return true;
2248 }
2249 
2250 /// Return a pair: the first field is the value number of \p Exp, and the
2251 /// second field indicates whether that value number is newly created.
2252 std::pair<uint32_t, bool>
2253 GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
2254   uint32_t &e = expressionNumbering[Exp];
2255   bool CreateNewValNum = !e;
2256   if (CreateNewValNum) {
2257     Expressions.push_back(Exp);
2258     if (ExprIdx.size() < nextValueNumber + 1)
2259       ExprIdx.resize(nextValueNumber * 2);
2260     e = nextValueNumber;
2261     ExprIdx[nextValueNumber++] = nextExprNumber++;
2262   }
2263   return {e, CreateNewValNum};
2264 }
2265 
2266 /// Return whether all the values related with the same \p num are
2267 /// defined in \p BB.
2268 bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
2269                                          GVNPass &Gvn) {
2270   return all_of(
2271       Gvn.LeaderTable.getLeaders(Num),
2272       [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
2273 }
2274 
2275 /// Wrap phiTranslateImpl to provide caching functionality.
2276 uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
2277                                            const BasicBlock *PhiBlock,
2278                                            uint32_t Num, GVNPass &Gvn) {
2279   auto FindRes = PhiTranslateTable.find({Num, Pred});
2280   if (FindRes != PhiTranslateTable.end())
2281     return FindRes->second;
2282   uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
2283   PhiTranslateTable.insert({{Num, Pred}, NewNum});
2284   return NewNum;
2285 }
2286 
2287 // Return true if the value numbers \p Num and \p NewNum have equal values.
2288 // Return false if the result is unknown.
2289 bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
2290                                            const BasicBlock *Pred,
2291                                            const BasicBlock *PhiBlock,
2292                                            GVNPass &Gvn) {
2293   CallInst *Call = nullptr;
2294   auto Leaders = Gvn.LeaderTable.getLeaders(Num);
2295   for (const auto &Entry : Leaders) {
2296     Call = dyn_cast<CallInst>(Entry.Val);
2297     if (Call && Call->getParent() == PhiBlock)
2298       break;
2299   }
2300 
2301   if (AA->doesNotAccessMemory(Call))
2302     return true;
2303 
2304   if (!MD || !AA->onlyReadsMemory(Call))
2305     return false;
2306 
2307   MemDepResult local_dep = MD->getDependency(Call);
2308   if (!local_dep.isNonLocal())
2309     return false;
2310 
2311   const MemoryDependenceResults::NonLocalDepInfo &deps =
2312       MD->getNonLocalCallDependency(Call);
2313 
2314   // Check to see if the Call has no function local clobber.
2315   for (const NonLocalDepEntry &D : deps) {
2316     if (D.getResult().isNonFuncLocal())
2317       return true;
2318   }
2319   return false;
2320 }
2321 
2322 /// Translate value number \p Num using phis, so that it has the values of
2323 /// the phis in BB.
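/// A hedged illustrative example (names invented): with
///   %p = phi i32 [ %a, %Pred1 ], [ %b, %Pred2 ]
/// in PhiBlock, translating the value number of 'add i32 %p, 1' across the
/// edge from %Pred1 yields the number of 'add i32 %a, 1', i.e. the value the
/// expression takes when PhiBlock is entered from that predecessor.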
2324 uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
2325                                                const BasicBlock *PhiBlock,
2326                                                uint32_t Num, GVNPass &Gvn) {
2327   if (PHINode *PN = NumberingPhi[Num]) {
2328     for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
2329       if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
2330         if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
2331           return TransVal;
2332     }
2333     return Num;
2334   }
2335 
2336   // If any value related with Num is defined in a BB other than
2337   // PhiBlock, it cannot depend on a phi in PhiBlock without going through
2338   // a backedge. We can do an early exit in that case to save compile time.
2339   if (!areAllValsInBB(Num, PhiBlock, Gvn))
2340     return Num;
2341 
2342   if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
2343     return Num;
2344   Expression Exp = Expressions[ExprIdx[Num]];
2345 
2346   for (unsigned i = 0; i < Exp.varargs.size(); i++) {
2347     // For InsertValue and ExtractValue, some varargs are index numbers
2348     // instead of value numbers. Those index numbers should not be
2349     // translated.
2350     if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
2351         (i > 0 && Exp.opcode == Instruction::ExtractValue) ||
2352         (i > 1 && Exp.opcode == Instruction::ShuffleVector))
2353       continue;
2354     Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
2355   }
2356 
2357   if (Exp.commutative) {
2358     assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!");
2359     if (Exp.varargs[0] > Exp.varargs[1]) {
2360       std::swap(Exp.varargs[0], Exp.varargs[1]);
2361       uint32_t Opcode = Exp.opcode >> 8;
2362       if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
2363         Exp.opcode = (Opcode << 8) |
2364                      CmpInst::getSwappedPredicate(
2365                          static_cast<CmpInst::Predicate>(Exp.opcode & 255));
2366     }
2367   }
2368 
2369   if (uint32_t NewNum = expressionNumbering[Exp]) {
2370     if (Exp.opcode == Instruction::Call && NewNum != Num)
2371       return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num;
2372     return NewNum;
2373   }
2374   return Num;
2375 }
2376 
2377 /// Erase stale entry from phiTranslate cache so phiTranslate can be computed
2378 /// again.
2379 void GVNPass::ValueTable::eraseTranslateCacheEntry(
2380     uint32_t Num, const BasicBlock &CurrBlock) {
2381   for (const BasicBlock *Pred : predecessors(&CurrBlock))
2382     PhiTranslateTable.erase({Num, Pred});
2383 }
2384 
2385 // In order to find a leader for a given value number at a
2386 // specific basic block, we first obtain the list of all Values for that number,
2387 // and then scan the list to find one whose block dominates the block in
2388 // question.  This is fast because dominator tree queries consist of only
2389 // a few comparisons of DFS numbers.
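// A hedged illustrative example: if value number 7 has leaders %a (in BB1) and
// %b (in BB2), and only BB1 dominates the query block, %a is returned; a
// Constant leader is returned as soon as it is found.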
2390 Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) {
2391   auto Leaders = LeaderTable.getLeaders(num);
2392   if (Leaders.empty())
2393     return nullptr;
2394 
2395   Value *Val = nullptr;
2396   for (const auto &Entry : Leaders) {
2397     if (DT->dominates(Entry.BB, BB)) {
2398       Val = Entry.Val;
2399       if (isa<Constant>(Val))
2400         return Val;
2401     }
2402   }
2403 
2404   return Val;
2405 }
2406 
2407 /// There is an edge from 'Src' to 'Dst'.  Return
2408 /// true if every path from the entry block to 'Dst' passes via this edge.  In
2409 /// particular 'Dst' must not be reachable via another edge from 'Src'.
2410 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
2411                                        DominatorTree *DT) {
2412   // While in theory it is interesting to consider the case in which Dst has
2413   // more than one predecessor, because Dst might be part of a loop which is
2414   // only reachable from Src, in practice it is pointless since at the time
2415   // GVN runs all such loops have preheaders, which means that Dst will have
2416   // been changed to have only one predecessor, namely Src.
2417   const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
2418   assert((!Pred || Pred == E.getStart()) &&
2419          "No edge between these basic blocks!");
2420   return Pred != nullptr;
2421 }
2422 
2423 void GVNPass::assignBlockRPONumber(Function &F) {
2424   BlockRPONumber.clear();
2425   uint32_t NextBlockNumber = 1;
2426   ReversePostOrderTraversal<Function *> RPOT(&F);
2427   for (BasicBlock *BB : RPOT)
2428     BlockRPONumber[BB] = NextBlockNumber++;
2429   InvalidBlockRPONumbers = false;
2430 }
2431 
2432 bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
2433   bool Changed = false;
2434   for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
2435     Value *Operand = Instr->getOperand(OpNum);
2436     auto it = ReplaceOperandsWithMap.find(Operand);
2437     if (it != ReplaceOperandsWithMap.end()) {
2438       LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with "
2439                         << *it->second << " in instruction " << *Instr << '\n');
2440       Instr->setOperand(OpNum, it->second);
2441       Changed = true;
2442     }
2443   }
2444   return Changed;
2445 }
2446 
2447 /// The given values are known to be equal in every block
2448 /// dominated by 'Root'.  Exploit this, for example by replacing 'LHS' with
2449 /// 'RHS' everywhere in the scope.  Returns whether a change was made.
2450 /// If DominatesByEdge is false, then it means that we will propagate the RHS
2451 /// value starting from the end of Root.Start.
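/// A hedged illustrative example (names invented): for
///   %c = icmp eq i32 %x, 42
///   br i1 %c, label %then, label %else
/// propagating '%c == true' along the edge to %then also deduces '%x == 42',
/// so uses of %x dominated by that edge are rewritten to 42.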
2452 bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
2453                                 const BasicBlockEdge &Root,
2454                                 bool DominatesByEdge) {
2455   SmallVector<std::pair<Value*, Value*>, 4> Worklist;
2456   Worklist.push_back(std::make_pair(LHS, RHS));
2457   bool Changed = false;
2458   // For speed, compute a conservative fast approximation to
2459   // DT->dominates(Root, Root.getEnd());
2460   const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
2461 
2462   while (!Worklist.empty()) {
2463     std::pair<Value*, Value*> Item = Worklist.pop_back_val();
2464     LHS = Item.first; RHS = Item.second;
2465 
2466     if (LHS == RHS)
2467       continue;
2468     assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
2469 
2470     // Don't try to propagate equalities between constants.
2471     if (isa<Constant>(LHS) && isa<Constant>(RHS))
2472       continue;
2473 
2474     // Prefer a constant on the right-hand side, or an Argument if no constants.
2475     if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
2476       std::swap(LHS, RHS);
2477     assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
2478     const DataLayout &DL =
2479         isa<Argument>(LHS)
2480             ? cast<Argument>(LHS)->getParent()->getDataLayout()
2481             : cast<Instruction>(LHS)->getDataLayout();
2482 
2483     // If there is no obvious reason to prefer the left-hand side over the
2484     // right-hand side, ensure the longest lived term is on the right-hand side,
2485     // so the shortest lived term will be replaced by the longest lived.
2486     // This tends to expose more simplifications.
2487     uint32_t LVN = VN.lookupOrAdd(LHS);
2488     if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
2489         (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
2490       // Move the 'oldest' value to the right-hand side, using the value number
2491       // as a proxy for age.
2492       uint32_t RVN = VN.lookupOrAdd(RHS);
2493       if (LVN < RVN) {
2494         std::swap(LHS, RHS);
2495         LVN = RVN;
2496       }
2497     }
2498 
2499     // If value numbering later sees that an instruction in the scope is equal
2500     // to 'LHS' then ensure it will be turned into 'RHS'.  In order to preserve
2501     // the invariant that instructions only occur in the leader table for their
2502     // own value number (this is used by removeFromLeaderTable), do not do this
2503     // if RHS is an instruction (if an instruction in the scope is morphed into
2504     // LHS then it will be turned into RHS by the next GVN iteration anyway, so
2505     // using the leader table is about compiling faster, not optimizing better).
2506     // The leader table only tracks basic blocks, not edges. Only add to it if
2507     // we have the simple case where the edge dominates the end.
2508     if (RootDominatesEnd && !isa<Instruction>(RHS) &&
2509         canReplacePointersIfEqual(LHS, RHS, DL))
2510       LeaderTable.insert(LVN, RHS, Root.getEnd());
2511 
2512     // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.  As
2513     // LHS always has at least one use that is not dominated by Root, this will
2514     // never do anything if LHS has only one use.
2515     if (!LHS->hasOneUse()) {
2516       // Create a callback that captures the DL.
2517       auto canReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
2518         return canReplacePointersInUseIfEqual(U, To, DL);
2519       };
2520       unsigned NumReplacements =
2521           DominatesByEdge
2522               ? replaceDominatedUsesWithIf(LHS, RHS, *DT, Root,
2523                                            canReplacePointersCallBack)
2524               : replaceDominatedUsesWithIf(LHS, RHS, *DT, Root.getStart(),
2525                                            canReplacePointersCallBack);
2526 
2527       if (NumReplacements > 0) {
2528         Changed = true;
2529         NumGVNEqProp += NumReplacements;
2530         // Cached information for anything that uses LHS will be invalid.
2531         if (MD)
2532           MD->invalidateCachedPointerInfo(LHS);
2533       }
2534     }
2535 
2536     // Now try to deduce additional equalities from this one. For example, if
2537     // the known equality was "(A != B)" == "false" then it follows that A and B
2538     // are equal in the scope. Only boolean equalities with an explicit true or
2539     // false RHS are currently supported.
2540     if (!RHS->getType()->isIntegerTy(1))
2541       // Not a boolean equality - bail out.
2542       continue;
2543     ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
2544     if (!CI)
2545       // RHS neither 'true' nor 'false' - bail out.
2546       continue;
2547     // Whether RHS equals 'true'.  Otherwise it equals 'false'.
2548     bool isKnownTrue = CI->isMinusOne();
2549     bool isKnownFalse = !isKnownTrue;
2550 
2551     // If "A && B" is known true then both A and B are known true.  If "A || B"
2552     // is known false then both A and B are known false.
2553     Value *A, *B;
2554     if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
2555         (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
2556       Worklist.push_back(std::make_pair(A, RHS));
2557       Worklist.push_back(std::make_pair(B, RHS));
2558       continue;
2559     }
2560 
2561     // If we are propagating an equality like "(A == B)" == "true" then also
2562     // propagate the equality A == B.  When propagating a comparison such as
2563     // "(A >= B)" == "true", replace all instances of "A < B" with "false".
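    // Illustrative sketch (editorial, not in the original source): on an edge
    // where
    //   %c = icmp sge i32 %a, %b
    // is known true, an instruction that value-numbers the same as the
    // inverted compare "icmp slt i32 %a, %b" can be replaced with false; had
    // the predicate been an equality, %a and %b themselves would be queued as
    // equal.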
2564     if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
2565       Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
2566 
2567       // If "A == B" is known true, or "A != B" is known false, then replace
2568       // A with B everywhere in the scope.  For floating point operations, we
2569     // have to be careful since equality does not always imply equivalence.
2570       if ((isKnownTrue && impliesEquivalanceIfTrue(Cmp)) ||
2571           (isKnownFalse && impliesEquivalanceIfFalse(Cmp)))
2572         Worklist.push_back(std::make_pair(Op0, Op1));
2573 
2574       // If "A >= B" is known true, replace "A < B" with false everywhere.
2575       CmpInst::Predicate NotPred = Cmp->getInversePredicate();
2576       Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
2577       // Since we don't have the instruction "A < B" immediately to hand, work
2578       // out the value number that it would have and use that to find an
2579       // appropriate instruction (if any).
2580       uint32_t NextNum = VN.getNextUnusedValueNumber();
2581       uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
2582       // If the number we were assigned was brand new then there is no point in
2583       // looking for an instruction realizing it: there cannot be one!
2584       if (Num < NextNum) {
2585         Value *NotCmp = findLeader(Root.getEnd(), Num);
2586         if (NotCmp && isa<Instruction>(NotCmp)) {
2587           unsigned NumReplacements =
2588               DominatesByEdge
2589                   ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
2590                   : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
2591                                              Root.getStart());
2592           Changed |= NumReplacements > 0;
2593           NumGVNEqProp += NumReplacements;
2594           // Cached information for anything that uses NotCmp will be invalid.
2595           if (MD)
2596             MD->invalidateCachedPointerInfo(NotCmp);
2597         }
2598       }
2599       // Ensure that any instruction in scope that gets the "A < B" value number
2600       // is replaced with false.
2601       // The leader table only tracks basic blocks, not edges. Only add to it if
2602       // we have the simple case where the edge dominates the end.
2603       if (RootDominatesEnd)
2604         LeaderTable.insert(Num, NotVal, Root.getEnd());
2605 
2606       continue;
2607     }
2608   }
2609 
2610   return Changed;
2611 }
2612 
2613 /// When calculating availability, handle an instruction
2614 /// by inserting it into the appropriate sets
2615 bool GVNPass::processInstruction(Instruction *I) {
2616   // Ignore dbg info intrinsics.
2617   if (isa<DbgInfoIntrinsic>(I))
2618     return false;
2619 
2620   // If the instruction can be easily simplified then do so now in preference
2621   // to value numbering it.  Value numbering often exposes redundancies, for
2622   // example if it determines that %y is equal to %x then the instruction
2623   // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
2624   const DataLayout &DL = I->getDataLayout();
2625   if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) {
2626     bool Changed = false;
2627     if (!I->use_empty()) {
2628       // Simplification can cause a special instruction to become not special.
2629       // For example, devirtualization to a willreturn function.
2630       ICF->removeUsersOf(I);
2631       I->replaceAllUsesWith(V);
2632       Changed = true;
2633     }
2634     if (isInstructionTriviallyDead(I, TLI)) {
2635       markInstructionForDeletion(I);
2636       Changed = true;
2637     }
2638     if (Changed) {
2639       if (MD && V->getType()->isPtrOrPtrVectorTy())
2640         MD->invalidateCachedPointerInfo(V);
2641       ++NumGVNSimpl;
2642       return true;
2643     }
2644   }
2645 
2646   if (auto *Assume = dyn_cast<AssumeInst>(I))
2647     return processAssumeIntrinsic(Assume);
2648 
2649   if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
2650     if (processLoad(Load))
2651       return true;
2652 
2653     unsigned Num = VN.lookupOrAdd(Load);
2654     LeaderTable.insert(Num, Load, Load->getParent());
2655     return false;
2656   }
2657 
2658   // For conditional branches, we can perform simple conditional propagation on
2659   // the condition value itself.
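  // Illustrative sketch (editorial, not in the original source):
  //   %cmp = icmp eq i32 %x, 0
  //   br i1 %cmp, label %iszero, label %nonzero
  // Along the edge to %iszero the condition %cmp is true (and hence %x == 0);
  // along the edge to %nonzero it is false. Both facts are handed to
  // propagateEquality below.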
2660   if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
2661     if (!BI->isConditional())
2662       return false;
2663 
2664     if (isa<Constant>(BI->getCondition()))
2665       return processFoldableCondBr(BI);
2666 
2667     Value *BranchCond = BI->getCondition();
2668     BasicBlock *TrueSucc = BI->getSuccessor(0);
2669     BasicBlock *FalseSucc = BI->getSuccessor(1);
2670     // Bail out early if both successors are the same block (multiple edges).
2671     if (TrueSucc == FalseSucc)
2672       return false;
2673 
2674     BasicBlock *Parent = BI->getParent();
2675     bool Changed = false;
2676 
2677     Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
2678     BasicBlockEdge TrueE(Parent, TrueSucc);
2679     Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
2680 
2681     Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
2682     BasicBlockEdge FalseE(Parent, FalseSucc);
2683     Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
2684 
2685     return Changed;
2686   }
2687 
2688   // For switches, propagate the case values into the case destinations.
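  // Illustrative sketch (editorial, not in the original source):
  //   switch i32 %x, label %default [ i32 7, label %bb7 ]
  // If %bb7 is reached only through this single case edge, then %x == 7 holds
  // in %bb7, so dominated uses of %x can be folded to 7.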
2689   if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
2690     Value *SwitchCond = SI->getCondition();
2691     BasicBlock *Parent = SI->getParent();
2692     bool Changed = false;
2693 
2694     // Remember how many outgoing edges there are to every successor.
2695     SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
2696     for (BasicBlock *Succ : successors(Parent))
2697       ++SwitchEdges[Succ];
2698 
2699     for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
2700          i != e; ++i) {
2701       BasicBlock *Dst = i->getCaseSuccessor();
2702       // If there is only a single edge, propagate the case value into it.
2703       if (SwitchEdges.lookup(Dst) == 1) {
2704         BasicBlockEdge E(Parent, Dst);
2705         Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true);
2706       }
2707     }
2708     return Changed;
2709   }
2710 
2711   // Instructions with void type don't return a value, so there's
2712   // no point in trying to find redundancies in them.
2713   if (I->getType()->isVoidTy())
2714     return false;
2715 
2716   uint32_t NextNum = VN.getNextUnusedValueNumber();
2717   unsigned Num = VN.lookupOrAdd(I);
2718 
2719   // Allocations, terminators and PHI nodes are always uniquely numbered, so we
2720   // can save time and memory by fast-failing them.
2721   if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
2722     LeaderTable.insert(Num, I, I->getParent());
2723     return false;
2724   }
2725 
2726   // If the number we were assigned was a brand new VN, then we don't
2727   // need to do a lookup to see if the number already exists
2728   // somewhere in the domtree: it can't!
2729   if (Num >= NextNum) {
2730     LeaderTable.insert(Num, I, I->getParent());
2731     return false;
2732   }
2733 
2734   // Perform fast-path value-number based elimination of values inherited from
2735   // dominators.
2736   Value *Repl = findLeader(I->getParent(), Num);
2737   if (!Repl) {
2738     // Failure, just remember this instance for future use.
2739     LeaderTable.insert(Num, I, I->getParent());
2740     return false;
2741   }
2742 
2743   if (Repl == I) {
2744     // If I was the result of a shortcut PRE, it might already be in the table
2745     // and the best replacement for itself. Nothing to do.
2746     return false;
2747   }
2748 
2749   // Remove it!
2750   patchAndReplaceAllUsesWith(I, Repl);
2751   if (MD && Repl->getType()->isPtrOrPtrVectorTy())
2752     MD->invalidateCachedPointerInfo(Repl);
2753   markInstructionForDeletion(I);
2754   return true;
2755 }
2756 
2757 /// runImpl - This is the main transformation entry point for a function.
2758 bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
2759                       const TargetLibraryInfo &RunTLI, AAResults &RunAA,
2760                       MemoryDependenceResults *RunMD, LoopInfo &LI,
2761                       OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
2762   AC = &RunAC;
2763   DT = &RunDT;
2764   VN.setDomTree(DT);
2765   TLI = &RunTLI;
2766   VN.setAliasAnalysis(&RunAA);
2767   MD = RunMD;
2768   ImplicitControlFlowTracking ImplicitCFT;
2769   ICF = &ImplicitCFT;
2770   this->LI = &LI;
2771   VN.setMemDep(MD);
2772   ORE = RunORE;
2773   InvalidBlockRPONumbers = true;
2774   MemorySSAUpdater Updater(MSSA);
2775   MSSAU = MSSA ? &Updater : nullptr;
2776 
2777   bool Changed = false;
2778   bool ShouldContinue = true;
2779 
2780   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2781   // Merge unconditional branches, allowing PRE to catch more
2782   // optimization opportunities.
2783   for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
2784     bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, &LI, MSSAU, MD);
2785     if (removedBlock)
2786       ++NumGVNBlocks;
2787 
2788     Changed |= removedBlock;
2789   }
2790   DTU.flush();
2791 
2792   unsigned Iteration = 0;
2793   while (ShouldContinue) {
2794     LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
2795     (void) Iteration;
2796     ShouldContinue = iterateOnFunction(F);
2797     Changed |= ShouldContinue;
2798     ++Iteration;
2799   }
2800 
2801   if (isPREEnabled()) {
2802     // Fabricate val-num for dead-code in order to suppress assertion in
2803     // performPRE().
2804     assignValNumForDeadCode();
2805     bool PREChanged = true;
2806     while (PREChanged) {
2807       PREChanged = performPRE(F);
2808       Changed |= PREChanged;
2809     }
2810   }
2811 
2812   // FIXME: Should perform GVN again after PRE does something.  PRE can move
2813   // computations into blocks where they become fully redundant.  Note that
2814   // we can't do this until PRE's critical edge splitting updates memdep.
2815   // Actually, when this happens, we should just fully integrate PRE into GVN.
2816 
2817   cleanupGlobalSets();
2818   // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
2819   // iteration.
2820   DeadBlocks.clear();
2821 
2822   if (MSSA && VerifyMemorySSA)
2823     MSSA->verifyMemorySSA();
2824 
2825   return Changed;
2826 }
2827 
2828 bool GVNPass::processBlock(BasicBlock *BB) {
2829   // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
2830   // (and incrementing BI before processing an instruction).
2831   assert(InstrsToErase.empty() &&
2832          "We expect InstrsToErase to be empty across iterations");
2833   if (DeadBlocks.count(BB))
2834     return false;
2835 
2836   // Clear the map before every BB because it is only valid for a single BB.
2837   ReplaceOperandsWithMap.clear();
2838   bool ChangedFunction = false;
2839 
2840   // Since we may not have visited the input blocks of the phis, we can't
2841   // use our normal hash approach for phis.  Instead, simply look for
2842   // obvious duplicates.  The first pass of GVN will tend to create
2843   // identical phis, and the second or later passes can eliminate them.
2844   SmallPtrSet<PHINode *, 8> PHINodesToRemove;
2845   ChangedFunction |= EliminateDuplicatePHINodes(BB, PHINodesToRemove);
2846   for (PHINode *PN : PHINodesToRemove) {
2847     VN.erase(PN);
2848     removeInstruction(PN);
2849   }
2850 
2851   for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
2852        BI != BE;) {
2853     if (!ReplaceOperandsWithMap.empty())
2854       ChangedFunction |= replaceOperandsForInBlockEquality(&*BI);
2855     ChangedFunction |= processInstruction(&*BI);
2856 
2857     if (InstrsToErase.empty()) {
2858       ++BI;
2859       continue;
2860     }
2861 
2862     // If we need some instructions deleted, do it now.
2863     NumGVNInstr += InstrsToErase.size();
2864 
2865     // Avoid iterator invalidation.
2866     bool AtStart = BI == BB->begin();
2867     if (!AtStart)
2868       --BI;
2869 
2870     for (auto *I : InstrsToErase) {
2871       assert(I->getParent() == BB && "Removing instruction from wrong block?");
2872       LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n');
2873       salvageKnowledge(I, AC);
2874       salvageDebugInfo(*I);
2875       removeInstruction(I);
2876     }
2877     InstrsToErase.clear();
2878 
2879     if (AtStart)
2880       BI = BB->begin();
2881     else
2882       ++BI;
2883   }
2884 
2885   return ChangedFunction;
2886 }
2887 
2888 // Instantiate an expression in a predecessor that lacked it.
2889 bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
2890                                         BasicBlock *Curr, unsigned int ValNo) {
2891   // Because we are going top-down through the block, all value numbers
2892   // will be available in the predecessor by the time we need them.  Any
2893   // that weren't originally present will have been instantiated earlier
2894   // in this loop.
2895   bool success = true;
2896   for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) {
2897     Value *Op = Instr->getOperand(i);
2898     if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2899       continue;
2900     // This could be a newly inserted instruction, in which case, we won't
2901     // find a value number, and should give up before we hurt ourselves.
2902     // FIXME: Rewrite the infrastructure to make it easier to value number
2903     // and process newly inserted instructions.
2904     if (!VN.exists(Op)) {
2905       success = false;
2906       break;
2907     }
2908     uint32_t TValNo =
2909         VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
2910     if (Value *V = findLeader(Pred, TValNo)) {
2911       Instr->setOperand(i, V);
2912     } else {
2913       success = false;
2914       break;
2915     }
2916   }
2917 
2918   // Fail out if we encounter an operand that is not available in
2919   // the PRE predecessor.  This is typically because of loads which
2920   // are not value numbered precisely.
2921   if (!success)
2922     return false;
2923 
2924   Instr->insertBefore(Pred->getTerminator());
2925   Instr->setName(Instr->getName() + ".pre");
2926   Instr->setDebugLoc(Instr->getDebugLoc());
2927 
2928   ICF->insertInstructionTo(Instr, Pred);
2929 
2930   unsigned Num = VN.lookupOrAdd(Instr);
2931   VN.add(Instr, Num);
2932 
2933   // Update the availability map to include the new instruction.
2934   LeaderTable.insert(Num, Instr, Pred);
2935   return true;
2936 }
2937 
2938 bool GVNPass::performScalarPRE(Instruction *CurInst) {
2939   if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
2940       isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
2941       CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
2942       isa<DbgInfoIntrinsic>(CurInst))
2943     return false;
2944 
2945   // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
2946   // sinking the compare again, and it would force the code generator to
2947   // move the i1 from processor flags or predicate registers into a general
2948   // purpose register.
2949   if (isa<CmpInst>(CurInst))
2950     return false;
2951 
2952   // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
2953   // sinking the addressing mode computation back to its uses. Extending the
2954   // GEP's live range increases the register pressure, and therefore it can
2955   // introduce unnecessary spills.
2956   //
2957   // This doesn't prevent Load PRE. PHI translation will make the GEP available
2958   // to the load by moving it to the predecessor block if necessary.
2959   if (isa<GetElementPtrInst>(CurInst))
2960     return false;
2961 
2962   if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
2963     // We don't currently value number ANY inline asm calls.
2964     if (CallB->isInlineAsm())
2965       return false;
2966   }
2967 
2968   uint32_t ValNo = VN.lookup(CurInst);
2969 
2970   // Look for the predecessors for PRE opportunities.  We're
2971   // only trying to solve the basic diamond case, where
2972   // a value is computed in the successor and one predecessor,
2973   // but not the other.  We also explicitly disallow cases
2974   // where the successor is its own predecessor, because they're
2975   // more complicated to get right.
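  // Illustrative sketch (editorial, not in the original source):
  //   then:                        else:
  //     %a1 = add i32 %x, 1          ; no equivalent add here
  //     br label %join               br label %join
  //   join:
  //     %a2 = add i32 %x, 1        ; partially redundant
  // PRE clones the add into %else and replaces %a2 with a phi of the two
  // predecessor values in %join.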
2976   unsigned NumWith = 0;
2977   unsigned NumWithout = 0;
2978   BasicBlock *PREPred = nullptr;
2979   BasicBlock *CurrentBlock = CurInst->getParent();
2980 
2981   // Update the RPO numbers for this function.
2982   if (InvalidBlockRPONumbers)
2983     assignBlockRPONumber(*CurrentBlock->getParent());
2984 
2985   SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
2986   for (BasicBlock *P : predecessors(CurrentBlock)) {
2987     // We're not interested in PRE when the block has predecessors that are
2988     // not reachable.
2989     if (!DT->isReachableFromEntry(P)) {
2990       NumWithout = 2;
2991       break;
2992     }
2993     // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
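    // Editorial note (not in the original source): in a reverse post-order
    // numbering, the source of a back edge is numbered greater than or equal
    // to its target, so the comparison below conservatively rejects backedges
    // (and any predecessor not ordered before CurrentBlock).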
2994     assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
2995            "Invalid BlockRPONumber map.");
2996     if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
2997       NumWithout = 2;
2998       break;
2999     }
3000 
3001     uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
3002     Value *predV = findLeader(P, TValNo);
3003     if (!predV) {
3004       predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
3005       PREPred = P;
3006       ++NumWithout;
3007     } else if (predV == CurInst) {
3008       /* CurInst dominates this predecessor. */
3009       NumWithout = 2;
3010       break;
3011     } else {
3012       predMap.push_back(std::make_pair(predV, P));
3013       ++NumWith;
3014     }
3015   }
3016 
3017   // Don't do PRE when it might increase code size, i.e. when
3018   // we would need to insert instructions in more than one pred.
3019   if (NumWithout > 1 || NumWith == 0)
3020     return false;
3021 
3022   // We may have a case where all predecessors have the instruction,
3023   // and we just need to insert a phi node. Otherwise, perform
3024   // insertion.
3025   Instruction *PREInstr = nullptr;
3026 
3027   if (NumWithout != 0) {
3028     if (!isSafeToSpeculativelyExecute(CurInst)) {
3029       // It is only valid to insert a new instruction if the current instruction
3030       // is always executed. An instruction with implicit control flow could
3031       // prevent us from doing it. If we cannot speculate the execution, then
3032       // PRE should be prohibited.
3033       if (ICF->isDominatedByICFIFromSameBlock(CurInst))
3034         return false;
3035     }
3036 
3037     // Don't do PRE across indirect branch.
3038     if (isa<IndirectBrInst>(PREPred->getTerminator()))
3039       return false;
3040 
3041     // We can't do PRE safely on a critical edge, so instead we schedule
3042     // the edge to be split and perform the PRE the next time we iterate
3043     // on the function.
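    // Illustrative sketch (editorial, not in the original source): an edge
    // A->C is critical when A has multiple successors and C has multiple
    // predecessors, e.g.
    //   A:  br i1 %c, label %B, label %C
    //   B:  br label %C
    // Inserting into A would also execute the expression on the A->B path, so
    // the A->C edge must first be split with a new block.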
3044     unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
3045     if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
3046       toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
3047       return false;
3048     }
3049     // We need to insert somewhere, so let's give it a shot
3050     PREInstr = CurInst->clone();
3051     if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
3052       // If we failed insertion, make sure we remove the instruction.
3053 #ifndef NDEBUG
3054       verifyRemoved(PREInstr);
3055 #endif
3056       PREInstr->deleteValue();
3057       return false;
3058     }
3059   }
3060 
3061   // Either we should have filled in the PRE instruction, or we should
3062   // not have needed insertions.
3063   assert(PREInstr != nullptr || NumWithout == 0);
3064 
3065   ++NumGVNPRE;
3066 
3067   // Create a PHI to make the value available in this block.
3068   PHINode *Phi = PHINode::Create(CurInst->getType(), predMap.size(),
3069                                  CurInst->getName() + ".pre-phi");
3070   Phi->insertBefore(CurrentBlock->begin());
3071   for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
3072     if (Value *V = predMap[i].first) {
3073       // If we use an existing value in this phi, we have to patch the original
3074       // value because the phi will be used to replace a later value.
3075       patchReplacementInstruction(CurInst, V);
3076       Phi->addIncoming(V, predMap[i].second);
3077     } else
3078       Phi->addIncoming(PREInstr, PREPred);
3079   }
3080 
3081   VN.add(Phi, ValNo);
3082   // After creating a new PHI for ValNo, the phi translate result for ValNo will
3083   // be changed, so erase the related stale entries in phi translate cache.
3084   VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
3085   LeaderTable.insert(ValNo, Phi, CurrentBlock);
3086   Phi->setDebugLoc(CurInst->getDebugLoc());
3087   CurInst->replaceAllUsesWith(Phi);
3088   if (MD && Phi->getType()->isPtrOrPtrVectorTy())
3089     MD->invalidateCachedPointerInfo(Phi);
3090   VN.erase(CurInst);
3091   LeaderTable.erase(ValNo, CurInst, CurrentBlock);
3092 
3093   LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
3094   removeInstruction(CurInst);
3095   ++NumGVNInstr;
3096 
3097   return true;
3098 }
3099 
3100 /// Perform a purely local form of PRE that looks for diamond
3101 /// control flow patterns and attempts to perform simple PRE at the join point.
3102 bool GVNPass::performPRE(Function &F) {
3103   bool Changed = false;
3104   for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
3105     // Nothing to PRE in the entry block.
3106     if (CurrentBlock == &F.getEntryBlock())
3107       continue;
3108 
3109     // Don't perform PRE on an EH pad.
3110     if (CurrentBlock->isEHPad())
3111       continue;
3112 
3113     for (BasicBlock::iterator BI = CurrentBlock->begin(),
3114                               BE = CurrentBlock->end();
3115          BI != BE;) {
3116       Instruction *CurInst = &*BI++;
3117       Changed |= performScalarPRE(CurInst);
3118     }
3119   }
3120 
3121   if (splitCriticalEdges())
3122     Changed = true;
3123 
3124   return Changed;
3125 }
3126 
3127 /// Split the critical edge connecting the given two blocks, and return
3128 /// the block inserted on the critical edge.
3129 BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
3130   // GVN does not require loop-simplify, so do not try to preserve it if that
3131   // is not possible.
3132   BasicBlock *BB = SplitCriticalEdge(
3133       Pred, Succ,
3134       CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
3135   if (BB) {
3136     if (MD)
3137       MD->invalidateCachedPredecessors();
3138     InvalidBlockRPONumbers = true;
3139   }
3140   return BB;
3141 }
3142 
3143 /// Split critical edges found during the previous
3144 /// iteration that may enable further optimization.
3145 bool GVNPass::splitCriticalEdges() {
3146   if (toSplit.empty())
3147     return false;
3148 
3149   bool Changed = false;
3150   do {
3151     std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
3152     Changed |= SplitCriticalEdge(Edge.first, Edge.second,
3153                                  CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
3154                nullptr;
3155   } while (!toSplit.empty());
3156   if (Changed) {
3157     if (MD)
3158       MD->invalidateCachedPredecessors();
3159     InvalidBlockRPONumbers = true;
3160   }
3161   return Changed;
3162 }
3163 
3164 /// Executes one iteration of GVN
3165 bool GVNPass::iterateOnFunction(Function &F) {
3166   cleanupGlobalSets();
3167 
3168   // Top-down walk of the dominator tree
3169   bool Changed = false;
3170   // Needed for value numbering with phi construction to work.
3171   // RPOT walks the graph in its constructor and will not be invalidated during
3172   // processBlock.
3173   ReversePostOrderTraversal<Function *> RPOT(&F);
3174 
3175   for (BasicBlock *BB : RPOT)
3176     Changed |= processBlock(BB);
3177 
3178   return Changed;
3179 }
3180 
3181 void GVNPass::cleanupGlobalSets() {
3182   VN.clear();
3183   LeaderTable.clear();
3184   BlockRPONumber.clear();
3185   ICF->clear();
3186   InvalidBlockRPONumbers = true;
3187 }
3188 
3189 void GVNPass::removeInstruction(Instruction *I) {
3190   if (MD) MD->removeInstruction(I);
3191   if (MSSAU)
3192     MSSAU->removeMemoryAccess(I);
3193 #ifndef NDEBUG
3194   verifyRemoved(I);
3195 #endif
3196   ICF->removeInstruction(I);
3197   I->eraseFromParent();
3198 }
3199 
3200 /// Verify that the specified instruction does not occur in our
3201 /// internal data structures.
3202 void GVNPass::verifyRemoved(const Instruction *Inst) const {
3203   VN.verifyRemoved(Inst);
3204   LeaderTable.verifyRemoved(Inst);
3205 }
3206 
3207 /// BB is declared dead, which implies other blocks become dead as well. This
3208 /// function adds all these blocks to "DeadBlocks". For the dead blocks'
3209 /// live successors, update their phi nodes by replacing the operands
3210 /// corresponding to dead blocks with UndefVal.
3211 void GVNPass::addDeadBlock(BasicBlock *BB) {
3212   SmallVector<BasicBlock *, 4> NewDead;
3213   SmallSetVector<BasicBlock *, 4> DF;
3214 
3215   NewDead.push_back(BB);
3216   while (!NewDead.empty()) {
3217     BasicBlock *D = NewDead.pop_back_val();
3218     if (DeadBlocks.count(D))
3219       continue;
3220 
3221     // All blocks dominated by D are dead.
3222     SmallVector<BasicBlock *, 8> Dom;
3223     DT->getDescendants(D, Dom);
3224     DeadBlocks.insert(Dom.begin(), Dom.end());
3225 
3226     // Figure out the dominance-frontier(D).
3227     for (BasicBlock *B : Dom) {
3228       for (BasicBlock *S : successors(B)) {
3229         if (DeadBlocks.count(S))
3230           continue;
3231 
3232         bool AllPredDead = true;
3233         for (BasicBlock *P : predecessors(S))
3234           if (!DeadBlocks.count(P)) {
3235             AllPredDead = false;
3236             break;
3237           }
3238 
3239         if (!AllPredDead) {
3240           // S could be proved dead later on. That is why we don't update phi
3241           // operands at this moment.
3242           DF.insert(S);
3243         } else {
3244           // While S is not dominated by D, it is dead by now. This could
3245           // happen if S already had a dead predecessor before D was declared
3246           // dead.
3247           NewDead.push_back(S);
3248         }
3249       }
3250     }
3251   }
3252 
3253   // For the dead blocks' live successors, update their phi nodes by replacing
3254   // the operands corresponding to dead blocks with UndefVal.
3255   for (BasicBlock *B : DF) {
3256     if (DeadBlocks.count(B))
3257       continue;
3258 
3259     // First, split the critical edges. This might also create additional blocks
3260     // to preserve LoopSimplify form and adjust edges accordingly.
3261     SmallVector<BasicBlock *, 4> Preds(predecessors(B));
3262     for (BasicBlock *P : Preds) {
3263       if (!DeadBlocks.count(P))
3264         continue;
3265 
3266       if (llvm::is_contained(successors(P), B) &&
3267           isCriticalEdge(P->getTerminator(), B)) {
3268         if (BasicBlock *S = splitCriticalEdges(P, B))
3269           DeadBlocks.insert(P = S);
3270       }
3271     }
3272 
3273     // Now poison the incoming values from the dead predecessors.
3274     for (BasicBlock *P : predecessors(B)) {
3275       if (!DeadBlocks.count(P))
3276         continue;
3277       for (PHINode &Phi : B->phis()) {
3278         Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType()));
3279         if (MD)
3280           MD->invalidateCachedPointerInfo(&Phi);
3281       }
3282     }
3283   }
3284 }
3285 
3286 // If the given branch is recognized as a foldable branch (i.e. conditional
3287 // branch with constant condition), it will perform the following analyses and
3288 // transformations.
3289 //  1) If the dead outgoing edge is a critical edge, split it. Let
3290 //     R be the target of the dead outgoing edge.
3291 //  2) Identify the set of dead blocks implied by the branch's dead outgoing
3292 //     edge. The result of this step will be {X | X is dominated by R}.
3293 //  3) Identify those blocks which have at least one dead predecessor. The
3294 //     result of this step will be dominance-frontier(R).
3295 //  4) Update the PHIs in DF(R) by replacing the operands corresponding to
3296 //     dead blocks with "UndefVal" in the hope that these PHIs get optimized away.
3297 //
3298 // Return true iff *NEW* dead code is found.
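// Illustrative sketch (editorial, not in the original source): given
//   br i1 false, label %then, label %else
// the edge to %then is dead. If %then has no other predecessors it becomes R
// (otherwise the edge is first split and the new block becomes R), every block
// dominated by R is marked dead, and PHIs in the blocks on
// dominance-frontier(R) have their incoming values from dead blocks replaced.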
3299 bool GVNPass::processFoldableCondBr(BranchInst *BI) {
3300   if (!BI || BI->isUnconditional())
3301     return false;
3302 
3303   // If a branch has two identical successors, we cannot declare either dead.
3304   if (BI->getSuccessor(0) == BI->getSuccessor(1))
3305     return false;
3306 
3307   ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
3308   if (!Cond)
3309     return false;
3310 
3311   BasicBlock *DeadRoot =
3312       Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
3313   if (DeadBlocks.count(DeadRoot))
3314     return false;
3315 
3316   if (!DeadRoot->getSinglePredecessor())
3317     DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
3318 
3319   addDeadBlock(DeadRoot);
3320   return true;
3321 }
3322 
3323 // performPRE() will trigger an assert if it comes across an instruction without
3324 // an associated val-num. As a function normally has far more live instructions
3325 // than dead instructions, it makes more sense just to "fabricate" a val-number
3326 // for the dead code than to check whether the instruction involved is dead.
3327 void GVNPass::assignValNumForDeadCode() {
3328   for (BasicBlock *BB : DeadBlocks) {
3329     for (Instruction &Inst : *BB) {
3330       unsigned ValNum = VN.lookupOrAdd(&Inst);
3331       LeaderTable.insert(ValNum, &Inst, BB);
3332     }
3333   }
3334 }
3335 
3336 class llvm::gvn::GVNLegacyPass : public FunctionPass {
3337 public:
3338   static char ID; // Pass identification, replacement for typeid
3339 
3340   explicit GVNLegacyPass(bool NoMemDepAnalysis = !GVNEnableMemDep)
3341       : FunctionPass(ID), Impl(GVNOptions().setMemDep(!NoMemDepAnalysis)) {
3342     initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
3343   }
3344 
3345   bool runOnFunction(Function &F) override {
3346     if (skipFunction(F))
3347       return false;
3348 
3349     auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
3350     return Impl.runImpl(
3351         F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
3352         getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
3353         getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
3354         getAnalysis<AAResultsWrapperPass>().getAAResults(),
3355         Impl.isMemDepEnabled()
3356             ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
3357             : nullptr,
3358         getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
3359         &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
3360         MSSAWP ? &MSSAWP->getMSSA() : nullptr);
3361   }
3362 
3363   void getAnalysisUsage(AnalysisUsage &AU) const override {
3364     AU.addRequired<AssumptionCacheTracker>();
3365     AU.addRequired<DominatorTreeWrapperPass>();
3366     AU.addRequired<TargetLibraryInfoWrapperPass>();
3367     AU.addRequired<LoopInfoWrapperPass>();
3368     if (Impl.isMemDepEnabled())
3369       AU.addRequired<MemoryDependenceWrapperPass>();
3370     AU.addRequired<AAResultsWrapperPass>();
3371     AU.addPreserved<DominatorTreeWrapperPass>();
3372     AU.addPreserved<GlobalsAAWrapperPass>();
3373     AU.addPreserved<TargetLibraryInfoWrapperPass>();
3374     AU.addPreserved<LoopInfoWrapperPass>();
3375     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
3376     AU.addPreserved<MemorySSAWrapperPass>();
3377   }
3378 
3379 private:
3380   GVNPass Impl;
3381 };
3382 
3383 char GVNLegacyPass::ID = 0;
3384 
3385 INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
3386 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
3387 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
3388 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3389 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
3390 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
3391 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
3392 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
3393 INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
3394 
3395 // The public interface to this file...
3396 FunctionPass *llvm::createGVNPass(bool NoMemDepAnalysis) {
3397   return new GVNLegacyPass(NoMemDepAnalysis);
3398 }
3399