xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This pass performs global value numbering to eliminate fully redundant
10  // instructions.  It also performs simple dead load elimination.
11  //
12  // Note that this pass does the value numbering itself; it does not use the
13  // ValueNumbering analysis passes.
14  //
15  //===----------------------------------------------------------------------===//
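// Illustrative example (hypothetical IR): given
//
//     %a = add i32 %x, %y
//     %b = add i32 %x, %y        ; same opcode, same operands
//     %v1 = load i32, ptr %p
//     %v2 = load i32, ptr %p     ; no intervening store
//
// the pass assigns %a and %b the same value number and deletes %b, and the
// fully redundant second load is eliminated in favor of %v1.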
16  
17  #include "llvm/Transforms/Scalar/GVN.h"
18  #include "llvm/ADT/DenseMap.h"
19  #include "llvm/ADT/DepthFirstIterator.h"
20  #include "llvm/ADT/Hashing.h"
21  #include "llvm/ADT/MapVector.h"
22  #include "llvm/ADT/PostOrderIterator.h"
23  #include "llvm/ADT/STLExtras.h"
24  #include "llvm/ADT/SetVector.h"
25  #include "llvm/ADT/SmallPtrSet.h"
26  #include "llvm/ADT/SmallVector.h"
27  #include "llvm/ADT/Statistic.h"
28  #include "llvm/Analysis/AliasAnalysis.h"
29  #include "llvm/Analysis/AssumeBundleQueries.h"
30  #include "llvm/Analysis/AssumptionCache.h"
31  #include "llvm/Analysis/CFG.h"
32  #include "llvm/Analysis/DomTreeUpdater.h"
33  #include "llvm/Analysis/GlobalsModRef.h"
34  #include "llvm/Analysis/InstructionPrecedenceTracking.h"
35  #include "llvm/Analysis/InstructionSimplify.h"
36  #include "llvm/Analysis/Loads.h"
37  #include "llvm/Analysis/LoopInfo.h"
38  #include "llvm/Analysis/MemoryBuiltins.h"
39  #include "llvm/Analysis/MemoryDependenceAnalysis.h"
40  #include "llvm/Analysis/MemorySSA.h"
41  #include "llvm/Analysis/MemorySSAUpdater.h"
42  #include "llvm/Analysis/OptimizationRemarkEmitter.h"
43  #include "llvm/Analysis/PHITransAddr.h"
44  #include "llvm/Analysis/TargetLibraryInfo.h"
45  #include "llvm/Analysis/ValueTracking.h"
46  #include "llvm/IR/Attributes.h"
47  #include "llvm/IR/BasicBlock.h"
48  #include "llvm/IR/Constant.h"
49  #include "llvm/IR/Constants.h"
50  #include "llvm/IR/DebugLoc.h"
51  #include "llvm/IR/Dominators.h"
52  #include "llvm/IR/Function.h"
53  #include "llvm/IR/InstrTypes.h"
54  #include "llvm/IR/Instruction.h"
55  #include "llvm/IR/Instructions.h"
56  #include "llvm/IR/IntrinsicInst.h"
57  #include "llvm/IR/LLVMContext.h"
58  #include "llvm/IR/Metadata.h"
59  #include "llvm/IR/Module.h"
60  #include "llvm/IR/PassManager.h"
61  #include "llvm/IR/PatternMatch.h"
62  #include "llvm/IR/Type.h"
63  #include "llvm/IR/Use.h"
64  #include "llvm/IR/Value.h"
65  #include "llvm/InitializePasses.h"
66  #include "llvm/Pass.h"
67  #include "llvm/Support/Casting.h"
68  #include "llvm/Support/CommandLine.h"
69  #include "llvm/Support/Compiler.h"
70  #include "llvm/Support/Debug.h"
71  #include "llvm/Support/raw_ostream.h"
72  #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
73  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
74  #include "llvm/Transforms/Utils/Local.h"
75  #include "llvm/Transforms/Utils/SSAUpdater.h"
76  #include "llvm/Transforms/Utils/VNCoercion.h"
77  #include <algorithm>
78  #include <cassert>
79  #include <cstdint>
80  #include <optional>
81  #include <utility>
82  
83  using namespace llvm;
84  using namespace llvm::gvn;
85  using namespace llvm::VNCoercion;
86  using namespace PatternMatch;
87  
88  #define DEBUG_TYPE "gvn"
89  
90  STATISTIC(NumGVNInstr, "Number of instructions deleted");
91  STATISTIC(NumGVNLoad, "Number of loads deleted");
92  STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
93  STATISTIC(NumGVNBlocks, "Number of blocks merged");
94  STATISTIC(NumGVNSimpl, "Number of instructions simplified");
95  STATISTIC(NumGVNEqProp, "Number of equalities propagated");
96  STATISTIC(NumPRELoad, "Number of loads PRE'd");
97  STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
98  STATISTIC(NumPRELoadMoved2CEPred,
99            "Number of loads moved to predecessor of a critical edge in PRE");
100  
101  STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
102            "Number of blocks speculated as available in "
103            "IsValueFullyAvailableInBlock(), max");
104  STATISTIC(MaxBBSpeculationCutoffReachedTimes,
105          "Number of times we reached gvn-max-block-speculations cut-off "
106            "preventing further exploration");
107  
108  static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
109  static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
110  static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
111                                              cl::init(true));
112  static cl::opt<bool>
113  GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
114                                  cl::init(false));
115  static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
116  
117  static cl::opt<uint32_t> MaxNumDeps(
118      "gvn-max-num-deps", cl::Hidden, cl::init(100),
119      cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
120  
121  // This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
122  static cl::opt<uint32_t> MaxBBSpeculations(
123      "gvn-max-block-speculations", cl::Hidden, cl::init(600),
124      cl::desc("Max number of blocks we're willing to speculate on (and recurse "
125               "into) when deducing if a value is fully available or not in GVN "
126               "(default = 600)"));
127  
128  static cl::opt<uint32_t> MaxNumVisitedInsts(
129      "gvn-max-num-visited-insts", cl::Hidden, cl::init(100),
130      cl::desc("Max number of visited instructions when trying to find "
131               "dominating value of select dependency (default = 100)"));
132  
133  static cl::opt<uint32_t> MaxNumInsnsPerBlock(
134      "gvn-max-num-insns", cl::Hidden, cl::init(100),
135      cl::desc("Max number of instructions to scan in each basic block in GVN "
136               "(default = 100)"));
137  
138  struct llvm::GVNPass::Expression {
139    uint32_t opcode;
140    bool commutative = false;
141    // The type is not necessarily the result type of the expression; it may be
142    // any additional type needed to disambiguate the expression.
143    Type *type = nullptr;
144    SmallVector<uint32_t, 4> varargs;
145  
146    Expression(uint32_t o = ~2U) : opcode(o) {}
147  
148    bool operator==(const Expression &other) const {
149      if (opcode != other.opcode)
150        return false;
151      if (opcode == ~0U || opcode == ~1U)
152        return true;
153      if (type != other.type)
154        return false;
155      if (varargs != other.varargs)
156        return false;
157      return true;
158    }
159  
160    friend hash_code hash_value(const Expression &Value) {
161      return hash_combine(
162          Value.opcode, Value.type,
163          hash_combine_range(Value.varargs.begin(), Value.varargs.end()));
164    }
165  };
166  
167  namespace llvm {
168  
169  template <> struct DenseMapInfo<GVNPass::Expression> {
170    static inline GVNPass::Expression getEmptyKey() { return ~0U; }
171    static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
172  
173    static unsigned getHashValue(const GVNPass::Expression &e) {
174      using llvm::hash_value;
175  
176      return static_cast<unsigned>(hash_value(e));
177    }
178  
179    static bool isEqual(const GVNPass::Expression &LHS,
180                        const GVNPass::Expression &RHS) {
181      return LHS == RHS;
182    }
183  };
184  
185  } // end namespace llvm
186  
187  /// Represents a particular available value that we know how to materialize.
188  /// Materialization of an AvailableValue never fails.  An AvailableValue is
189  /// implicitly associated with a rematerialization point which is the
190  /// location of the instruction from which it was formed.
191  struct llvm::gvn::AvailableValue {
192    enum class ValType {
193      SimpleVal, // A simple offsetted value that is accessed.
194      LoadVal,   // A value produced by a load.
195      MemIntrin, // A memory intrinsic which is loaded from.
196    UndefVal,  // An UndefValue representing a value from a dead block (which
197                 // is not yet physically removed from the CFG).
198      SelectVal, // A pointer select which is loaded from and for which the load
199               // can be replaced by a value select.
200    };
201  
202    /// Val - The value that is live out of the block.
203    Value *Val;
204    /// Kind of the live-out value.
205    ValType Kind;
206  
207    /// Offset - The byte offset in Val that is interesting for the load query.
208    unsigned Offset = 0;
209    /// V1, V2 - The dominating non-clobbered values of SelectVal.
210    Value *V1 = nullptr, *V2 = nullptr;
211  
212    static AvailableValue get(Value *V, unsigned Offset = 0) {
213      AvailableValue Res;
214      Res.Val = V;
215      Res.Kind = ValType::SimpleVal;
216      Res.Offset = Offset;
217      return Res;
218    }
219  
220    static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
221      AvailableValue Res;
222      Res.Val = MI;
223      Res.Kind = ValType::MemIntrin;
224      Res.Offset = Offset;
225      return Res;
226    }
227  
228    static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
229      AvailableValue Res;
230      Res.Val = Load;
231      Res.Kind = ValType::LoadVal;
232      Res.Offset = Offset;
233      return Res;
234    }
235  
236    static AvailableValue getUndef() {
237      AvailableValue Res;
238      Res.Val = nullptr;
239      Res.Kind = ValType::UndefVal;
240      Res.Offset = 0;
241      return Res;
242    }
243  
244    static AvailableValue getSelect(SelectInst *Sel, Value *V1, Value *V2) {
245      AvailableValue Res;
246      Res.Val = Sel;
247      Res.Kind = ValType::SelectVal;
248      Res.Offset = 0;
249      Res.V1 = V1;
250      Res.V2 = V2;
251      return Res;
252    }
253  
254    bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
255    bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
256    bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
257    bool isUndefValue() const { return Kind == ValType::UndefVal; }
258    bool isSelectValue() const { return Kind == ValType::SelectVal; }
259  
260    Value *getSimpleValue() const {
261      assert(isSimpleValue() && "Wrong accessor");
262      return Val;
263    }
264  
265    LoadInst *getCoercedLoadValue() const {
266      assert(isCoercedLoadValue() && "Wrong accessor");
267      return cast<LoadInst>(Val);
268    }
269  
270    MemIntrinsic *getMemIntrinValue() const {
271      assert(isMemIntrinValue() && "Wrong accessor");
272      return cast<MemIntrinsic>(Val);
273    }
274  
275    SelectInst *getSelectValue() const {
276      assert(isSelectValue() && "Wrong accessor");
277      return cast<SelectInst>(Val);
278    }
279  
280    /// Emit code at the specified insertion point to adjust the value defined
281    /// here to the specified type. This handles various coercion cases.
282    Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt,
283                                    GVNPass &gvn) const;
284  };
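// Illustrative example (hypothetical IR): for
//
//     store i64 %v, ptr %p
//     %x = load i32, ptr %p
//
// the load can be answered from the store, so it is described roughly as
// AvailableValue::get(%v, /*Offset=*/0) of kind SimpleVal; materialization
// then narrows %v to i32 at the rematerialization point instead of
// re-executing the load.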
285  
286  /// Represents an AvailableValue which can be rematerialized at the end of
287  /// the associated BasicBlock.
288  struct llvm::gvn::AvailableValueInBlock {
289    /// BB - The basic block in question.
290    BasicBlock *BB = nullptr;
291  
292    /// AV - The actual available value
293    AvailableValue AV;
294  
295    static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
296      AvailableValueInBlock Res;
297      Res.BB = BB;
298      Res.AV = std::move(AV);
299      return Res;
300    }
301  
302    static AvailableValueInBlock get(BasicBlock *BB, Value *V,
303                                     unsigned Offset = 0) {
304      return get(BB, AvailableValue::get(V, Offset));
305    }
306  
307    static AvailableValueInBlock getUndef(BasicBlock *BB) {
308      return get(BB, AvailableValue::getUndef());
309    }
310  
311    static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel,
312                                           Value *V1, Value *V2) {
313      return get(BB, AvailableValue::getSelect(Sel, V1, V2));
314    }
315  
316    /// Emit code at the end of this block to adjust the value defined here to
317    /// the specified type. This handles various coercion cases.
318    Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const {
319      return AV.MaterializeAdjustedValue(Load, BB->getTerminator(), gvn);
320    }
321  };
322  
323  //===----------------------------------------------------------------------===//
324  //                     ValueTable Internal Functions
325  //===----------------------------------------------------------------------===//
326  
327  GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
328    Expression e;
329    e.type = I->getType();
330    e.opcode = I->getOpcode();
331    if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) {
332      // gc.relocate is 'special' call: its second and third operands are
333      // not real values, but indices into statepoint's argument list.
334    // Use the referred-to values for purposes of identity.
335      e.varargs.push_back(lookupOrAdd(GCR->getOperand(0)));
336      e.varargs.push_back(lookupOrAdd(GCR->getBasePtr()));
337      e.varargs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
338    } else {
339      for (Use &Op : I->operands())
340        e.varargs.push_back(lookupOrAdd(Op));
341    }
342    if (I->isCommutative()) {
343      // Ensure that commutative instructions that only differ by a permutation
344      // of their operands get the same value number by sorting the operand value
345      // numbers.  Since commutative operands are the 1st two operands it is more
346      // efficient to sort by hand rather than using, say, std::sort.
347      assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
348      if (e.varargs[0] > e.varargs[1])
349        std::swap(e.varargs[0], e.varargs[1]);
350      e.commutative = true;
351    }
352  
353    if (auto *C = dyn_cast<CmpInst>(I)) {
354      // Sort the operand value numbers so x<y and y>x get the same value number.
355      CmpInst::Predicate Predicate = C->getPredicate();
356      if (e.varargs[0] > e.varargs[1]) {
357        std::swap(e.varargs[0], e.varargs[1]);
358        Predicate = CmpInst::getSwappedPredicate(Predicate);
359      }
360      e.opcode = (C->getOpcode() << 8) | Predicate;
361      e.commutative = true;
362    } else if (auto *E = dyn_cast<InsertValueInst>(I)) {
363      e.varargs.append(E->idx_begin(), E->idx_end());
364    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
365      ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
366      e.varargs.append(ShuffleMask.begin(), ShuffleMask.end());
367    }
368  
369    return e;
370  }
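// For example (hypothetical IR): with value numbers VN(%x) = 5 and VN(%y) = 9,
// both
//
//     %a = add i32 %x, %y
//     %b = add i32 %y, %x
//
// produce the Expression {opcode = Add, varargs = [5, 9]} after the swap
// above, so %a and %b receive the same value number.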
371  
372  GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
373      unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
374    assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
375           "Not a comparison!");
376    Expression e;
377    e.type = CmpInst::makeCmpResultType(LHS->getType());
378    e.varargs.push_back(lookupOrAdd(LHS));
379    e.varargs.push_back(lookupOrAdd(RHS));
380  
381    // Sort the operand value numbers so x<y and y>x get the same value number.
382    if (e.varargs[0] > e.varargs[1]) {
383      std::swap(e.varargs[0], e.varargs[1]);
384      Predicate = CmpInst::getSwappedPredicate(Predicate);
385    }
386    e.opcode = (Opcode << 8) | Predicate;
387    e.commutative = true;
388    return e;
389  }
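// For example (hypothetical IR): "icmp slt i32 %x, %y" and "icmp sgt i32 %y, %x"
// describe the same comparison.  Assuming VN(%x) < VN(%y), the second form has
// its operands swapped and its predicate replaced by the swapped predicate
// (sgt -> slt), so both end up with opcode (ICmp << 8) | ICMP_SLT and identical
// varargs.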
390  
391  GVNPass::Expression
392  GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
393    assert(EI && "Not an ExtractValueInst?");
394    Expression e;
395    e.type = EI->getType();
396    e.opcode = 0;
397  
398    WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
399    if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
400      // EI is an extract from one of our with.overflow intrinsics. Synthesize
401      // a semantically equivalent expression instead of an extract value
402      // expression.
403      e.opcode = WO->getBinaryOp();
404      e.varargs.push_back(lookupOrAdd(WO->getLHS()));
405      e.varargs.push_back(lookupOrAdd(WO->getRHS()));
406      return e;
407    }
408  
409    // Not a recognised intrinsic. Fall back to producing an extract value
410    // expression.
411    e.opcode = EI->getOpcode();
412    for (Use &Op : EI->operands())
413      e.varargs.push_back(lookupOrAdd(Op));
414  
415    append_range(e.varargs, EI->indices());
416  
417    return e;
418  }
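// For example (hypothetical IR): for
//
//     %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//     %v = extractvalue { i32, i1 } %s, 0
//
// %v is numbered as if it were the underlying "add i32 %a, %b" rather than as
// an extractvalue, so it can share a value number with an ordinary add of the
// same operands.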
419  
420  GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
421    Expression E;
422    Type *PtrTy = GEP->getType()->getScalarType();
423    const DataLayout &DL = GEP->getDataLayout();
424    unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
425    MapVector<Value *, APInt> VariableOffsets;
426    APInt ConstantOffset(BitWidth, 0);
427    if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
428      // Convert into offset representation, to recognize equivalent address
429      // calculations that use different type encoding.
430      LLVMContext &Context = GEP->getContext();
431      E.opcode = GEP->getOpcode();
432      E.type = nullptr;
433      E.varargs.push_back(lookupOrAdd(GEP->getPointerOperand()));
434      for (const auto &Pair : VariableOffsets) {
435        E.varargs.push_back(lookupOrAdd(Pair.first));
436        E.varargs.push_back(lookupOrAdd(ConstantInt::get(Context, Pair.second)));
437      }
438      if (!ConstantOffset.isZero())
439        E.varargs.push_back(
440            lookupOrAdd(ConstantInt::get(Context, ConstantOffset)));
441    } else {
442      // If converting to offset representation fails (for scalable vectors),
443      // fall back to type-based implementation:
444      E.opcode = GEP->getOpcode();
445      E.type = GEP->getSourceElementType();
446      for (Use &Op : GEP->operands())
447        E.varargs.push_back(lookupOrAdd(Op));
448    }
449    return E;
450  }
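// Illustrative example (hypothetical IR, assuming a 64-bit index type): the two
// address computations
//
//     %q1 = getelementptr inbounds i32, ptr %p, i64 1
//     %q2 = getelementptr inbounds i8,  ptr %p, i64 4
//
// both collect to {base = %p, constant offset = 4} and therefore produce the
// same Expression, even though their source element types differ.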
451  
452  //===----------------------------------------------------------------------===//
453  //                     ValueTable External Functions
454  //===----------------------------------------------------------------------===//
455  
456  GVNPass::ValueTable::ValueTable() = default;
457  GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
458  GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
459  GVNPass::ValueTable::~ValueTable() = default;
460  GVNPass::ValueTable &
461  GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
462  
463  /// add - Insert a value into the table with a specified value number.
464  void GVNPass::ValueTable::add(Value *V, uint32_t num) {
465    valueNumbering.insert(std::make_pair(V, num));
466    if (PHINode *PN = dyn_cast<PHINode>(V))
467      NumberingPhi[num] = PN;
468  }
469  
470  uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
471    // FIXME: Currently, calls that may access the thread id may be
472    // treated as not accessing memory. That is problematic for
473    // coroutines, since a coroutine may resume in a different thread,
474    // so we disable the optimization here for correctness. However,
475    // this may block many other correct optimizations. Revisit this
476    // once the kind of memory access can be detected more
477    // precisely.
478    if (C->getFunction()->isPresplitCoroutine()) {
479      valueNumbering[C] = nextValueNumber;
480      return nextValueNumber++;
481    }
482  
483    // Do not combine convergent calls since they implicitly depend on the set of
484    // threads that is currently executing, and they might be in different basic
485    // blocks.
486    if (C->isConvergent()) {
487      valueNumbering[C] = nextValueNumber;
488      return nextValueNumber++;
489    }
490  
491    if (AA->doesNotAccessMemory(C)) {
492      Expression exp = createExpr(C);
493      uint32_t e = assignExpNewValueNum(exp).first;
494      valueNumbering[C] = e;
495      return e;
496    }
497  
498    if (MD && AA->onlyReadsMemory(C)) {
499      Expression exp = createExpr(C);
500      auto ValNum = assignExpNewValueNum(exp);
501      if (ValNum.second) {
502        valueNumbering[C] = ValNum.first;
503        return ValNum.first;
504      }
505  
506      MemDepResult local_dep = MD->getDependency(C);
507  
508      if (!local_dep.isDef() && !local_dep.isNonLocal()) {
509        valueNumbering[C] =  nextValueNumber;
510        return nextValueNumber++;
511      }
512  
513      if (local_dep.isDef()) {
514        // For masked load/store intrinsics, the local_dep may actually be
515        // a normal load or store instruction.
516        CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
517  
518        if (!local_cdep || local_cdep->arg_size() != C->arg_size()) {
519          valueNumbering[C] = nextValueNumber;
520          return nextValueNumber++;
521        }
522  
523        for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
524          uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
525          uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
526          if (c_vn != cd_vn) {
527            valueNumbering[C] = nextValueNumber;
528            return nextValueNumber++;
529          }
530        }
531  
532        uint32_t v = lookupOrAdd(local_cdep);
533        valueNumbering[C] = v;
534        return v;
535      }
536  
537      // Non-local case.
538      const MemoryDependenceResults::NonLocalDepInfo &deps =
539          MD->getNonLocalCallDependency(C);
540      // FIXME: Move the checking logic to MemDep!
541      CallInst* cdep = nullptr;
542  
543      // Check to see if we have a single dominating call instruction that is
544      // identical to C.
545      for (const NonLocalDepEntry &I : deps) {
546        if (I.getResult().isNonLocal())
547          continue;
548  
549        // We don't handle non-definitions.  If we already have a call, reject
550        // instruction dependencies.
551        if (!I.getResult().isDef() || cdep != nullptr) {
552          cdep = nullptr;
553          break;
554        }
555  
556        CallInst *NonLocalDepCall = dyn_cast<CallInst>(I.getResult().getInst());
557        // FIXME: All duplicated with non-local case.
558        if (NonLocalDepCall && DT->properlyDominates(I.getBB(), C->getParent())) {
559          cdep = NonLocalDepCall;
560          continue;
561        }
562  
563        cdep = nullptr;
564        break;
565      }
566  
567      if (!cdep) {
568        valueNumbering[C] = nextValueNumber;
569        return nextValueNumber++;
570      }
571  
572      if (cdep->arg_size() != C->arg_size()) {
573        valueNumbering[C] = nextValueNumber;
574        return nextValueNumber++;
575      }
576      for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
577        uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
578        uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
579        if (c_vn != cd_vn) {
580          valueNumbering[C] = nextValueNumber;
581          return nextValueNumber++;
582        }
583      }
584  
585      uint32_t v = lookupOrAdd(cdep);
586      valueNumbering[C] = v;
587      return v;
588    }
589  
590    valueNumbering[C] = nextValueNumber;
591    return nextValueNumber++;
592  }
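// Illustrative example (hypothetical IR): two calls to a read-only function,
//
//     %r1 = call i32 @f(ptr %s)          ; @f only reads memory
//     ...                                ; no intervening writes
//     %r2 = call i32 @f(ptr %s)
//
// receive the same value number when memory dependence analysis reports the
// earlier, argument-identical call as the defining dependence of the later
// one; a clobbering store in between instead forces a fresh value number.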
593  
594  /// Returns true if a value number exists for the specified value.
595  bool GVNPass::ValueTable::exists(Value *V) const {
596    return valueNumbering.contains(V);
597  }
598  
599  /// lookup_or_add - Returns the value number for the specified value, assigning
600  /// it a new number if it did not have one before.
601  uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
602    DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
603    if (VI != valueNumbering.end())
604      return VI->second;
605  
606    auto *I = dyn_cast<Instruction>(V);
607    if (!I) {
608      valueNumbering[V] = nextValueNumber;
609      return nextValueNumber++;
610    }
611  
612    Expression exp;
613    switch (I->getOpcode()) {
614      case Instruction::Call:
615        return lookupOrAddCall(cast<CallInst>(I));
616      case Instruction::FNeg:
617      case Instruction::Add:
618      case Instruction::FAdd:
619      case Instruction::Sub:
620      case Instruction::FSub:
621      case Instruction::Mul:
622      case Instruction::FMul:
623      case Instruction::UDiv:
624      case Instruction::SDiv:
625      case Instruction::FDiv:
626      case Instruction::URem:
627      case Instruction::SRem:
628      case Instruction::FRem:
629      case Instruction::Shl:
630      case Instruction::LShr:
631      case Instruction::AShr:
632      case Instruction::And:
633      case Instruction::Or:
634      case Instruction::Xor:
635      case Instruction::ICmp:
636      case Instruction::FCmp:
637      case Instruction::Trunc:
638      case Instruction::ZExt:
639      case Instruction::SExt:
640      case Instruction::FPToUI:
641      case Instruction::FPToSI:
642      case Instruction::UIToFP:
643      case Instruction::SIToFP:
644      case Instruction::FPTrunc:
645      case Instruction::FPExt:
646      case Instruction::PtrToInt:
647      case Instruction::IntToPtr:
648      case Instruction::AddrSpaceCast:
649      case Instruction::BitCast:
650      case Instruction::Select:
651      case Instruction::Freeze:
652      case Instruction::ExtractElement:
653      case Instruction::InsertElement:
654      case Instruction::ShuffleVector:
655      case Instruction::InsertValue:
656        exp = createExpr(I);
657        break;
658      case Instruction::GetElementPtr:
659        exp = createGEPExpr(cast<GetElementPtrInst>(I));
660        break;
661      case Instruction::ExtractValue:
662        exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
663        break;
664      case Instruction::PHI:
665        valueNumbering[V] = nextValueNumber;
666        NumberingPhi[nextValueNumber] = cast<PHINode>(V);
667        return nextValueNumber++;
668      default:
669        valueNumbering[V] = nextValueNumber;
670        return nextValueNumber++;
671    }
672  
673    uint32_t e = assignExpNewValueNum(exp).first;
674    valueNumbering[V] = e;
675    return e;
676  }
677  
678  /// Returns the value number of the specified value. Fails if
679  /// the value has not yet been numbered.
680  uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
681    DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
682    if (Verify) {
683      assert(VI != valueNumbering.end() && "Value not numbered?");
684      return VI->second;
685    }
686    return (VI != valueNumbering.end()) ? VI->second : 0;
687  }
688  
689  /// Returns the value number of the given comparison,
690  /// assigning it a new number if it did not have one before.  Useful when
691  /// we deduced the result of a comparison, but don't immediately have an
692  /// instruction realizing that comparison to hand.
693  uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
694                                               CmpInst::Predicate Predicate,
695                                               Value *LHS, Value *RHS) {
696    Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
697    return assignExpNewValueNum(exp).first;
698  }
699  
700  /// Remove all entries from the ValueTable.
701  void GVNPass::ValueTable::clear() {
702    valueNumbering.clear();
703    expressionNumbering.clear();
704    NumberingPhi.clear();
705    PhiTranslateTable.clear();
706    nextValueNumber = 1;
707    Expressions.clear();
708    ExprIdx.clear();
709    nextExprNumber = 0;
710  }
711  
712  /// Remove a value from the value numbering.
713  void GVNPass::ValueTable::erase(Value *V) {
714    uint32_t Num = valueNumbering.lookup(V);
715    valueNumbering.erase(V);
716    // If V is a PHINode, V <--> value number is a one-to-one mapping.
717    if (isa<PHINode>(V))
718      NumberingPhi.erase(Num);
719  }
720  
721  /// verifyRemoved - Verify that the value is removed from all internal data
722  /// structures.
723  void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
724    assert(!valueNumbering.contains(V) &&
725           "Inst still occurs in value numbering map!");
726  }
727  
728  //===----------------------------------------------------------------------===//
729  //                     LeaderMap External Functions
730  //===----------------------------------------------------------------------===//
731  
732  /// Push a new Value to the LeaderTable onto the list for its value number.
733  void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
734    LeaderListNode &Curr = NumToLeaders[N];
735    if (!Curr.Entry.Val) {
736      Curr.Entry.Val = V;
737      Curr.Entry.BB = BB;
738      return;
739    }
740  
741    LeaderListNode *Node = TableAllocator.Allocate<LeaderListNode>();
742    Node->Entry.Val = V;
743    Node->Entry.BB = BB;
744    Node->Next = Curr.Next;
745    Curr.Next = Node;
746  }
747  
748  /// Scan the list of values corresponding to a given
749  /// value number, and remove the given instruction if encountered.
750  void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
751                                 const BasicBlock *BB) {
752    LeaderListNode *Prev = nullptr;
753    LeaderListNode *Curr = &NumToLeaders[N];
754  
755    while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
756      Prev = Curr;
757      Curr = Curr->Next;
758    }
759  
760    if (!Curr)
761      return;
762  
763    if (Prev) {
764      Prev->Next = Curr->Next;
765    } else {
766      if (!Curr->Next) {
767        Curr->Entry.Val = nullptr;
768        Curr->Entry.BB = nullptr;
769      } else {
770        LeaderListNode *Next = Curr->Next;
771        Curr->Entry.Val = Next->Entry.Val;
772        Curr->Entry.BB = Next->Entry.BB;
773        Curr->Next = Next->Next;
774      }
775    }
776  }
777  
778  void GVNPass::LeaderMap::verifyRemoved(const Value *V) const {
779    // Walk through the value number scope to make sure the instruction isn't
780    // ferreted away in it.
781    for (const auto &I : NumToLeaders) {
782      (void)I;
783      assert(I.second.Entry.Val != V && "Inst still in value numbering scope!");
784      assert(
785          std::none_of(leader_iterator(&I.second), leader_iterator(nullptr),
786                       [=](const LeaderTableEntry &E) { return E.Val == V; }) &&
787          "Inst still in value numbering scope!");
788    }
789  }
790  
791  //===----------------------------------------------------------------------===//
792  //                                GVN Pass
793  //===----------------------------------------------------------------------===//
794  
795  bool GVNPass::isPREEnabled() const {
796    return Options.AllowPRE.value_or(GVNEnablePRE);
797  }
798  
799  bool GVNPass::isLoadPREEnabled() const {
800    return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE);
801  }
802  
803  bool GVNPass::isLoadInLoopPREEnabled() const {
804    return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE);
805  }
806  
807  bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
808    return Options.AllowLoadPRESplitBackedge.value_or(
809        GVNEnableSplitBackedgeInLoadPRE);
810  }
811  
812  bool GVNPass::isMemDepEnabled() const {
813    return Options.AllowMemDep.value_or(GVNEnableMemDep);
814  }
815  
816  PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
817    // FIXME: The order of evaluation of these 'getResult' calls is very
818    // significant! Re-ordering these variables will cause GVN when run alone to
819    // be less effective! We should fix memdep and basic-aa to not exhibit this
820    // behavior, but until then don't change the order here.
821    auto &AC = AM.getResult<AssumptionAnalysis>(F);
822    auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
823    auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
824    auto &AA = AM.getResult<AAManager>(F);
825    auto *MemDep =
826        isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
827    auto &LI = AM.getResult<LoopAnalysis>(F);
828    auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
829    auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
830    bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
831                           MSSA ? &MSSA->getMSSA() : nullptr);
832    if (!Changed)
833      return PreservedAnalyses::all();
834    PreservedAnalyses PA;
835    PA.preserve<DominatorTreeAnalysis>();
836    PA.preserve<TargetLibraryAnalysis>();
837    if (MSSA)
838      PA.preserve<MemorySSAAnalysis>();
839    PA.preserve<LoopAnalysis>();
840    return PA;
841  }
842  
843  void GVNPass::printPipeline(
844      raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
845    static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
846        OS, MapClassName2PassName);
847  
848    OS << '<';
849    if (Options.AllowPRE != std::nullopt)
850      OS << (*Options.AllowPRE ? "" : "no-") << "pre;";
851    if (Options.AllowLoadPRE != std::nullopt)
852      OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;";
853    if (Options.AllowLoadPRESplitBackedge != std::nullopt)
854      OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-")
855         << "split-backedge-load-pre;";
856    if (Options.AllowMemDep != std::nullopt)
857      OS << (*Options.AllowMemDep ? "" : "no-") << "memdep";
858    OS << '>';
859  }
860  
861  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
862  LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &d) const {
863    errs() << "{\n";
864    for (auto &I : d) {
865      errs() << I.first << "\n";
866      I.second->dump();
867    }
868    errs() << "}\n";
869  }
870  #endif
871  
872  enum class AvailabilityState : char {
873    /// We know the block *is not* fully available. This is a fixpoint.
874    Unavailable = 0,
875    /// We know the block *is* fully available. This is a fixpoint.
876    Available = 1,
877    /// We do not know whether the block is fully available or not,
878    /// but we are currently speculating that it will be.
879    /// If it would have turned out that the block was, in fact, not fully
880    /// available, this would have been cleaned up into an Unavailable.
881    SpeculativelyAvailable = 2,
882  };
883  
884  /// Return true if we can prove that the value
885  /// we're analyzing is fully available in the specified block.  As we go, keep
886  /// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
887  /// map is actually a tri-state map with the following values:
888  ///   0) we know the block *is not* fully available.
889  ///   1) we know the block *is* fully available.
890  ///   2) we do not know whether the block is fully available or not, but we are
891  ///      currently speculating that it will be.
892  static bool IsValueFullyAvailableInBlock(
893      BasicBlock *BB,
894      DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
895    SmallVector<BasicBlock *, 32> Worklist;
896    std::optional<BasicBlock *> UnavailableBB;
897  
898    // The number of times we didn't find an entry for a block in a map and
899    // optimistically inserted an entry marking the block as speculatively available.
900    unsigned NumNewNewSpeculativelyAvailableBBs = 0;
901  
902  #ifndef NDEBUG
903    SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
904    SmallVector<BasicBlock *, 32> AvailableBBs;
905  #endif
906  
907    Worklist.emplace_back(BB);
908    while (!Worklist.empty()) {
909      BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
910      // Optimistically assume that the block is Speculatively Available and check
911      // to see if we already know about this block in one lookup.
912      std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
913          FullyAvailableBlocks.try_emplace(
914              CurrBB, AvailabilityState::SpeculativelyAvailable);
915      AvailabilityState &State = IV.first->second;
916  
917      // Did the entry already exist for this block?
918      if (!IV.second) {
919        if (State == AvailabilityState::Unavailable) {
920          UnavailableBB = CurrBB;
921          break; // Backpropagate unavailability info.
922        }
923  
924  #ifndef NDEBUG
925        AvailableBBs.emplace_back(CurrBB);
926  #endif
927        continue; // Don't recurse further, but continue processing worklist.
928      }
929  
930      // No entry found for block.
931      ++NumNewNewSpeculativelyAvailableBBs;
932      bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
933  
934      // If we have exhausted our budget, mark this block as unavailable.
935      // Also, if this block has no predecessors, the value isn't live-in here.
936      if (OutOfBudget || pred_empty(CurrBB)) {
937        MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
938        State = AvailabilityState::Unavailable;
939        UnavailableBB = CurrBB;
940        break; // Backpropagate unavailability info.
941      }
942  
943      // Tentatively consider this block as speculatively available.
944  #ifndef NDEBUG
945      NewSpeculativelyAvailableBBs.insert(CurrBB);
946  #endif
947      // And further recurse into block's predecessors, in depth-first order!
948      Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
949    }
950  
951  #if LLVM_ENABLE_STATS
952    IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
953        NumNewNewSpeculativelyAvailableBBs);
954  #endif
955  
956    // If the block isn't marked as fixpoint yet
957    // (the Unavailable and Available states are fixpoints)
958    auto MarkAsFixpointAndEnqueueSuccessors =
959        [&](BasicBlock *BB, AvailabilityState FixpointState) {
960          auto It = FullyAvailableBlocks.find(BB);
961          if (It == FullyAvailableBlocks.end())
962            return; // Never queried this block, leave as-is.
963          switch (AvailabilityState &State = It->second) {
964          case AvailabilityState::Unavailable:
965          case AvailabilityState::Available:
966            return; // Don't backpropagate further, continue processing worklist.
967          case AvailabilityState::SpeculativelyAvailable: // Fix it!
968            State = FixpointState;
969  #ifndef NDEBUG
970            assert(NewSpeculativelyAvailableBBs.erase(BB) &&
971                   "Found a speculatively available successor leftover?");
972  #endif
973            // Queue successors for further processing.
974            Worklist.append(succ_begin(BB), succ_end(BB));
975            return;
976          }
977        };
978  
979    if (UnavailableBB) {
980      // Okay, we have encountered an unavailable block.
981      // Mark speculatively available blocks reachable from UnavailableBB as
982      // unavailable as well. Paths are terminated when they reach blocks not in
983      // FullyAvailableBlocks or they are not marked as speculatively available.
984      Worklist.clear();
985      Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
986      while (!Worklist.empty())
987        MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
988                                           AvailabilityState::Unavailable);
989    }
990  
991  #ifndef NDEBUG
992    Worklist.clear();
993    for (BasicBlock *AvailableBB : AvailableBBs)
994      Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
995    while (!Worklist.empty())
996      MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
997                                         AvailabilityState::Available);
998  
999    assert(NewSpeculativelyAvailableBBs.empty() &&
1000           "Must have fixed all the new speculatively available blocks.");
1001  #endif
1002  
1003    return !UnavailableBB;
1004  }
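// Illustrative example: querying availability in block D of the diamond
//
//        A   (value known Available here)
//       / \
//      B   C
//       \ /
//        D
//
// visits D, then its predecessors B and C, then A.  D, B and C are
// optimistically marked SpeculativelyAvailable; every path reaches the
// Available block A without meeting an Unavailable one, so nothing is
// backpropagated and the function returns true.  Had A been Unavailable,
// the fixup loop above would have flipped B, C and D to Unavailable as well.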
1005  
1006  /// If the specified OldValue exists in ValuesPerBlock, replace its value with
1007  /// NewValue.
1008  static void replaceValuesPerBlockEntry(
1009      SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
1010      Value *NewValue) {
1011    for (AvailableValueInBlock &V : ValuesPerBlock) {
1012      if (V.AV.Val == OldValue)
1013        V.AV.Val = NewValue;
1014      if (V.AV.isSelectValue()) {
1015        if (V.AV.V1 == OldValue)
1016          V.AV.V1 = NewValue;
1017        if (V.AV.V2 == OldValue)
1018          V.AV.V2 = NewValue;
1019      }
1020    }
1021  }
1022  
1023  /// Given a set of loads specified by ValuesPerBlock,
1024  /// construct SSA form, allowing us to eliminate Load.  This returns the value
1025  /// that should be used at Load's definition site.
1026  static Value *
1027  ConstructSSAForLoadSet(LoadInst *Load,
1028                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1029                         GVNPass &gvn) {
1030    // Check for the fully redundant, dominating load case.  In this case, we can
1031    // just use the dominating value directly.
1032    if (ValuesPerBlock.size() == 1 &&
1033        gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
1034                                                 Load->getParent())) {
1035      assert(!ValuesPerBlock[0].AV.isUndefValue() &&
1036           "Dead BB dominates this block");
1037      return ValuesPerBlock[0].MaterializeAdjustedValue(Load, gvn);
1038    }
1039  
1040    // Otherwise, we have to construct SSA form.
1041    SmallVector<PHINode*, 8> NewPHIs;
1042    SSAUpdater SSAUpdate(&NewPHIs);
1043    SSAUpdate.Initialize(Load->getType(), Load->getName());
1044  
1045    for (const AvailableValueInBlock &AV : ValuesPerBlock) {
1046      BasicBlock *BB = AV.BB;
1047  
1048      if (AV.AV.isUndefValue())
1049        continue;
1050  
1051      if (SSAUpdate.HasValueForBlock(BB))
1052        continue;
1053  
1054      // If the value is the load that we will be eliminating, and the block it's
1055      // available in is the block that the load is in, then don't add it as
1056      // SSAUpdater will resolve the value to the relevant phi which may let it
1057      // avoid phi construction entirely if there's actually only one value.
1058      if (BB == Load->getParent() &&
1059          ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
1060           (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
1061        continue;
1062  
1063      SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(Load, gvn));
1064    }
1065  
1066    // Perform PHI construction.
1067    return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
1068  }
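// For illustration (hypothetical values): if the load's value is available as
// %v1 at the end of predecessor %BB1 and as %v2 at the end of predecessor
// %BB2, SSAUpdater effectively materializes
//
//     %merged = phi i32 [ %v1, %BB1 ], [ %v2, %BB2 ]
//
// in the load's block and that phi is returned as the replacement; with a
// single properly dominating value, no phi is needed and the value is returned
// directly.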
1069  
1070  Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
1071                                                  Instruction *InsertPt,
1072                                                  GVNPass &gvn) const {
1073    Value *Res;
1074    Type *LoadTy = Load->getType();
1075    const DataLayout &DL = Load->getDataLayout();
1076    if (isSimpleValue()) {
1077      Res = getSimpleValue();
1078      if (Res->getType() != LoadTy) {
1079        Res = getValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
1080  
1081        LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
1082                          << "  " << *getSimpleValue() << '\n'
1083                          << *Res << '\n'
1084                          << "\n\n\n");
1085      }
1086    } else if (isCoercedLoadValue()) {
1087      LoadInst *CoercedLoad = getCoercedLoadValue();
1088      if (CoercedLoad->getType() == LoadTy && Offset == 0) {
1089        Res = CoercedLoad;
1090        combineMetadataForCSE(CoercedLoad, Load, false);
1091      } else {
1092        Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL);
1093        // We are adding a new user for this load, for which the original
1094        // metadata may not hold. Additionally, the new load may have a different
1095        // size and type, so their metadata cannot be combined in any
1096        // straightforward way.
1097        // Drop all metadata that is not known to cause immediate UB on violation,
1098        // unless the load has !noundef, in which case all metadata violations
1099        // will be promoted to UB.
1100        // TODO: We can combine noalias/alias.scope metadata here, because it is
1101        // independent of the load type.
1102        if (!CoercedLoad->hasMetadata(LLVMContext::MD_noundef))
1103          CoercedLoad->dropUnknownNonDebugMetadata(
1104              {LLVMContext::MD_dereferenceable,
1105               LLVMContext::MD_dereferenceable_or_null,
1106               LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group});
1107        LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
1108                          << "  " << *getCoercedLoadValue() << '\n'
1109                          << *Res << '\n'
1110                          << "\n\n\n");
1111      }
1112    } else if (isMemIntrinValue()) {
1113      Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
1114                                   InsertPt, DL);
1115      LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1116                        << "  " << *getMemIntrinValue() << '\n'
1117                        << *Res << '\n'
1118                        << "\n\n\n");
1119    } else if (isSelectValue()) {
1120      // Introduce a new value select for a load from an eligible pointer select.
1121      SelectInst *Sel = getSelectValue();
1122      assert(V1 && V2 && "both value operands of the select must be present");
1123      Res =
1124          SelectInst::Create(Sel->getCondition(), V1, V2, "", Sel->getIterator());
1125    } else {
1126      llvm_unreachable("Should not materialize value from dead block");
1127    }
1128    assert(Res && "failed to materialize?");
1129    return Res;
1130  }
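// Illustrative example (hypothetical IR) of the SimpleVal coercion path above:
// forwarding from
//
//     store i32 %v, ptr %p
//     %x = load i8, ptr %p
//
// materializes, on a little-endian target, roughly "%x = trunc i32 %v to i8"
// at the insertion point via getValueForLoad() instead of re-executing the
// load.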
1131  
1132  static bool isLifetimeStart(const Instruction *Inst) {
1133    if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1134      return II->getIntrinsicID() == Intrinsic::lifetime_start;
1135    return false;
1136  }
1137  
1138  /// Assuming To can be reached from both From and Between, does Between lie on
1139  /// every path from From to To?
1140  static bool liesBetween(const Instruction *From, Instruction *Between,
1141                          const Instruction *To, DominatorTree *DT) {
1142    if (From->getParent() == Between->getParent())
1143      return DT->dominates(From, Between);
1144    SmallSet<BasicBlock *, 1> Exclusion;
1145    Exclusion.insert(Between->getParent());
1146    return !isPotentiallyReachable(From, To, &Exclusion, DT);
1147  }
1148  
1149  /// Try to locate the three instructions involved in a missed
1150  /// load-elimination case that is due to an intervening store.
1151  static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
1152                                     DominatorTree *DT,
1153                                     OptimizationRemarkEmitter *ORE) {
1154    using namespace ore;
1155  
1156    Instruction *OtherAccess = nullptr;
1157  
1158    OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
1159    R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
1160      << setExtraArgs();
1161  
1162    for (auto *U : Load->getPointerOperand()->users()) {
1163      if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1164        auto *I = cast<Instruction>(U);
1165        if (I->getFunction() == Load->getFunction() && DT->dominates(I, Load)) {
1166          // Use the most immediately dominating value
1167          if (OtherAccess) {
1168            if (DT->dominates(OtherAccess, I))
1169              OtherAccess = I;
1170            else
1171              assert(U == OtherAccess || DT->dominates(I, OtherAccess));
1172          } else
1173            OtherAccess = I;
1174        }
1175      }
1176    }
1177  
1178    if (!OtherAccess) {
1179      // There is no dominating use, check if we can find a closest non-dominating
1180      // use that lies between any other potentially available use and Load.
1181      for (auto *U : Load->getPointerOperand()->users()) {
1182        if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1183          auto *I = cast<Instruction>(U);
1184          if (I->getFunction() == Load->getFunction() &&
1185              isPotentiallyReachable(I, Load, nullptr, DT)) {
1186            if (OtherAccess) {
1187              if (liesBetween(OtherAccess, I, Load, DT)) {
1188                OtherAccess = I;
1189              } else if (!liesBetween(I, OtherAccess, Load, DT)) {
1190                // These uses are both partially available at Load were it not for
1191                // the clobber, but neither lies strictly after the other.
1192                OtherAccess = nullptr;
1193                break;
1194              } // else: keep current OtherAccess since it lies between U and Load
1195            } else {
1196              OtherAccess = I;
1197            }
1198          }
1199        }
1200      }
1201    }
1202  
1203    if (OtherAccess)
1204      R << " in favor of " << NV("OtherAccess", OtherAccess);
1205  
1206    R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
1207  
1208    ORE->emit(R);
1209  }
1210  
1211  // Find non-clobbered value for Loc memory location in extended basic block
1212  // (chain of basic blocks with single predecessors) starting From instruction.
1213  static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy,
1214                                    Instruction *From, AAResults *AA) {
1215    uint32_t NumVisitedInsts = 0;
1216    BasicBlock *FromBB = From->getParent();
1217    BatchAAResults BatchAA(*AA);
1218    for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
1219      for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
1220           Inst != nullptr; Inst = Inst->getPrevNonDebugInstruction()) {
1221        // Stop the search if limit is reached.
1222        if (++NumVisitedInsts > MaxNumVisitedInsts)
1223          return nullptr;
1224        if (isModSet(BatchAA.getModRefInfo(Inst, Loc)))
1225          return nullptr;
1226        if (auto *LI = dyn_cast<LoadInst>(Inst))
1227          if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy)
1228            return LI;
1229      }
1230    return nullptr;
1231  }
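// Illustrative example (hypothetical IR) of the select case handled in
// AnalyzeLoadAvailability below:
//
//     %v1  = load i32, ptr %a
//     %v2  = load i32, ptr %b
//     %sel = select i1 %c, ptr %a, ptr %b
//     %x   = load i32, ptr %sel
//
// findDominatingValue() locates %v1 and %v2 (with no clobbers in between), and
// the final load can then be rewritten as "select i1 %c, i32 %v1, i32 %v2".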
1232  
1233  std::optional<AvailableValue>
1234  GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
1235                                   Value *Address) {
1236    assert(Load->isUnordered() && "rules below are incorrect for ordered access");
1237    assert(DepInfo.isLocal() && "expected a local dependence");
1238  
1239    Instruction *DepInst = DepInfo.getInst();
1240  
1241    const DataLayout &DL = Load->getDataLayout();
1242    if (DepInfo.isClobber()) {
1243      // If the dependence is to a store that writes to a superset of the bits
1244      // read by the load, we can extract the bits we need for the load from the
1245      // stored value.
1246      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1247        // Can't forward from non-atomic to atomic without violating memory model.
1248        if (Address && Load->isAtomic() <= DepSI->isAtomic()) {
1249          int Offset =
1250              analyzeLoadFromClobberingStore(Load->getType(), Address, DepSI, DL);
1251          if (Offset != -1)
1252            return AvailableValue::get(DepSI->getValueOperand(), Offset);
1253        }
1254      }
1255  
1256      // Check to see if we have something like this:
1257      //    load i32* P
1258      //    load i8* (P+1)
1259      // if we have this, replace the later with an extraction from the former.
1260      if (LoadInst *DepLoad = dyn_cast<LoadInst>(DepInst)) {
1261        // If this is a clobber and L is the first instruction in its block, then
1262        // we have the first instruction in the entry block.
1263        // Can't forward from non-atomic to atomic without violating memory model.
1264        if (DepLoad != Load && Address &&
1265            Load->isAtomic() <= DepLoad->isAtomic()) {
1266          Type *LoadType = Load->getType();
1267          int Offset = -1;
1268  
1269          // If MD reported clobber, check it was nested.
1270          if (DepInfo.isClobber() &&
1271              canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
1272            const auto ClobberOff = MD->getClobberOffset(DepLoad);
1273          // GVN cannot handle a negative offset.
1274            Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
1275                         ? -1
1276                         : *ClobberOff;
1277          }
1278          if (Offset == -1)
1279            Offset =
1280                analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL);
1281          if (Offset != -1)
1282            return AvailableValue::getLoad(DepLoad, Offset);
1283        }
1284      }
1285  
1286      // If the clobbering value is a memset/memcpy/memmove, see if we can
1287      // forward a value on from it.
1288      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
1289        if (Address && !Load->isAtomic()) {
1290          int Offset = analyzeLoadFromClobberingMemInst(Load->getType(), Address,
1291                                                        DepMI, DL);
1292          if (Offset != -1)
1293            return AvailableValue::getMI(DepMI, Offset);
1294        }
1295      }
1296  
1297      // Nothing known about this clobber, have to be conservative
1298      LLVM_DEBUG(
1299          // fast print dep, using operator<< on instruction is too slow.
1300          dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1301          dbgs() << " is clobbered by " << *DepInst << '\n';);
1302      if (ORE->allowExtraAnalysis(DEBUG_TYPE))
1303        reportMayClobberedLoad(Load, DepInfo, DT, ORE);
1304  
1305      return std::nullopt;
1306    }
1307    assert(DepInfo.isDef() && "follows from above");
1308  
1309    // Loading the alloca -> undef.
1310    // Loading immediately after lifetime begin -> undef.
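  // For example (illustrative), "%v = load i32, ptr %A" where %A is the alloca
  // and no store intervenes reads uninitialized memory, i.e. undef.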
1311    if (isa<AllocaInst>(DepInst) || isLifetimeStart(DepInst))
1312      return AvailableValue::get(UndefValue::get(Load->getType()));
1313  
1314    if (Constant *InitVal =
1315            getInitialValueOfAllocation(DepInst, TLI, Load->getType()))
1316      return AvailableValue::get(InitVal);
1317  
1318    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1319      // Reject loads and stores that are to the same address but are of
1320    // different types if we have to. If the stored value is convertible to
1321      // the loaded value, we can reuse it.
1322      if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), Load->getType(),
1323                                           DL))
1324        return std::nullopt;
1325  
1326      // Can't forward from non-atomic to atomic without violating memory model.
1327      if (S->isAtomic() < Load->isAtomic())
1328        return std::nullopt;
1329  
1330      return AvailableValue::get(S->getValueOperand());
1331    }
1332  
1333    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1334      // If the types mismatch and we can't handle it, reject reuse of the load.
1335    // If the previously loaded value is larger than or equal to the value
1336    // being loaded here, we can reuse it.
1337      if (!canCoerceMustAliasedValueToLoad(LD, Load->getType(), DL))
1338        return std::nullopt;
1339  
1340      // Can't forward from non-atomic to atomic without violating memory model.
1341      if (LD->isAtomic() < Load->isAtomic())
1342        return std::nullopt;
1343  
1344      return AvailableValue::getLoad(LD);
1345    }
1346  
1347    // Check if a load whose address depends on a select can be converted into a
1348    // select between load values. There must be no instructions between the
1349    // found loads and DepInst that may clobber the loads.
1350    if (auto *Sel = dyn_cast<SelectInst>(DepInst)) {
1351      assert(Sel->getType() == Load->getPointerOperandType());
1352      auto Loc = MemoryLocation::get(Load);
1353      Value *V1 =
1354          findDominatingValue(Loc.getWithNewPtr(Sel->getTrueValue()),
1355                              Load->getType(), DepInst, getAliasAnalysis());
1356      if (!V1)
1357        return std::nullopt;
1358      Value *V2 =
1359          findDominatingValue(Loc.getWithNewPtr(Sel->getFalseValue()),
1360                              Load->getType(), DepInst, getAliasAnalysis());
1361      if (!V2)
1362        return std::nullopt;
1363      return AvailableValue::getSelect(Sel, V1, V2);
1364    }
1365  
1366    // Unknown def - must be conservative
1367    LLVM_DEBUG(
1368        // fast print dep, using operator<< on instruction is too slow.
1369        dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1370        dbgs() << " has unknown def " << *DepInst << '\n';);
1371    return std::nullopt;
1372  }
1373  
1374  void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
1375                                        AvailValInBlkVect &ValuesPerBlock,
1376                                        UnavailBlkVect &UnavailableBlocks) {
1377    // Filter out useless results (non-locals, etc).  Keep track of the blocks
1378    // where a value is available, and also keep track of whether we see
1379    // dependencies that produce an unknown value for the load (such as a call
1380    // that could potentially clobber the load).
1381    for (const auto &Dep : Deps) {
1382      BasicBlock *DepBB = Dep.getBB();
1383      MemDepResult DepInfo = Dep.getResult();
1384  
1385      if (DeadBlocks.count(DepBB)) {
1386        // A dead dependent mem-op is treated as a load evaluating the same value
1387        // as the load in question.
1388        ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
1389        continue;
1390      }
1391  
1392      if (!DepInfo.isLocal()) {
1393        UnavailableBlocks.push_back(DepBB);
1394        continue;
1395      }
1396  
1397      // The address being loaded in this non-local block may not be the same as
1398      // the pointer operand of the load if PHI translation occurs.  Make sure
1399      // to consider the right address.
1400      if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Dep.getAddress())) {
1401        // Subtlety: because we know this was a non-local dependency, we know
1402        // it's safe to materialize anywhere between the instruction within
1403        // DepInfo and the end of its block.
1404        ValuesPerBlock.push_back(
1405            AvailableValueInBlock::get(DepBB, std::move(*AV)));
1406      } else {
1407        UnavailableBlocks.push_back(DepBB);
1408      }
1409    }
1410  
1411    assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1412           "post condition violation");
1413  }
1414  
1415  /// Given the following code, v1 is partially available on some edges, but not
1416  /// available on the edge from PredBB. This function tries to find if there is
1417  /// another identical load in the other successor of PredBB.
1418  ///
1419  ///      v0 = load %addr
1420  ///      br %LoadBB
1421  ///
1422  ///   LoadBB:
1423  ///      v1 = load %addr
1424  ///      ...
1425  ///
1426  ///   PredBB:
1427  ///      ...
1428  ///      br %cond, label %LoadBB, label %SuccBB
1429  ///
1430  ///   SuccBB:
1431  ///      v2 = load %addr
1432  ///      ...
1433  ///
1434  LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
1435                                             LoadInst *Load) {
1436    // For simplicity we only handle the case where Pred has exactly two successors.
1437    auto *Term = Pred->getTerminator();
1438    if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
1439      return nullptr;
1440    auto *SuccBB = Term->getSuccessor(0);
1441    if (SuccBB == LoadBB)
1442      SuccBB = Term->getSuccessor(1);
1443    if (!SuccBB->getSinglePredecessor())
1444      return nullptr;
1445  
1446    unsigned int NumInsts = MaxNumInsnsPerBlock;
1447    for (Instruction &Inst : *SuccBB) {
1448      if (Inst.isDebugOrPseudoInst())
1449        continue;
1450      if (--NumInsts == 0)
1451        return nullptr;
1452  
1453      if (!Inst.isIdenticalTo(Load))
1454        continue;
1455  
1456      MemDepResult Dep = MD->getDependency(&Inst);
1457      // If an identical load doesn't depend on any local instructions, it can
1458      // be safely moved to PredBB.
1459      // Also check for the implicit control flow instructions. See the comments
1460      // in PerformLoadPRE for details.
1461      if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(&Inst))
1462        return cast<LoadInst>(&Inst);
1463  
1464      // Otherwise something in the same BB clobbers the memory, so we can't
1465      // move this load and the later load to PredBB.
1466      return nullptr;
1467    }
1468  
1469    return nullptr;
1470  }
1471  
1472  void GVNPass::eliminatePartiallyRedundantLoad(
1473      LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1474      MapVector<BasicBlock *, Value *> &AvailableLoads,
1475      MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
1476    for (const auto &AvailableLoad : AvailableLoads) {
1477      BasicBlock *UnavailableBlock = AvailableLoad.first;
1478      Value *LoadPtr = AvailableLoad.second;
1479  
1480      auto *NewLoad = new LoadInst(
1481          Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
1482          Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
1483          UnavailableBlock->getTerminator()->getIterator());
1484      NewLoad->setDebugLoc(Load->getDebugLoc());
1485      if (MSSAU) {
1486        auto *NewAccess = MSSAU->createMemoryAccessInBB(
1487            NewLoad, nullptr, NewLoad->getParent(), MemorySSA::BeforeTerminator);
1488        if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
1489          MSSAU->insertDef(NewDef, /*RenameUses=*/true);
1490        else
1491          MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
1492      }
1493  
1494      // Transfer the old load's AA tags to the new load.
1495      AAMDNodes Tags = Load->getAAMetadata();
1496      if (Tags)
1497        NewLoad->setAAMetadata(Tags);
1498  
1499      if (auto *MD = Load->getMetadata(LLVMContext::MD_invariant_load))
1500        NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
1501      if (auto *InvGroupMD = Load->getMetadata(LLVMContext::MD_invariant_group))
1502        NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
1503      if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range))
1504        NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
1505      if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group))
1506        if (LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock))
1507          NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD);
1508  
1509      // We do not propagate the old load's debug location, because the new
1510      // load now lives in a different BB, and we want to avoid a jumpy line
1511      // table.
1512      // FIXME: How do we retain source locations without causing poor debugging
1513      // behavior?
1514  
1515      // Add the newly created load.
1516      ValuesPerBlock.push_back(
1517          AvailableValueInBlock::get(UnavailableBlock, NewLoad));
1518      MD->invalidateCachedPointerInfo(LoadPtr);
1519      LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1520  
1521      // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of the
1522      // old load instruction with the newly created load instruction.
1523      if (CriticalEdgePredAndLoad) {
1524        auto I = CriticalEdgePredAndLoad->find(UnavailableBlock);
1525        if (I != CriticalEdgePredAndLoad->end()) {
1526          ++NumPRELoadMoved2CEPred;
1527          ICF->insertInstructionTo(NewLoad, UnavailableBlock);
1528          LoadInst *OldLoad = I->second;
1529          combineMetadataForCSE(NewLoad, OldLoad, false);
1530          OldLoad->replaceAllUsesWith(NewLoad);
1531          replaceValuesPerBlockEntry(ValuesPerBlock, OldLoad, NewLoad);
1532          if (uint32_t ValNo = VN.lookup(OldLoad, false))
1533            LeaderTable.erase(ValNo, OldLoad, OldLoad->getParent());
1534          VN.erase(OldLoad);
1535          removeInstruction(OldLoad);
1536        }
1537      }
1538    }
1539  
1540    // Perform PHI construction.
1541    Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
1542    // ConstructSSAForLoadSet is responsible for combining metadata.
1543    ICF->removeUsersOf(Load);
1544    Load->replaceAllUsesWith(V);
1545    if (isa<PHINode>(V))
1546      V->takeName(Load);
1547    if (Instruction *I = dyn_cast<Instruction>(V))
1548      I->setDebugLoc(Load->getDebugLoc());
1549    if (V->getType()->isPtrOrPtrVectorTy())
1550      MD->invalidateCachedPointerInfo(V);
1551    markInstructionForDeletion(Load);
1552    ORE->emit([&]() {
1553      return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
1554             << "load eliminated by PRE";
1555    });
1556  }
1557  
1558  bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1559                               UnavailBlkVect &UnavailableBlocks) {
1560    // Okay, we have *some* definitions of the value.  This means that the value
1561    // is available in some of our (transitive) predecessors.  Let's think about
1562    // doing PRE of this load.  This will involve inserting a new load into the
1563    // predecessor when it's not available.  We could do this in general, but
1564    // prefer to not increase code size.  As such, we only do this when we know
1565    // that we only have to insert *one* load (which means we're basically moving
1566    // the load, not inserting a new one).
1567  
1568    SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
1569                                          UnavailableBlocks.end());
1570  
1571    // Let's find the first basic block with more than one predecessor.  Walk
1572    // backwards through predecessors if needed.
1573    BasicBlock *LoadBB = Load->getParent();
1574    BasicBlock *TmpBB = LoadBB;
1575  
1576    // Check that there are no implicit control flow instructions above our load
1577    // in its block. If there is an instruction that doesn't always pass the
1578    // execution to the following instruction, then moving through it may become
1579    // invalid. For example:
1580    //
1581    // int arr[LEN];
1582    // int index = ???;
1583    // ...
1584    // guard(0 <= index && index < LEN);
1585    // use(arr[index]);
1586    //
1587    // It is illegal to move the array access to any point above the guard,
1588    // because if the index is out of bounds we should deoptimize rather than
1589    // access the array.
1590    // Check that there is no guard in this block above our instruction.
1591    bool MustEnsureSafetyOfSpeculativeExecution =
1592        ICF->isDominatedByICFIFromSameBlock(Load);
1593  
1594    while (TmpBB->getSinglePredecessor()) {
1595      TmpBB = TmpBB->getSinglePredecessor();
1596      if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1597        return false;
1598      if (Blockers.count(TmpBB))
1599        return false;
1600  
1601      // If any of these blocks has more than one successor (i.e. if the edge we
1602      // just traversed was critical), then there are other paths through this
1603      // block along which the load may not be anticipated.  Hoisting the load
1604      // above this block would be adding the load to execution paths along
1605      // which it was not previously executed.
1606      if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1607        return false;
1608  
1609      // Check that there is no implicit control flow in a block above.
1610      MustEnsureSafetyOfSpeculativeExecution =
1611          MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
1612    }
1613  
1614    assert(TmpBB);
1615    LoadBB = TmpBB;
1616  
1617    // Check to see how many predecessors have the loaded value fully
1618    // available.
1619    MapVector<BasicBlock *, Value *> PredLoads;
1620    DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
1621    for (const AvailableValueInBlock &AV : ValuesPerBlock)
1622      FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
1623    for (BasicBlock *UnavailableBB : UnavailableBlocks)
1624      FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
1625  
1626    // The edge from Pred to LoadBB is a critical edge and will be split.
1627    SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
1628    // The edge from Pred to LoadBB is a critical edge, and another successor of
1629    // Pred contains a load that can be moved to Pred. This data structure maps
1630    // each such Pred to the movable load.
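  // (Illustrative: if Pred branches to both LoadBB and SuccBB, and SuccBB
  //  already contains a load identical to Load, we can hoist that load into
  //  Pred instead of splitting the critical edge Pred->LoadBB.)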
1631    MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
1632    for (BasicBlock *Pred : predecessors(LoadBB)) {
1633      // If any predecessor block is an EH pad that does not allow non-PHI
1634      // instructions before the terminator, we can't PRE the load.
1635      if (Pred->getTerminator()->isEHPad()) {
1636        LLVM_DEBUG(
1637            dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1638                   << Pred->getName() << "': " << *Load << '\n');
1639        return false;
1640      }
1641  
1642      if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
1643        continue;
1644      }
1645  
1646      if (Pred->getTerminator()->getNumSuccessors() != 1) {
1647        if (isa<IndirectBrInst>(Pred->getTerminator())) {
1648          LLVM_DEBUG(
1649              dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1650                     << Pred->getName() << "': " << *Load << '\n');
1651          return false;
1652        }
1653  
1654        if (LoadBB->isEHPad()) {
1655          LLVM_DEBUG(
1656              dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1657                     << Pred->getName() << "': " << *Load << '\n');
1658          return false;
1659        }
1660  
1661        // Do not split backedge as it will break the canonical loop form.
1662        if (!isLoadPRESplitBackedgeEnabled())
1663          if (DT->dominates(LoadBB, Pred)) {
1664            LLVM_DEBUG(
1665                dbgs()
1666                << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
1667                << Pred->getName() << "': " << *Load << '\n');
1668            return false;
1669          }
1670  
1671        if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
1672          CriticalEdgePredAndLoad[Pred] = LI;
1673        else
1674          CriticalEdgePredSplit.push_back(Pred);
1675      } else {
1676        // Only add the predecessors that will not be split for now.
1677        PredLoads[Pred] = nullptr;
1678      }
1679    }
1680  
1681    // Decide whether PRE is profitable for this load.
1682    unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
1683    unsigned NumUnavailablePreds = NumInsertPreds +
1684        CriticalEdgePredAndLoad.size();
1685    assert(NumUnavailablePreds != 0 &&
1686           "Fully available value should already be eliminated!");
1687    (void)NumUnavailablePreds;
1688  
1689    // If we need to insert new load in multiple predecessors, reject it.
1690    // FIXME: If we could restructure the CFG, we could make a common pred with
1691    // all the preds that don't have an available Load and insert a new load into
1692    // that one block.
1693    if (NumInsertPreds > 1)
1694        return false;
1695  
1696    // Now we know where we will insert the load. We must ensure that it is safe
1697    // to speculatively execute the load at those points.
1698    if (MustEnsureSafetyOfSpeculativeExecution) {
1699      if (CriticalEdgePredSplit.size())
1700        if (!isSafeToSpeculativelyExecute(Load, LoadBB->getFirstNonPHI(), AC, DT))
1701          return false;
1702      for (auto &PL : PredLoads)
1703        if (!isSafeToSpeculativelyExecute(Load, PL.first->getTerminator(), AC,
1704                                          DT))
1705          return false;
1706      for (auto &CEP : CriticalEdgePredAndLoad)
1707        if (!isSafeToSpeculativelyExecute(Load, CEP.first->getTerminator(), AC,
1708                                          DT))
1709          return false;
1710    }
1711  
1712    // Split critical edges, and update the unavailable predecessors accordingly.
1713    for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
1714      BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
1715      assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1716      PredLoads[NewPred] = nullptr;
1717      LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1718                        << LoadBB->getName() << '\n');
1719    }
1720  
1721    for (auto &CEP : CriticalEdgePredAndLoad)
1722      PredLoads[CEP.first] = nullptr;
1723  
1724    // Check if the load can safely be moved to all the unavailable predecessors.
1725    bool CanDoPRE = true;
1726    const DataLayout &DL = Load->getDataLayout();
1727    SmallVector<Instruction*, 8> NewInsts;
1728    for (auto &PredLoad : PredLoads) {
1729      BasicBlock *UnavailablePred = PredLoad.first;
1730  
1731      // Do PHI translation to get its value in the predecessor if necessary.  The
1732      // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1733      // We do the translation for each edge we skipped by going from Load's block
1734      // to LoadBB, otherwise we might miss pieces needing translation.
1735  
1736      // If all preds have a single successor, then we know it is safe to insert
1737      // the load on the pred (?!?), so we can insert code to materialize the
1738      // pointer if it is not available.
1739      Value *LoadPtr = Load->getPointerOperand();
1740      BasicBlock *Cur = Load->getParent();
1741      while (Cur != LoadBB) {
1742        PHITransAddr Address(LoadPtr, DL, AC);
1743        LoadPtr = Address.translateWithInsertion(Cur, Cur->getSinglePredecessor(),
1744                                                 *DT, NewInsts);
1745        if (!LoadPtr) {
1746          CanDoPRE = false;
1747          break;
1748        }
1749        Cur = Cur->getSinglePredecessor();
1750      }
1751  
1752      if (LoadPtr) {
1753        PHITransAddr Address(LoadPtr, DL, AC);
1754        LoadPtr = Address.translateWithInsertion(LoadBB, UnavailablePred, *DT,
1755                                                 NewInsts);
1756      }
1757      // If we couldn't find or insert a computation of this phi translated value,
1758      // we fail PRE.
1759      if (!LoadPtr) {
1760        LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1761                          << *Load->getPointerOperand() << "\n");
1762        CanDoPRE = false;
1763        break;
1764      }
1765  
1766      PredLoad.second = LoadPtr;
1767    }
1768  
1769    if (!CanDoPRE) {
1770      while (!NewInsts.empty()) {
1771        // Erase instructions generated by the failed PHI translation before
1772        // trying to number them. PHI translation might insert instructions
1773        // in basic blocks other than the current one, and we delete them
1774        // directly, as markInstructionForDeletion only allows removing from the
1775        // current basic block.
1776        NewInsts.pop_back_val()->eraseFromParent();
1777      }
1778    // HINT: Don't revert the edge-splitting, as a following transformation may
1779    // also need to split these critical edges.
1780      return !CriticalEdgePredSplit.empty();
1781    }
1782  
1783    // Okay, we can eliminate this load by inserting a reload in the predecessor
1784    // and using PHI construction to get the value in the other predecessors, do
1785    // it.
1786    LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
1787    LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
1788                                             << " INSTS: " << *NewInsts.back()
1789                                             << '\n');
1790  
1791    // Assign value numbers to the new instructions.
1792    for (Instruction *I : NewInsts) {
1793      // Instructions that have been inserted in predecessor(s) to materialize
1794      // the load address do not retain their original debug locations. Doing
1795      // so could lead to confusing (but correct) source attributions.
1796      I->updateLocationAfterHoist();
1797  
1798      // FIXME: We really _ought_ to insert these value numbers into their
1799      // parent's availability map.  However, in doing so, we risk getting into
1800      // ordering issues.  If a block hasn't been processed yet, we would be
1801      // marking a value as AVAIL-IN, which isn't what we intend.
1802      VN.lookupOrAdd(I);
1803    }
1804  
1805    eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads,
1806                                    &CriticalEdgePredAndLoad);
1807    ++NumPRELoad;
1808    return true;
1809  }
1810  
1811  bool GVNPass::performLoopLoadPRE(LoadInst *Load,
1812                                   AvailValInBlkVect &ValuesPerBlock,
1813                                   UnavailBlkVect &UnavailableBlocks) {
1814    const Loop *L = LI->getLoopFor(Load->getParent());
1815    // TODO: Generalize to other loop blocks that dominate the latch.
1816    if (!L || L->getHeader() != Load->getParent())
1817      return false;
1818  
1819    BasicBlock *Preheader = L->getLoopPreheader();
1820    BasicBlock *Latch = L->getLoopLatch();
1821    if (!Preheader || !Latch)
1822      return false;
1823  
1824    Value *LoadPtr = Load->getPointerOperand();
1825    // Must be available in preheader.
1826    if (!L->isLoopInvariant(LoadPtr))
1827      return false;
1828  
1829    // We plan to hoist the load to the preheader without introducing a new fault.
1830    // In order to do that, we need to prove that we cannot side-exit the loop
1831    // between the first entry into the loop header and the execution of the load.
1832    if (ICF->isDominatedByICFIFromSameBlock(Load))
1833      return false;
1834  
1835    BasicBlock *LoopBlock = nullptr;
1836    for (auto *Blocker : UnavailableBlocks) {
1837      // Blockers from outside the loop are handled in preheader.
1838      if (!L->contains(Blocker))
1839        continue;
1840  
1841      // Only allow one loop block. Loop header is not less frequently executed
1842      // than each loop block, and likely it is much more frequently executed. But
1843      // in case of multiple loop blocks, we need extra information (such as block
1844      // frequency info) to understand whether it is profitable to PRE into
1845      // multiple loop blocks.
1846      if (LoopBlock)
1847        return false;
1848  
1849      // Do not sink into inner loops. This may be non-profitable.
1850      if (L != LI->getLoopFor(Blocker))
1851        return false;
1852  
1853      // Blocks that dominate the latch execute on every single iteration, maybe
1854      // except the last one. So PREing into these blocks doesn't make much sense
1855      // in most cases. But the blocks that do not necessarily execute on each
1856      // iteration are sometimes much colder than the header, and this is when
1857      // PRE is potentially profitable.
1858      if (DT->dominates(Blocker, Latch))
1859        return false;
1860  
1861      // Make sure that the terminator itself doesn't clobber.
1862      if (Blocker->getTerminator()->mayWriteToMemory())
1863        return false;
1864  
1865      LoopBlock = Blocker;
1866    }
1867  
1868    if (!LoopBlock)
1869      return false;
1870  
1871    // Make sure the memory at this pointer cannot be freed, therefore we can
1872    // safely reload from it after clobber.
1873    if (LoadPtr->canBeFreed())
1874      return false;
1875  
1876    // TODO: Support critical edge splitting if blocker has more than 1 successor.
1877    MapVector<BasicBlock *, Value *> AvailableLoads;
1878    AvailableLoads[LoopBlock] = LoadPtr;
1879    AvailableLoads[Preheader] = LoadPtr;
1880  
1881    LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
1882    eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
1883                                    /*CriticalEdgePredAndLoad*/ nullptr);
1884    ++NumPRELoopLoad;
1885    return true;
1886  }
1887  
1888  static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
1889                             OptimizationRemarkEmitter *ORE) {
1890    using namespace ore;
1891  
1892    ORE->emit([&]() {
1893      return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
1894             << "load of type " << NV("Type", Load->getType()) << " eliminated"
1895             << setExtraArgs() << " in favor of "
1896             << NV("InfavorOfValue", AvailableValue);
1897    });
1898  }
1899  
1900  /// Attempt to eliminate a load whose dependencies are
1901  /// non-local by performing PHI construction.
1902  bool GVNPass::processNonLocalLoad(LoadInst *Load) {
1903    // non-local speculations are not allowed under asan.
1904    if (Load->getParent()->getParent()->hasFnAttribute(
1905            Attribute::SanitizeAddress) ||
1906        Load->getParent()->getParent()->hasFnAttribute(
1907            Attribute::SanitizeHWAddress))
1908      return false;
1909  
1910    // Step 1: Find the non-local dependencies of the load.
1911    LoadDepVect Deps;
1912    MD->getNonLocalPointerDependency(Load, Deps);
1913  
1914    // If we had to process more than one hundred blocks to find the
1915    // dependencies, this load isn't worth worrying about.  Optimizing
1916    // it will be too expensive.
1917    unsigned NumDeps = Deps.size();
1918    if (NumDeps > MaxNumDeps)
1919      return false;
1920  
1921    // If we had a phi translation failure, we'll have a single entry which is a
1922    // clobber in the current block.  Reject this early.
1923    if (NumDeps == 1 &&
1924        !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
1925      LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
1926                 dbgs() << " has unknown dependencies\n";);
1927      return false;
1928    }
1929  
1930    bool Changed = false;
1931    // If this load follows a GEP, see if we can PRE the indices before analyzing.
1932    if (GetElementPtrInst *GEP =
1933            dyn_cast<GetElementPtrInst>(Load->getOperand(0))) {
1934      for (Use &U : GEP->indices())
1935        if (Instruction *I = dyn_cast<Instruction>(U.get()))
1936          Changed |= performScalarPRE(I);
1937    }
1938  
1939    // Step 2: Analyze the availability of the load
1940    AvailValInBlkVect ValuesPerBlock;
1941    UnavailBlkVect UnavailableBlocks;
1942    AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);
1943  
1944    // If we have no predecessors that produce a known value for this load, exit
1945    // early.
1946    if (ValuesPerBlock.empty())
1947      return Changed;
1948  
1949    // Step 3: Eliminate full redundancy.
1950    //
1951    // If all of the instructions we depend on produce a known value for this
1952    // load, then it is fully redundant and we can use PHI insertion to compute
1953    // its value.  Insert PHIs and remove the fully redundant value now.
1954    if (UnavailableBlocks.empty()) {
1955      LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');
1956  
1957      // Perform PHI construction.
1958      Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
1959      // ConstructSSAForLoadSet is responsible for combining metadata.
1960      ICF->removeUsersOf(Load);
1961      Load->replaceAllUsesWith(V);
1962  
1963      if (isa<PHINode>(V))
1964        V->takeName(Load);
1965      if (Instruction *I = dyn_cast<Instruction>(V))
1966        // If instruction I has debug info, then we should not update it.
1967        // Also, if I has a null DebugLoc, then it is still potentially incorrect
1968        // to propagate Load's DebugLoc because Load may not post-dominate I.
1969        if (Load->getDebugLoc() && Load->getParent() == I->getParent())
1970          I->setDebugLoc(Load->getDebugLoc());
1971      if (V->getType()->isPtrOrPtrVectorTy())
1972        MD->invalidateCachedPointerInfo(V);
1973      markInstructionForDeletion(Load);
1974      ++NumGVNLoad;
1975      reportLoadElim(Load, V, ORE);
1976      return true;
1977    }
1978  
1979    // Step 4: Eliminate partial redundancy.
1980    if (!isPREEnabled() || !isLoadPREEnabled())
1981      return Changed;
1982    if (!isLoadInLoopPREEnabled() && LI->getLoopFor(Load->getParent()))
1983      return Changed;
1984  
1985    if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
1986        PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
1987      return true;
1988  
1989    return Changed;
1990  }
1991  
1992  static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
1993    if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_EQ)
1994      return true;
1995  
1996    // Floating point comparisons can be equal, but not equivalent.  Cases:
1997    // NaNs for unordered operators
1998    // +0.0 vs 0.0 for all operators
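  // For example, "fcmp oeq double 0.0, -0.0" is true, yet 1.0/0.0 and 1.0/-0.0
  // differ, so oeq equality alone does not justify replacing one operand with
  // the other.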
1999    if (Cmp->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
2000        (Cmp->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
2001         Cmp->getFastMathFlags().noNaNs())) {
2002        Value *LHS = Cmp->getOperand(0);
2003        Value *RHS = Cmp->getOperand(1);
2004        // If we can prove either side non-zero, then equality must imply
2005        // equivalence.
2006        // FIXME: We should do this optimization if 'no signed zeros' is
2007        // applicable via an instruction-level fast-math-flag or some other
2008        // indicator that relaxed FP semantics are being used.
2009        if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
2010          return true;
2011        if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
2012          return true;
2013        // TODO: Handle vector floating point constants
2014    }
2015    return false;
2016  }
2017  
2018  static bool impliesEquivalanceIfFalse(CmpInst* Cmp) {
2019    if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_NE)
2020      return true;
2021  
2022    // Floating point comparisons can be equal, but not equivalent.  Cases:
2023    // NaNs for unordered operators
2024    // +0.0 vs 0.0 for all operators
2025    if ((Cmp->getPredicate() == CmpInst::Predicate::FCMP_ONE &&
2026         Cmp->getFastMathFlags().noNaNs()) ||
2027        Cmp->getPredicate() == CmpInst::Predicate::FCMP_UNE) {
2028        Value *LHS = Cmp->getOperand(0);
2029        Value *RHS = Cmp->getOperand(1);
2030        // If we can prove either side non-zero, then equality must imply
2031        // equivalence.
2032        // FIXME: We should do this optimization if 'no signed zeros' is
2033        // applicable via an instruction-level fast-math-flag or some other
2034        // indicator that relaxed FP semantics are being used.
2035        if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
2036          return true;
2037        if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
2038          return true;
2039        // TODO: Handle vector floating point constants
2040    }
2041    return false;
2042  }
2043  
2044  
2045  static bool hasUsersIn(Value *V, BasicBlock *BB) {
2046    return llvm::any_of(V->users(), [BB](User *U) {
2047      auto *I = dyn_cast<Instruction>(U);
2048      return I && I->getParent() == BB;
2049    });
2050  }
2051  
2052  bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
2053    Value *V = IntrinsicI->getArgOperand(0);
2054  
2055    if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
2056      if (Cond->isZero()) {
2057        Type *Int8Ty = Type::getInt8Ty(V->getContext());
2058        Type *PtrTy = PointerType::get(V->getContext(), 0);
2059        // Insert a new store-to-null instruction before the assume to indicate
2060        // that this code is not reachable.  FIXME: We could insert an unreachable
2061        // instruction directly because we can modify the CFG.
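        // Illustrative example (hypothetical IR): "call void @llvm.assume(i1 false)"
        // gets a preceding "store i8 poison, ptr null", which makes the
        // unreachability explicit to later passes.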
2062        auto *NewS =
2063            new StoreInst(PoisonValue::get(Int8Ty), Constant::getNullValue(PtrTy),
2064                          IntrinsicI->getIterator());
2065        if (MSSAU) {
2066          const MemoryUseOrDef *FirstNonDom = nullptr;
2067          const auto *AL =
2068              MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());
2069  
2070          // If there are accesses in the current basic block, find the first one
2071          // that does not come before NewS. The new memory access is inserted
2072          // after the found access or before the terminator if no such access is
2073          // found.
2074          if (AL) {
2075            for (const auto &Acc : *AL) {
2076              if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
2077                if (!Current->getMemoryInst()->comesBefore(NewS)) {
2078                  FirstNonDom = Current;
2079                  break;
2080                }
2081            }
2082          }
2083  
2084          auto *NewDef =
2085              FirstNonDom ? MSSAU->createMemoryAccessBefore(
2086                                NewS, nullptr,
2087                                const_cast<MemoryUseOrDef *>(FirstNonDom))
2088                          : MSSAU->createMemoryAccessInBB(
2089                                NewS, nullptr,
2090                                NewS->getParent(), MemorySSA::BeforeTerminator);
2091  
2092          MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
2093        }
2094      }
2095      if (isAssumeWithEmptyBundle(*IntrinsicI)) {
2096        markInstructionForDeletion(IntrinsicI);
2097        return true;
2098      }
2099      return false;
2100    }
2101  
2102    if (isa<Constant>(V)) {
2103      // If it's not false, and constant, it must evaluate to true. This means our
2104      // assume is assume(true), and thus, pointless, and we don't want to do
2105      // anything more here.
2106      return false;
2107    }
2108  
2109    Constant *True = ConstantInt::getTrue(V->getContext());
2110    bool Changed = false;
2111  
2112    for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
2113      BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
2114  
2115      // This property is only true in dominated successors, propagateEquality
2116      // will check dominance for us.
2117      Changed |= propagateEquality(V, True, Edge, false);
2118    }
2119  
2120    // We can replace assume value with true, which covers cases like this:
2121    // call void @llvm.assume(i1 %cmp)
2122    // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
2123    ReplaceOperandsWithMap[V] = True;
2124  
2125    // Similarly, after assume(!NotV) we know that NotV == false.
2126    Value *NotV;
2127    if (match(V, m_Not(m_Value(NotV))))
2128      ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());
2129  
2130    // If we find an equality fact, canonicalize all dominated uses in this block
2131    // to one of the two values.  We heuristically choose the "oldest" of the
2132    // two where age is determined by value number. (Note that propagateEquality
2133    // above handles the cross block case.)
2134    //
2135    // Key cases to cover are:
2136    // 1)
2137    // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
2138    // call void @llvm.assume(i1 %cmp)
2139    // ret float %0 ; will change it to ret float 3.000000e+00
2140    // 2)
2141    // %load = load float, float* %addr
2142    // %cmp = fcmp oeq float %load, %0
2143    // call void @llvm.assume(i1 %cmp)
2144    // ret float %load ; will change it to ret float %0
2145    if (auto *CmpI = dyn_cast<CmpInst>(V)) {
2146      if (impliesEquivalanceIfTrue(CmpI)) {
2147        Value *CmpLHS = CmpI->getOperand(0);
2148        Value *CmpRHS = CmpI->getOperand(1);
2149        // Heuristically pick the better replacement -- the choice of heuristic
2150        // isn't terribly important here, but the fact we canonicalize on some
2151        // replacement is for exposing other simplifications.
2152        // TODO: pull this out as a helper function and reuse w/existing
2153        // (slightly different) logic.
2154        if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS))
2155          std::swap(CmpLHS, CmpRHS);
2156        if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))
2157          std::swap(CmpLHS, CmpRHS);
2158        if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) ||
2159            (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) {
2160          // Move the 'oldest' value to the right-hand side, using the value
2161          // number as a proxy for age.
2162          uint32_t LVN = VN.lookupOrAdd(CmpLHS);
2163          uint32_t RVN = VN.lookupOrAdd(CmpRHS);
2164          if (LVN < RVN)
2165            std::swap(CmpLHS, CmpRHS);
2166        }
2167  
2168        // Handle the degenerate case where we either haven't pruned a dead path
2169        // or removed a trivial assume yet.
2170        if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
2171          return Changed;
2172  
2173        LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
2174                   << *CmpLHS << " with "
2175                   << *CmpRHS << " in block "
2176                   << IntrinsicI->getParent()->getName() << "\n");
2177  
2178  
2179        // Setup the replacement map - this handles uses within the same block
2180        if (hasUsersIn(CmpLHS, IntrinsicI->getParent()))
2181          ReplaceOperandsWithMap[CmpLHS] = CmpRHS;
2182  
2183        // NOTE: The non-block local cases are handled by the call to
2184        // propagateEquality above; this block is just about handling the block
2185        // local cases.  TODO: There's a bunch of logic in propagateEquality which
2186        // isn't duplicated for the block local case, can we share it somehow?
2187      }
2188    }
2189    return Changed;
2190  }
2191  
2192  static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
2193    patchReplacementInstruction(I, Repl);
2194    I->replaceAllUsesWith(Repl);
2195  }
2196  
2197  /// Attempt to eliminate a load, first by eliminating it
2198  /// locally, and then attempting non-local elimination if that fails.
2199  bool GVNPass::processLoad(LoadInst *L) {
2200    if (!MD)
2201      return false;
2202  
2203    // This code hasn't been audited for ordered or volatile memory access
2204    if (!L->isUnordered())
2205      return false;
2206  
2207    if (L->use_empty()) {
2208      markInstructionForDeletion(L);
2209      return true;
2210    }
2211  
2212    // ... to a pointer that has been loaded from before...
2213    MemDepResult Dep = MD->getDependency(L);
2214  
2215    // If it is defined in another block, try harder.
2216    if (Dep.isNonLocal())
2217      return processNonLocalLoad(L);
2218  
2219    // Only handle the local case below
2220    if (!Dep.isLocal()) {
2221      // This might be a NonFuncLocal or an Unknown
2222      LLVM_DEBUG(
2223          // fast print dep, using operator<< on instruction is too slow.
2224          dbgs() << "GVN: load "; L->printAsOperand(dbgs());
2225          dbgs() << " has unknown dependence\n";);
2226      return false;
2227    }
2228  
2229    auto AV = AnalyzeLoadAvailability(L, Dep, L->getPointerOperand());
2230    if (!AV)
2231      return false;
2232  
2233    Value *AvailableValue = AV->MaterializeAdjustedValue(L, L, *this);
2234  
2235    // MaterializeAdjustedValue is responsible for combining metadata.
2236    ICF->removeUsersOf(L);
2237    L->replaceAllUsesWith(AvailableValue);
2238    markInstructionForDeletion(L);
2239    if (MSSAU)
2240      MSSAU->removeMemoryAccess(L);
2241    ++NumGVNLoad;
2242    reportLoadElim(L, AvailableValue, ORE);
2243    // Tell MDA to reexamine the reused pointer since we might have more
2244    // information after forwarding it.
2245    if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
2246      MD->invalidateCachedPointerInfo(AvailableValue);
2247    return true;
2248  }
2249  
2250  /// Return a pair whose first field is the value number of \p Exp and whose
2251  /// second field indicates whether the value number is newly created.
2252  std::pair<uint32_t, bool>
2253  GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
2254    uint32_t &e = expressionNumbering[Exp];
2255    bool CreateNewValNum = !e;
2256    if (CreateNewValNum) {
2257      Expressions.push_back(Exp);
2258      if (ExprIdx.size() < nextValueNumber + 1)
2259        ExprIdx.resize(nextValueNumber * 2);
2260      e = nextValueNumber;
2261      ExprIdx[nextValueNumber++] = nextExprNumber++;
2262    }
2263    return {e, CreateNewValNum};
2264  }
2265  
2266  /// Return whether all the values related to the same \p num are
2267  /// defined in \p BB.
2268  bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
2269                                           GVNPass &Gvn) {
2270    return all_of(
2271        Gvn.LeaderTable.getLeaders(Num),
2272        [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
2273  }
2274  
2275  /// Wrap phiTranslateImpl to provide caching functionality.
2276  uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
2277                                             const BasicBlock *PhiBlock,
2278                                             uint32_t Num, GVNPass &Gvn) {
2279    auto FindRes = PhiTranslateTable.find({Num, Pred});
2280    if (FindRes != PhiTranslateTable.end())
2281      return FindRes->second;
2282    uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
2283    PhiTranslateTable.insert({{Num, Pred}, NewNum});
2284    return NewNum;
2285  }
2286  
2287  // Return true if the value number \p Num and NewNum have equal value.
2288  // Return false if the result is unknown.
2289  bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
2290                                             const BasicBlock *Pred,
2291                                             const BasicBlock *PhiBlock,
2292                                             GVNPass &Gvn) {
2293    CallInst *Call = nullptr;
2294    auto Leaders = Gvn.LeaderTable.getLeaders(Num);
2295    for (const auto &Entry : Leaders) {
2296      Call = dyn_cast<CallInst>(Entry.Val);
2297      if (Call && Call->getParent() == PhiBlock)
2298        break;
2299    }
2300  
2301    if (AA->doesNotAccessMemory(Call))
2302      return true;
2303  
2304    if (!MD || !AA->onlyReadsMemory(Call))
2305      return false;
2306  
2307    MemDepResult local_dep = MD->getDependency(Call);
2308    if (!local_dep.isNonLocal())
2309      return false;
2310  
2311    const MemoryDependenceResults::NonLocalDepInfo &deps =
2312        MD->getNonLocalCallDependency(Call);
2313  
2314    // Check to see if the Call has no function local clobber.
2315    for (const NonLocalDepEntry &D : deps) {
2316      if (D.getResult().isNonFuncLocal())
2317        return true;
2318    }
2319    return false;
2320  }
2321  
2322  /// Translate value number \p Num using phis, so that it has the values of
2323  /// the phis in BB.
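/// For example (illustrative): if "%p = phi [%a, %Pred], [%b, %Other]" resides
/// in \p PhiBlock, translating the value number of %p across the edge from
/// \p Pred yields the value number of %a.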
2324  uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
2325                                                 const BasicBlock *PhiBlock,
2326                                                 uint32_t Num, GVNPass &Gvn) {
2327    if (PHINode *PN = NumberingPhi[Num]) {
2328      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
2329        if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
2330          if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
2331            return TransVal;
2332      }
2333      return Num;
2334    }
2335  
2336    // If any value related to Num is defined in a BB other than
2337    // PhiBlock, it cannot depend on a phi in PhiBlock without going through
2338    // a backedge. We can do an early exit in that case to save compile time.
2339    if (!areAllValsInBB(Num, PhiBlock, Gvn))
2340      return Num;
2341  
2342    if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
2343      return Num;
2344    Expression Exp = Expressions[ExprIdx[Num]];
2345  
2346    for (unsigned i = 0; i < Exp.varargs.size(); i++) {
2347      // For InsertValue and ExtractValue, some varargs are index numbers
2348      // instead of value numbers. Those index numbers should not be
2349      // translated.
2350      if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
2351          (i > 0 && Exp.opcode == Instruction::ExtractValue) ||
2352          (i > 1 && Exp.opcode == Instruction::ShuffleVector))
2353        continue;
2354      Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
2355    }
2356  
2357    if (Exp.commutative) {
2358      assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!");
2359      if (Exp.varargs[0] > Exp.varargs[1]) {
2360        std::swap(Exp.varargs[0], Exp.varargs[1]);
2361        uint32_t Opcode = Exp.opcode >> 8;
2362        if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
2363          Exp.opcode = (Opcode << 8) |
2364                       CmpInst::getSwappedPredicate(
2365                           static_cast<CmpInst::Predicate>(Exp.opcode & 255));
2366      }
2367    }
2368  
2369    if (uint32_t NewNum = expressionNumbering[Exp]) {
2370      if (Exp.opcode == Instruction::Call && NewNum != Num)
2371        return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num;
2372      return NewNum;
2373    }
2374    return Num;
2375  }
2376  
2377  /// Erase stale entry from phiTranslate cache so phiTranslate can be computed
2378  /// again.
2379  void GVNPass::ValueTable::eraseTranslateCacheEntry(
2380      uint32_t Num, const BasicBlock &CurrBlock) {
2381    for (const BasicBlock *Pred : predecessors(&CurrBlock))
2382      PhiTranslateTable.erase({Num, Pred});
2383  }
2384  
2385  // In order to find a leader for a given value number at a
2386  // specific basic block, we first obtain the list of all Values for that number,
2387  // and then scan the list to find one whose block dominates the block in
2388  // question.  This is fast because dominator tree queries consist of only
2389  // a few comparisons of DFS numbers.
2390  Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) {
2391    auto Leaders = LeaderTable.getLeaders(num);
2392    if (Leaders.empty())
2393      return nullptr;
2394  
2395    Value *Val = nullptr;
2396    for (const auto &Entry : Leaders) {
2397      if (DT->dominates(Entry.BB, BB)) {
2398        Val = Entry.Val;
2399        if (isa<Constant>(Val))
2400          return Val;
2401      }
2402    }
2403  
2404    return Val;
2405  }
2406  
2407  /// There is an edge from 'Src' to 'Dst'.  Return
2408  /// true if every path from the entry block to 'Dst' passes via this edge.  In
2409  /// particular 'Dst' must not be reachable via another edge from 'Src'.
2410  static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
2411                                         DominatorTree *DT) {
2412    // While in theory it is interesting to consider the case in which Dst has
2413    // more than one predecessor, because Dst might be part of a loop which is
2414    // only reachable from Src, in practice it is pointless since at the time
2415    // GVN runs all such loops have preheaders, which means that Dst will have
2416    // been changed to have only one predecessor, namely Src.
2417    const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
2418    assert((!Pred || Pred == E.getStart()) &&
2419           "No edge between these basic blocks!");
2420    return Pred != nullptr;
2421  }
2422  
2423  void GVNPass::assignBlockRPONumber(Function &F) {
2424    BlockRPONumber.clear();
2425    uint32_t NextBlockNumber = 1;
2426    ReversePostOrderTraversal<Function *> RPOT(&F);
2427    for (BasicBlock *BB : RPOT)
2428      BlockRPONumber[BB] = NextBlockNumber++;
2429    InvalidBlockRPONumbers = false;
2430  }
2431  
2432  bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
2433    bool Changed = false;
2434    for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
2435      Value *Operand = Instr->getOperand(OpNum);
2436      auto it = ReplaceOperandsWithMap.find(Operand);
2437      if (it != ReplaceOperandsWithMap.end()) {
2438        LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with "
2439                          << *it->second << " in instruction " << *Instr << '\n');
2440        Instr->setOperand(OpNum, it->second);
2441        Changed = true;
2442      }
2443    }
2444    return Changed;
2445  }
2446  
2447  /// The given values are known to be equal in every block
2448  /// dominated by 'Root'.  Exploit this, for example by replacing 'LHS' with
2449  /// 'RHS' everywhere in the scope.  Returns whether a change was made.
2450  /// If DominatesByEdge is false, then it means that we will propagate the RHS
2451  /// value starting from the end of Root.Start.
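/// For example (illustrative): given "%c = icmp eq i32 %x, %y" and a branch
/// "br i1 %c, label %T, label %F", along the edge to %T we learn %c == true and
/// hence %x == %y, so dominated uses of one value may be rewritten in terms of
/// the other.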
2452  bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
2453                                  const BasicBlockEdge &Root,
2454                                  bool DominatesByEdge) {
2455    SmallVector<std::pair<Value*, Value*>, 4> Worklist;
2456    Worklist.push_back(std::make_pair(LHS, RHS));
2457    bool Changed = false;
2458    // For speed, compute a conservative fast approximation to
2459    // DT->dominates(Root, Root.getEnd());
2460    const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
2461  
2462    while (!Worklist.empty()) {
2463      std::pair<Value*, Value*> Item = Worklist.pop_back_val();
2464      LHS = Item.first; RHS = Item.second;
2465  
2466      if (LHS == RHS)
2467        continue;
2468      assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
2469  
2470      // Don't try to propagate equalities between constants.
2471      if (isa<Constant>(LHS) && isa<Constant>(RHS))
2472        continue;
2473  
2474      // Prefer a constant on the right-hand side, or an Argument if no constants.
2475      if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
2476        std::swap(LHS, RHS);
2477      assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
2478      const DataLayout &DL =
2479          isa<Argument>(LHS)
2480              ? cast<Argument>(LHS)->getParent()->getDataLayout()
2481              : cast<Instruction>(LHS)->getDataLayout();
2482  
2483      // If there is no obvious reason to prefer the left-hand side over the
2484      // right-hand side, ensure the longest lived term is on the right-hand side,
2485      // so the shortest lived term will be replaced by the longest lived.
2486      // This tends to expose more simplifications.
2487      uint32_t LVN = VN.lookupOrAdd(LHS);
2488      if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
2489          (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
2490        // Move the 'oldest' value to the right-hand side, using the value number
2491        // as a proxy for age.
2492        uint32_t RVN = VN.lookupOrAdd(RHS);
2493        if (LVN < RVN) {
2494          std::swap(LHS, RHS);
2495          LVN = RVN;
2496        }
2497      }
2498  
2499      // If value numbering later sees that an instruction in the scope is equal
2500      // to 'LHS' then ensure it will be turned into 'RHS'.  In order to preserve
2501      // the invariant that instructions only occur in the leader table for their
2502      // own value number (this is used by removeFromLeaderTable), do not do this
2503      // if RHS is an instruction (if an instruction in the scope is morphed into
2504      // LHS then it will be turned into RHS by the next GVN iteration anyway, so
2505      // using the leader table is about compiling faster, not optimizing better).
2506      // The leader table only tracks basic blocks, not edges. Only add to it if we
2507      // have the simple case where the edge dominates the end.
2508      if (RootDominatesEnd && !isa<Instruction>(RHS) &&
2509          canReplacePointersIfEqual(LHS, RHS, DL))
2510        LeaderTable.insert(LVN, RHS, Root.getEnd());
2511  
2512      // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.  As
2513      // LHS always has at least one use that is not dominated by Root, this will
2514      // never do anything if LHS has only one use.
2515      if (!LHS->hasOneUse()) {
2516        // Create a callback that captures the DL.
2517        auto canReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
2518          return canReplacePointersInUseIfEqual(U, To, DL);
2519        };
2520        unsigned NumReplacements =
2521            DominatesByEdge
2522                ? replaceDominatedUsesWithIf(LHS, RHS, *DT, Root,
2523                                             canReplacePointersCallBack)
2524                : replaceDominatedUsesWithIf(LHS, RHS, *DT, Root.getStart(),
2525                                             canReplacePointersCallBack);
2526  
2527        if (NumReplacements > 0) {
2528          Changed = true;
2529          NumGVNEqProp += NumReplacements;
2530          // Cached information for anything that uses LHS will be invalid.
2531          if (MD)
2532            MD->invalidateCachedPointerInfo(LHS);
2533        }
2534      }
2535  
2536      // Now try to deduce additional equalities from this one. For example, if
2537      // the known equality was "(A != B)" == "false" then it follows that A and B
2538      // are equal in the scope. Only boolean equalities with an explicit true or
2539      // false RHS are currently supported.
2540      if (!RHS->getType()->isIntegerTy(1))
2541        // Not a boolean equality - bail out.
2542        continue;
2543      ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
2544      if (!CI)
2545        // RHS neither 'true' nor 'false' - bail out.
2546        continue;
2547      // Whether RHS equals 'true'.  Otherwise it equals 'false'.
2548      bool isKnownTrue = CI->isMinusOne();
2549      bool isKnownFalse = !isKnownTrue;
2550  
2551      // If "A && B" is known true then both A and B are known true.  If "A || B"
2552      // is known false then both A and B are known false.
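    // A minimal illustrative sketch (the IR names below are hypothetical, not
    // taken from this pass): if this edge establishes
    //   %t = and i1 %a, %b  ==  true
    // then both (%a, true) and (%b, true) are queued, so each conjunct is
    // propagated through the scope on its own.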
2553      Value *A, *B;
2554      if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
2555          (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
2556        Worklist.push_back(std::make_pair(A, RHS));
2557        Worklist.push_back(std::make_pair(B, RHS));
2558        continue;
2559      }
2560  
2561      // If we are propagating an equality like "(A == B)" == "true" then also
2562      // propagate the equality A == B.  When propagating a comparison such as
2563      // "(A >= B)" == "true", replace all instances of "A < B" with "false".
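    // Illustrative sketch (hypothetical IR, not from this file): if the edge
    // establishes that
    //   %ge = icmp sge i32 %a, %b
    // is true, then a dominated instruction computing the inverse predicate,
    //   %lt = icmp slt i32 %a, %b
    // can be replaced by the constant false throughout the scope.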
2564      if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
2565        Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
2566  
2567        // If "A == B" is known true, or "A != B" is known false, then replace
2568        // A with B everywhere in the scope.  For floating point operations, we
2569        // have to be careful since equality does not always imply equivalence.
2570        if ((isKnownTrue && impliesEquivalanceIfTrue(Cmp)) ||
2571            (isKnownFalse && impliesEquivalanceIfFalse(Cmp)))
2572          Worklist.push_back(std::make_pair(Op0, Op1));
2573  
2574        // If "A >= B" is known true, replace "A < B" with false everywhere.
2575        CmpInst::Predicate NotPred = Cmp->getInversePredicate();
2576        Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
2577        // Since we don't have the instruction "A < B" immediately to hand, work
2578        // out the value number that it would have and use that to find an
2579        // appropriate instruction (if any).
2580        uint32_t NextNum = VN.getNextUnusedValueNumber();
2581        uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
2582        // If the number we were assigned was brand new then there is no point in
2583        // looking for an instruction realizing it: there cannot be one!
2584        if (Num < NextNum) {
2585          Value *NotCmp = findLeader(Root.getEnd(), Num);
2586          if (NotCmp && isa<Instruction>(NotCmp)) {
2587            unsigned NumReplacements =
2588                DominatesByEdge
2589                    ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
2590                    : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
2591                                               Root.getStart());
2592            Changed |= NumReplacements > 0;
2593            NumGVNEqProp += NumReplacements;
2594            // Cached information for anything that uses NotCmp will be invalid.
2595            if (MD)
2596              MD->invalidateCachedPointerInfo(NotCmp);
2597          }
2598        }
2599        // Ensure that any instruction in scope that gets the "A < B" value number
2600        // is replaced with false.
2601        // The leader table only tracks basic blocks, not edges. Only add to it
2602        // if we have the simple case where the edge dominates the end.
2603        if (RootDominatesEnd)
2604          LeaderTable.insert(Num, NotVal, Root.getEnd());
2605  
2606        continue;
2607      }
2608    }
2609  
2610    return Changed;
2611  }
2612  
2613  /// When calculating availability, handle an instruction
2614  /// by inserting it into the appropriate sets.
2615  bool GVNPass::processInstruction(Instruction *I) {
2616    // Ignore dbg info intrinsics.
2617    if (isa<DbgInfoIntrinsic>(I))
2618      return false;
2619  
2620    // If the instruction can be easily simplified then do so now in preference
2621    // to value numbering it.  Value numbering often exposes redundancies, for
2622    // example if it determines that %y is equal to %x then the instruction
2623    // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
2624    const DataLayout &DL = I->getDataLayout();
2625    if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) {
2626      bool Changed = false;
2627      if (!I->use_empty()) {
2628        // Simplification can cause a special instruction to become not special.
2629        // For example, devirtualization to a willreturn function.
2630        ICF->removeUsersOf(I);
2631        I->replaceAllUsesWith(V);
2632        Changed = true;
2633      }
2634      if (isInstructionTriviallyDead(I, TLI)) {
2635        markInstructionForDeletion(I);
2636        Changed = true;
2637      }
2638      if (Changed) {
2639        if (MD && V->getType()->isPtrOrPtrVectorTy())
2640          MD->invalidateCachedPointerInfo(V);
2641        ++NumGVNSimpl;
2642        return true;
2643      }
2644    }
2645  
2646    if (auto *Assume = dyn_cast<AssumeInst>(I))
2647      return processAssumeIntrinsic(Assume);
2648  
2649    if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
2650      if (processLoad(Load))
2651        return true;
2652  
2653      unsigned Num = VN.lookupOrAdd(Load);
2654      LeaderTable.insert(Num, Load, Load->getParent());
2655      return false;
2656    }
2657  
2658    // For conditional branches, we can perform simple conditional propagation on
2659    // the condition value itself.
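  // Illustrative sketch (hypothetical IR): for
  //   br i1 %cond, label %then, label %else
  // the pass propagates %cond == true along the edge to %then and
  // %cond == false along the edge to %else, provided the two targets differ.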
2660    if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
2661      if (!BI->isConditional())
2662        return false;
2663  
2664      if (isa<Constant>(BI->getCondition()))
2665        return processFoldableCondBr(BI);
2666  
2667      Value *BranchCond = BI->getCondition();
2668      BasicBlock *TrueSucc = BI->getSuccessor(0);
2669      BasicBlock *FalseSucc = BI->getSuccessor(1);
2670      // Avoid multiple edges early.
2671      if (TrueSucc == FalseSucc)
2672        return false;
2673  
2674      BasicBlock *Parent = BI->getParent();
2675      bool Changed = false;
2676  
2677      Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
2678      BasicBlockEdge TrueE(Parent, TrueSucc);
2679      Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
2680  
2681      Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
2682      BasicBlockEdge FalseE(Parent, FalseSucc);
2683      Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
2684  
2685      return Changed;
2686    }
2687  
2688    // For switches, propagate the case values into the case destinations.
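  // Illustrative sketch (hypothetical IR): for
  //   switch i32 %x, label %default [ i32 0, label %bb0
  //                                    i32 1, label %bb1 ]
  // the pass propagates %x == 0 into %bb0 and %x == 1 into %bb1, but only for
  // case destinations that are reached by exactly one edge.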
2689    if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
2690      Value *SwitchCond = SI->getCondition();
2691      BasicBlock *Parent = SI->getParent();
2692      bool Changed = false;
2693  
2694      // Remember how many outgoing edges there are to every successor.
2695      SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
2696      for (BasicBlock *Succ : successors(Parent))
2697        ++SwitchEdges[Succ];
2698  
2699      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
2700           i != e; ++i) {
2701        BasicBlock *Dst = i->getCaseSuccessor();
2702        // If there is only a single edge, propagate the case value into it.
2703        if (SwitchEdges.lookup(Dst) == 1) {
2704          BasicBlockEdge E(Parent, Dst);
2705          Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true);
2706        }
2707      }
2708      return Changed;
2709    }
2710  
2711    // Instructions with void type don't return a value, so there's
2712    // no point in trying to find redundancies in them.
2713    if (I->getType()->isVoidTy())
2714      return false;
2715  
2716    uint32_t NextNum = VN.getNextUnusedValueNumber();
2717    unsigned Num = VN.lookupOrAdd(I);
2718  
2719    // Allocations are always uniquely numbered, so we can save time and memory
2720    // by fast failing them.
2721    if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
2722      LeaderTable.insert(Num, I, I->getParent());
2723      return false;
2724    }
2725  
2726    // If the number we were assigned was a brand new VN, then we don't
2727    // need to do a lookup to see if the number already exists
2728    // somewhere in the domtree: it can't!
2729    if (Num >= NextNum) {
2730      LeaderTable.insert(Num, I, I->getParent());
2731      return false;
2732    }
2733  
2734    // Perform fast-path value-number based elimination of values inherited from
2735    // dominators.
2736    Value *Repl = findLeader(I->getParent(), Num);
2737    if (!Repl) {
2738      // Failure, just remember this instance for future use.
2739      LeaderTable.insert(Num, I, I->getParent());
2740      return false;
2741    }
2742  
2743    if (Repl == I) {
2744      // If I was the result of a shortcut PRE, it might already be in the table
2745      // and the best replacement for itself. Nothing to do.
2746      return false;
2747    }
2748  
2749    // Remove it!
2750    patchAndReplaceAllUsesWith(I, Repl);
2751    if (MD && Repl->getType()->isPtrOrPtrVectorTy())
2752      MD->invalidateCachedPointerInfo(Repl);
2753    markInstructionForDeletion(I);
2754    return true;
2755  }
2756  
2757  /// runOnFunction - This is the main transformation entry point for a function.
2758  bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
2759                        const TargetLibraryInfo &RunTLI, AAResults &RunAA,
2760                        MemoryDependenceResults *RunMD, LoopInfo &LI,
2761                        OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
2762    AC = &RunAC;
2763    DT = &RunDT;
2764    VN.setDomTree(DT);
2765    TLI = &RunTLI;
2766    VN.setAliasAnalysis(&RunAA);
2767    MD = RunMD;
2768    ImplicitControlFlowTracking ImplicitCFT;
2769    ICF = &ImplicitCFT;
2770    this->LI = &LI;
2771    VN.setMemDep(MD);
2772    ORE = RunORE;
2773    InvalidBlockRPONumbers = true;
2774    MemorySSAUpdater Updater(MSSA);
2775    MSSAU = MSSA ? &Updater : nullptr;
2776  
2777    bool Changed = false;
2778    bool ShouldContinue = true;
2779  
2780    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2781    // Merge unconditional branches, allowing PRE to catch more
2782    // optimization opportunities.
2783    for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
2784      bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, &LI, MSSAU, MD);
2785      if (removedBlock)
2786        ++NumGVNBlocks;
2787  
2788      Changed |= removedBlock;
2789    }
2790    DTU.flush();
2791  
2792    unsigned Iteration = 0;
2793    while (ShouldContinue) {
2794      LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
2795      (void) Iteration;
2796      ShouldContinue = iterateOnFunction(F);
2797      Changed |= ShouldContinue;
2798      ++Iteration;
2799    }
2800  
2801    if (isPREEnabled()) {
2802      // Fabricate val-num for dead-code in order to suppress assertion in
2803      // performPRE().
2804      assignValNumForDeadCode();
2805      bool PREChanged = true;
2806      while (PREChanged) {
2807        PREChanged = performPRE(F);
2808        Changed |= PREChanged;
2809      }
2810    }
2811  
2812    // FIXME: Should perform GVN again after PRE does something.  PRE can move
2813    // computations into blocks where they become fully redundant.  Note that
2814    // we can't do this until PRE's critical edge splitting updates memdep.
2815    // Actually, when this happens, we should just fully integrate PRE into GVN.
2816  
2817    cleanupGlobalSets();
2818    // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
2819    // iteration.
2820    DeadBlocks.clear();
2821  
2822    if (MSSA && VerifyMemorySSA)
2823      MSSA->verifyMemorySSA();
2824  
2825    return Changed;
2826  }
2827  
2828  bool GVNPass::processBlock(BasicBlock *BB) {
2829    // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
2830    // (and incrementing BI before processing an instruction).
2831    assert(InstrsToErase.empty() &&
2832           "We expect InstrsToErase to be empty across iterations");
2833    if (DeadBlocks.count(BB))
2834      return false;
2835  
2836    // Clear the map before every BB because it can only be used for a single BB.
2837    ReplaceOperandsWithMap.clear();
2838    bool ChangedFunction = false;
2839  
2840    // Since we may not have visited the input blocks of the phis, we can't
2841    // use our normal hash approach for phis.  Instead, simply look for
2842    // obvious duplicates.  The first pass of GVN will tend to create
2843    // identical phis, and the second or later passes can eliminate them.
2844    SmallPtrSet<PHINode *, 8> PHINodesToRemove;
2845    ChangedFunction |= EliminateDuplicatePHINodes(BB, PHINodesToRemove);
2846    for (PHINode *PN : PHINodesToRemove) {
2847      VN.erase(PN);
2848      removeInstruction(PN);
2849    }
2850  
2851    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
2852         BI != BE;) {
2853      if (!ReplaceOperandsWithMap.empty())
2854        ChangedFunction |= replaceOperandsForInBlockEquality(&*BI);
2855      ChangedFunction |= processInstruction(&*BI);
2856  
2857      if (InstrsToErase.empty()) {
2858        ++BI;
2859        continue;
2860      }
2861  
2862      // If we need some instructions deleted, do it now.
2863      NumGVNInstr += InstrsToErase.size();
2864  
2865      // Avoid iterator invalidation.
2866      bool AtStart = BI == BB->begin();
2867      if (!AtStart)
2868        --BI;
2869  
2870      for (auto *I : InstrsToErase) {
2871        assert(I->getParent() == BB && "Removing instruction from wrong block?");
2872        LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n');
2873        salvageKnowledge(I, AC);
2874        salvageDebugInfo(*I);
2875        removeInstruction(I);
2876      }
2877      InstrsToErase.clear();
2878  
2879      if (AtStart)
2880        BI = BB->begin();
2881      else
2882        ++BI;
2883    }
2884  
2885    return ChangedFunction;
2886  }
2887  
2888  // Instantiate an expression in a predecessor that lacked it.
2889  bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
2890                                          BasicBlock *Curr, unsigned int ValNo) {
2891    // Because we are going top-down through the block, all value numbers
2892    // will be available in the predecessor by the time we need them.  Any
2893    // that weren't originally present will have been instantiated earlier
2894    // in this loop.
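  // Sketch of the loop below (operand names are hypothetical): for an
  // expression such as "%v = add i32 %x, %y" being materialized in Pred, each
  // operand is phi-translated into Pred and replaced by the leader found
  // there; if any operand has no leader available in Pred, we give up.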
2895    bool success = true;
2896    for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) {
2897      Value *Op = Instr->getOperand(i);
2898      if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2899        continue;
2900      // This could be a newly inserted instruction, in which case, we won't
2901      // find a value number, and should give up before we hurt ourselves.
2902      // FIXME: Rewrite the infrastructure to make it easier to value number
2903      // and process newly inserted instructions.
2904      if (!VN.exists(Op)) {
2905        success = false;
2906        break;
2907      }
2908      uint32_t TValNo =
2909          VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
2910      if (Value *V = findLeader(Pred, TValNo)) {
2911        Instr->setOperand(i, V);
2912      } else {
2913        success = false;
2914        break;
2915      }
2916    }
2917  
2918    // Fail out if we encounter an operand that is not available in
2919    // the PRE predecessor.  This is typically because of loads which
2920    // are not value numbered precisely.
2921    if (!success)
2922      return false;
2923  
2924    Instr->insertBefore(Pred->getTerminator());
2925    Instr->setName(Instr->getName() + ".pre");
2926    Instr->setDebugLoc(Instr->getDebugLoc());
2927  
2928    ICF->insertInstructionTo(Instr, Pred);
2929  
2930    unsigned Num = VN.lookupOrAdd(Instr);
2931    VN.add(Instr, Num);
2932  
2933    // Update the availability map to include the new instruction.
2934    LeaderTable.insert(Num, Instr, Pred);
2935    return true;
2936  }
2937  
2938  bool GVNPass::performScalarPRE(Instruction *CurInst) {
2939    if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
2940        isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
2941        CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
2942        isa<DbgInfoIntrinsic>(CurInst))
2943      return false;
2944  
2945    // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
2946    // sinking the compare again, and it would force the code generator to
2947    // move the i1 from processor flags or predicate registers into a general
2948    // purpose register.
2949    if (isa<CmpInst>(CurInst))
2950      return false;
2951  
2952    // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
2953    // sinking the addressing mode computation back to its uses. Extending the
2954    // GEP's live range increases the register pressure, and therefore it can
2955    // introduce unnecessary spills.
2956    //
2957    // This doesn't prevent Load PRE. PHI translation will make the GEP available
2958    // to the load by moving it to the predecessor block if necessary.
2959    if (isa<GetElementPtrInst>(CurInst))
2960      return false;
2961  
2962    if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
2963      // We don't currently value number ANY inline asm calls.
2964      if (CallB->isInlineAsm())
2965        return false;
2966    }
2967  
2968    uint32_t ValNo = VN.lookup(CurInst);
2969  
2970    // Look for the predecessors for PRE opportunities.  We're
2971    // only trying to solve the basic diamond case, where
2972    // a value is computed in the successor and one predecessor,
2973    // but not the other.  We also explicitly disallow cases
2974    // where the successor is its own predecessor, because they're
2975    // more complicated to get right.
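  // Illustrative diamond (hypothetical IR): with predecessors %p1 and %p2 of
  // the current block %join,
  //   %p1:   %v1 = add i32 %a, %b
  //   %p2:   <no equivalent computation>
  //   %join: %v2 = add i32 %a, %b
  // PRE inserts the add into %p2 and replaces %v2 with a phi over the two
  // predecessor values, so the expression is computed once on every path.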
2976    unsigned NumWith = 0;
2977    unsigned NumWithout = 0;
2978    BasicBlock *PREPred = nullptr;
2979    BasicBlock *CurrentBlock = CurInst->getParent();
2980  
2981    // Update the RPO numbers for this function.
2982    if (InvalidBlockRPONumbers)
2983      assignBlockRPONumber(*CurrentBlock->getParent());
2984  
2985    SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
2986    for (BasicBlock *P : predecessors(CurrentBlock)) {
2987      // We're not interested in PRE when one of the predecessor blocks is
2988      // not reachable from the entry block.
2989      if (!DT->isReachableFromEntry(P)) {
2990        NumWithout = 2;
2991        break;
2992      }
2993      // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
2994      assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
2995             "Invalid BlockRPONumber map.");
2996      if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
2997        NumWithout = 2;
2998        break;
2999      }
3000  
3001      uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
3002      Value *predV = findLeader(P, TValNo);
3003      if (!predV) {
3004        predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
3005        PREPred = P;
3006        ++NumWithout;
3007      } else if (predV == CurInst) {
3008        /* CurInst dominates this predecessor. */
3009        NumWithout = 2;
3010        break;
3011      } else {
3012        predMap.push_back(std::make_pair(predV, P));
3013        ++NumWith;
3014      }
3015    }
3016  
3017    // Don't do PRE when it might increase code size, i.e. when
3018    // we would need to insert instructions in more than one pred.
3019    if (NumWithout > 1 || NumWith == 0)
3020      return false;
3021  
3022    // We may have a case where all predecessors have the instruction,
3023    // and we just need to insert a phi node. Otherwise, perform
3024    // insertion.
3025    Instruction *PREInstr = nullptr;
3026  
3027    if (NumWithout != 0) {
3028      if (!isSafeToSpeculativelyExecute(CurInst)) {
3029        // It is only valid to insert a new instruction if the current instruction
3030        // is always executed. An instruction with implicit control flow could
3031        // prevent us from doing it. If we cannot speculate the execution, then
3032        // PRE should be prohibited.
3033        if (ICF->isDominatedByICFIFromSameBlock(CurInst))
3034          return false;
3035      }
3036  
3037      // Don't do PRE across indirect branch.
3038      if (isa<IndirectBrInst>(PREPred->getTerminator()))
3039        return false;
3040  
3041      // We can't do PRE safely on a critical edge, so instead we schedule
3042      // the edge to be split and perform the PRE the next time we iterate
3043      // on the function.
3044      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
3045      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
3046        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
3047        return false;
3048      }
3049      // We need to insert somewhere, so let's give it a shot
3050      PREInstr = CurInst->clone();
3051      if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
3052        // If we failed insertion, make sure we remove the instruction.
3053  #ifndef NDEBUG
3054        verifyRemoved(PREInstr);
3055  #endif
3056        PREInstr->deleteValue();
3057        return false;
3058      }
3059    }
3060  
3061    // Either we should have filled in the PRE instruction, or we should
3062    // not have needed insertions.
3063    assert(PREInstr != nullptr || NumWithout == 0);
3064  
3065    ++NumGVNPRE;
3066  
3067    // Create a PHI to make the value available in this block.
3068    PHINode *Phi = PHINode::Create(CurInst->getType(), predMap.size(),
3069                                   CurInst->getName() + ".pre-phi");
3070    Phi->insertBefore(CurrentBlock->begin());
3071    for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
3072      if (Value *V = predMap[i].first) {
3073        // If we use an existing value in this phi, we have to patch the original
3074        // value because the phi will be used to replace a later value.
3075        patchReplacementInstruction(CurInst, V);
3076        Phi->addIncoming(V, predMap[i].second);
3077      } else
3078        Phi->addIncoming(PREInstr, PREPred);
3079    }
3080  
3081    VN.add(Phi, ValNo);
3082    // After creating a new PHI for ValNo, the phi translate result for ValNo will
3083    // be changed, so erase the related stale entries in phi translate cache.
3084    VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
3085    LeaderTable.insert(ValNo, Phi, CurrentBlock);
3086    Phi->setDebugLoc(CurInst->getDebugLoc());
3087    CurInst->replaceAllUsesWith(Phi);
3088    if (MD && Phi->getType()->isPtrOrPtrVectorTy())
3089      MD->invalidateCachedPointerInfo(Phi);
3090    VN.erase(CurInst);
3091    LeaderTable.erase(ValNo, CurInst, CurrentBlock);
3092  
3093    LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
3094    removeInstruction(CurInst);
3095    ++NumGVNInstr;
3096  
3097    return true;
3098  }
3099  
3100  /// Perform a purely local form of PRE that looks for diamond
3101  /// control flow patterns and attempts to perform simple PRE at the join point.
3102  bool GVNPass::performPRE(Function &F) {
3103    bool Changed = false;
3104    for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
3105      // Nothing to PRE in the entry block.
3106      if (CurrentBlock == &F.getEntryBlock())
3107        continue;
3108  
3109      // Don't perform PRE on an EH pad.
3110      if (CurrentBlock->isEHPad())
3111        continue;
3112  
3113      for (BasicBlock::iterator BI = CurrentBlock->begin(),
3114                                BE = CurrentBlock->end();
3115           BI != BE;) {
3116        Instruction *CurInst = &*BI++;
3117        Changed |= performScalarPRE(CurInst);
3118      }
3119    }
3120  
3121    if (splitCriticalEdges())
3122      Changed = true;
3123  
3124    return Changed;
3125  }
3126  
3127  /// Split the critical edge connecting the given two blocks, and return
3128  /// the block inserted to the critical edge.
3129  BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
3130  // GVN does not require loop-simplify; do not try to preserve it if it is not
3131    // possible.
3132    BasicBlock *BB = SplitCriticalEdge(
3133        Pred, Succ,
3134        CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
3135    if (BB) {
3136      if (MD)
3137        MD->invalidateCachedPredecessors();
3138      InvalidBlockRPONumbers = true;
3139    }
3140    return BB;
3141  }
3142  
3143  /// Split critical edges found during the previous
3144  /// iteration that may enable further optimization.
3145  bool GVNPass::splitCriticalEdges() {
3146    if (toSplit.empty())
3147      return false;
3148  
3149    bool Changed = false;
3150    do {
3151      std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
3152      Changed |= SplitCriticalEdge(Edge.first, Edge.second,
3153                                   CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
3154                 nullptr;
3155    } while (!toSplit.empty());
3156    if (Changed) {
3157      if (MD)
3158        MD->invalidateCachedPredecessors();
3159      InvalidBlockRPONumbers = true;
3160    }
3161    return Changed;
3162  }
3163  
3164  /// Executes one iteration of GVN
3165  bool GVNPass::iterateOnFunction(Function &F) {
3166    cleanupGlobalSets();
3167  
3168    // Top-down walk of the dominator tree
3169    bool Changed = false;
3170    // Needed for value numbering with phi construction to work.
3171    // RPOT walks the graph in its constructor and will not be invalidated during
3172    // processBlock.
3173    ReversePostOrderTraversal<Function *> RPOT(&F);
3174  
3175    for (BasicBlock *BB : RPOT)
3176      Changed |= processBlock(BB);
3177  
3178    return Changed;
3179  }
3180  
3181  void GVNPass::cleanupGlobalSets() {
3182    VN.clear();
3183    LeaderTable.clear();
3184    BlockRPONumber.clear();
3185    ICF->clear();
3186    InvalidBlockRPONumbers = true;
3187  }
3188  
3189  void GVNPass::removeInstruction(Instruction *I) {
3190    if (MD) MD->removeInstruction(I);
3191    if (MSSAU)
3192      MSSAU->removeMemoryAccess(I);
3193  #ifndef NDEBUG
3194    verifyRemoved(I);
3195  #endif
3196    ICF->removeInstruction(I);
3197    I->eraseFromParent();
3198  }
3199  
3200  /// Verify that the specified instruction does not occur in our
3201  /// internal data structures.
3202  void GVNPass::verifyRemoved(const Instruction *Inst) const {
3203    VN.verifyRemoved(Inst);
3204    LeaderTable.verifyRemoved(Inst);
3205  }
3206  
3207  /// BB is declared dead, which implies that other blocks become dead as well.
3208  /// This function adds all these blocks to "DeadBlocks". For the dead blocks'
3209  /// live successors, update their phi nodes by replacing the operands
3210  /// corresponding to dead blocks with UndefVal.
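/// A brief illustrative example (block names are hypothetical): if BB is the
/// sole predecessor of C, then C and every block dominated by C become dead as
/// well; a live successor S that still has a live predecessor stays reachable,
/// so only its phi operands coming from the dead blocks are rewritten.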
3211  void GVNPass::addDeadBlock(BasicBlock *BB) {
3212    SmallVector<BasicBlock *, 4> NewDead;
3213    SmallSetVector<BasicBlock *, 4> DF;
3214  
3215    NewDead.push_back(BB);
3216    while (!NewDead.empty()) {
3217      BasicBlock *D = NewDead.pop_back_val();
3218      if (DeadBlocks.count(D))
3219        continue;
3220  
3221      // All blocks dominated by D are dead.
3222      SmallVector<BasicBlock *, 8> Dom;
3223      DT->getDescendants(D, Dom);
3224      DeadBlocks.insert(Dom.begin(), Dom.end());
3225  
3226      // Figure out the dominance-frontier(D).
3227      for (BasicBlock *B : Dom) {
3228        for (BasicBlock *S : successors(B)) {
3229          if (DeadBlocks.count(S))
3230            continue;
3231  
3232          bool AllPredDead = true;
3233          for (BasicBlock *P : predecessors(S))
3234            if (!DeadBlocks.count(P)) {
3235              AllPredDead = false;
3236              break;
3237            }
3238  
3239          if (!AllPredDead) {
3240            // S could be proved dead later on. That is why we don't update phi
3241            // operands at this moment.
3242            DF.insert(S);
3243          } else {
3244          // Although S is not dominated by D, it is dead by now. This could
3245          // happen if S already had a dead predecessor before D was declared
3246          // dead.
3247            NewDead.push_back(S);
3248          }
3249        }
3250      }
3251    }
3252  
3253    // For the dead blocks' live successors, update their phi nodes by replacing
3254    // the operands corresponding to dead blocks with UndefVal.
3255    for (BasicBlock *B : DF) {
3256      if (DeadBlocks.count(B))
3257        continue;
3258  
3259      // First, split the critical edges. This might also create additional blocks
3260      // to preserve LoopSimplify form and adjust edges accordingly.
3261      SmallVector<BasicBlock *, 4> Preds(predecessors(B));
3262      for (BasicBlock *P : Preds) {
3263        if (!DeadBlocks.count(P))
3264          continue;
3265  
3266        if (llvm::is_contained(successors(P), B) &&
3267            isCriticalEdge(P->getTerminator(), B)) {
3268          if (BasicBlock *S = splitCriticalEdges(P, B))
3269            DeadBlocks.insert(P = S);
3270        }
3271      }
3272  
3273      // Now poison the incoming values from the dead predecessors.
3274      for (BasicBlock *P : predecessors(B)) {
3275        if (!DeadBlocks.count(P))
3276          continue;
3277        for (PHINode &Phi : B->phis()) {
3278          Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType()));
3279          if (MD)
3280            MD->invalidateCachedPointerInfo(&Phi);
3281        }
3282      }
3283    }
3284  }
3285  
3286  // If the given branch is recognized as a foldable branch (i.e. conditional
3287  // branch with a constant condition), this function performs the following
3288  // analyses and transformations.
3289  //  1) If the dead outgoing edge is a critical edge, split it. Let
3290  //     R be the target of the dead outgoing edge.
3291  //  2) Identify the set of dead blocks implied by the branch's dead outgoing
3292  //     edge. The result of this step will be {X | X is dominated by R}.
3293  //  3) Identify those blocks which have at least one dead predecessor. The
3294  //     result of this step will be dominance-frontier(R).
3295  //  4) Update the PHIs in DF(R) by replacing the operands corresponding to dead
3296  //     blocks with "UndefVal", in the hope that these PHIs will be optimized away.
3297  //
3298  // Return true iff *NEW* dead code is found.
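// Illustrative sketch (hypothetical IR): for
//   br i1 true, label %live, label %dead
// the %dead successor (after splitting the edge if %dead has other
// predecessors) becomes the root R above; every block dominated by R is marked
// dead, and phis in R's dominance frontier have their incoming values from
// dead predecessors replaced.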
3299  bool GVNPass::processFoldableCondBr(BranchInst *BI) {
3300    if (!BI || BI->isUnconditional())
3301      return false;
3302  
3303    // If a branch has two identical successors, we cannot declare either dead.
3304    if (BI->getSuccessor(0) == BI->getSuccessor(1))
3305      return false;
3306  
3307    ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
3308    if (!Cond)
3309      return false;
3310  
3311    BasicBlock *DeadRoot =
3312        Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
3313    if (DeadBlocks.count(DeadRoot))
3314      return false;
3315  
3316    if (!DeadRoot->getSinglePredecessor())
3317      DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
3318  
3319    addDeadBlock(DeadRoot);
3320    return true;
3321  }
3322  
3323  // performPRE() will trigger an assert if it comes across an instruction
3324  // without an associated val-num. As a function normally has far more live
3325  // instructions than dead ones, it makes more sense just to "fabricate" a
3326  // val-number for the dead code than to check whether each instruction is dead.
3327  void GVNPass::assignValNumForDeadCode() {
3328    for (BasicBlock *BB : DeadBlocks) {
3329      for (Instruction &Inst : *BB) {
3330        unsigned ValNum = VN.lookupOrAdd(&Inst);
3331        LeaderTable.insert(ValNum, &Inst, BB);
3332      }
3333    }
3334  }
3335  
3336  class llvm::gvn::GVNLegacyPass : public FunctionPass {
3337  public:
3338    static char ID; // Pass identification, replacement for typeid
3339  
3340    explicit GVNLegacyPass(bool NoMemDepAnalysis = !GVNEnableMemDep)
3341        : FunctionPass(ID), Impl(GVNOptions().setMemDep(!NoMemDepAnalysis)) {
3342      initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
3343    }
3344  
3345    bool runOnFunction(Function &F) override {
3346      if (skipFunction(F))
3347        return false;
3348  
3349      auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
3350      return Impl.runImpl(
3351          F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
3352          getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
3353          getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
3354          getAnalysis<AAResultsWrapperPass>().getAAResults(),
3355          Impl.isMemDepEnabled()
3356              ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
3357              : nullptr,
3358          getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
3359          &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
3360          MSSAWP ? &MSSAWP->getMSSA() : nullptr);
3361    }
3362  
3363    void getAnalysisUsage(AnalysisUsage &AU) const override {
3364      AU.addRequired<AssumptionCacheTracker>();
3365      AU.addRequired<DominatorTreeWrapperPass>();
3366      AU.addRequired<TargetLibraryInfoWrapperPass>();
3367      AU.addRequired<LoopInfoWrapperPass>();
3368      if (Impl.isMemDepEnabled())
3369        AU.addRequired<MemoryDependenceWrapperPass>();
3370      AU.addRequired<AAResultsWrapperPass>();
3371      AU.addPreserved<DominatorTreeWrapperPass>();
3372      AU.addPreserved<GlobalsAAWrapperPass>();
3373      AU.addPreserved<TargetLibraryInfoWrapperPass>();
3374      AU.addPreserved<LoopInfoWrapperPass>();
3375      AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
3376      AU.addPreserved<MemorySSAWrapperPass>();
3377    }
3378  
3379  private:
3380    GVNPass Impl;
3381  };
3382  
3383  char GVNLegacyPass::ID = 0;
3384  
3385  INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
3386  INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
3387  INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
3388  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3389  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
3390  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
3391  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
3392  INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
3393  INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
3394  
3395  // The public interface to this file...
3396  FunctionPass *llvm::createGVNPass(bool NoMemDepAnalysis) {
3397    return new GVNLegacyPass(NoMemDepAnalysis);
3398  }
3399