1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs global value numbering to eliminate fully redundant
10 // instructions. It also performs simple dead load elimination.
11 //
12 // Note that this pass does the value numbering itself; it does not use the
13 // ValueNumbering analysis passes.
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "llvm/Transforms/Scalar/GVN.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/DepthFirstIterator.h"
20 #include "llvm/ADT/Hashing.h"
21 #include "llvm/ADT/MapVector.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/Analysis/AliasAnalysis.h"
29 #include "llvm/Analysis/AssumeBundleQueries.h"
30 #include "llvm/Analysis/AssumptionCache.h"
31 #include "llvm/Analysis/CFG.h"
32 #include "llvm/Analysis/DomTreeUpdater.h"
33 #include "llvm/Analysis/GlobalsModRef.h"
34 #include "llvm/Analysis/InstructionPrecedenceTracking.h"
35 #include "llvm/Analysis/InstructionSimplify.h"
36 #include "llvm/Analysis/Loads.h"
37 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/Analysis/MemoryBuiltins.h"
39 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
40 #include "llvm/Analysis/MemorySSA.h"
41 #include "llvm/Analysis/MemorySSAUpdater.h"
42 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
43 #include "llvm/Analysis/PHITransAddr.h"
44 #include "llvm/Analysis/TargetLibraryInfo.h"
45 #include "llvm/Analysis/ValueTracking.h"
46 #include "llvm/IR/Attributes.h"
47 #include "llvm/IR/BasicBlock.h"
48 #include "llvm/IR/Constant.h"
49 #include "llvm/IR/Constants.h"
50 #include "llvm/IR/DebugLoc.h"
51 #include "llvm/IR/Dominators.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/LLVMContext.h"
58 #include "llvm/IR/Metadata.h"
59 #include "llvm/IR/Module.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/IR/PatternMatch.h"
62 #include "llvm/IR/Type.h"
63 #include "llvm/IR/Use.h"
64 #include "llvm/IR/Value.h"
65 #include "llvm/InitializePasses.h"
66 #include "llvm/Pass.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Compiler.h"
70 #include "llvm/Support/Debug.h"
71 #include "llvm/Support/raw_ostream.h"
72 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
73 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
74 #include "llvm/Transforms/Utils/Local.h"
75 #include "llvm/Transforms/Utils/SSAUpdater.h"
76 #include "llvm/Transforms/Utils/VNCoercion.h"
77 #include <algorithm>
78 #include <cassert>
79 #include <cstdint>
80 #include <optional>
81 #include <utility>
82
83 using namespace llvm;
84 using namespace llvm::gvn;
85 using namespace llvm::VNCoercion;
86 using namespace PatternMatch;
87
88 #define DEBUG_TYPE "gvn"
89
90 STATISTIC(NumGVNInstr, "Number of instructions deleted");
91 STATISTIC(NumGVNLoad, "Number of loads deleted");
92 STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
93 STATISTIC(NumGVNBlocks, "Number of blocks merged");
94 STATISTIC(NumGVNSimpl, "Number of instructions simplified");
95 STATISTIC(NumGVNEqProp, "Number of equalities propagated");
96 STATISTIC(NumPRELoad, "Number of loads PRE'd");
97 STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
98 STATISTIC(NumPRELoadMoved2CEPred,
99 "Number of loads moved to predecessor of a critical edge in PRE");
100
101 STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
102 "Number of blocks speculated as available in "
103 "IsValueFullyAvailableInBlock(), max");
104 STATISTIC(MaxBBSpeculationCutoffReachedTimes,
105 "Number of times we we reached gvn-max-block-speculations cut-off "
106 "preventing further exploration");
107
108 static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
109 static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
110 static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
111 cl::init(true));
112 static cl::opt<bool>
113 GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
114 cl::init(false));
115 static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
116
117 static cl::opt<uint32_t> MaxNumDeps(
118 "gvn-max-num-deps", cl::Hidden, cl::init(100),
119 cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
120
121 // This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
122 static cl::opt<uint32_t> MaxBBSpeculations(
123 "gvn-max-block-speculations", cl::Hidden, cl::init(600),
124 cl::desc("Max number of blocks we're willing to speculate on (and recurse "
125 "into) when deducing if a value is fully available or not in GVN "
126 "(default = 600)"));
127
128 static cl::opt<uint32_t> MaxNumVisitedInsts(
129 "gvn-max-num-visited-insts", cl::Hidden, cl::init(100),
130 cl::desc("Max number of visited instructions when trying to find "
131 "dominating value of select dependency (default = 100)"));
132
133 static cl::opt<uint32_t> MaxNumInsnsPerBlock(
134 "gvn-max-num-insns", cl::Hidden, cl::init(100),
135 cl::desc("Max number of instructions to scan in each basic block in GVN "
136 "(default = 100)"));
137
138 struct llvm::GVNPass::Expression {
139 uint32_t opcode;
140 bool commutative = false;
141 // The type is not necessarily the result type of the expression; it may be
142 // any additional type needed to disambiguate the expression.
143 Type *type = nullptr;
144 SmallVector<uint32_t, 4> varargs;
145
146 Expression(uint32_t o = ~2U) : opcode(o) {}
147
148 bool operator==(const Expression &other) const {
149 if (opcode != other.opcode)
150 return false;
151 if (opcode == ~0U || opcode == ~1U)
152 return true;
153 if (type != other.type)
154 return false;
155 if (varargs != other.varargs)
156 return false;
157 return true;
158 }
159
160 friend hash_code hash_value(const Expression &Value) {
161 return hash_combine(
162 Value.opcode, Value.type,
163 hash_combine_range(Value.varargs.begin(), Value.varargs.end()));
164 }
165 };
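// Note: opcode values ~0U and ~1U are reserved for DenseMap's empty and
// tombstone keys (see the DenseMapInfo specialization below); operator==
// treats two expressions with the same reserved opcode as equal without
// comparing the type or operand value numbers.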
166
167 namespace llvm {
168
169 template <> struct DenseMapInfo<GVNPass::Expression> {
170 static inline GVNPass::Expression getEmptyKey() { return ~0U; }
171 static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
172
173 static unsigned getHashValue(const GVNPass::Expression &e) {
174 using llvm::hash_value;
175
176 return static_cast<unsigned>(hash_value(e));
177 }
178
179 static bool isEqual(const GVNPass::Expression &LHS,
180 const GVNPass::Expression &RHS) {
181 return LHS == RHS;
182 }
183 };
184
185 } // end namespace llvm
186
187 /// Represents a particular available value that we know how to materialize.
188 /// Materialization of an AvailableValue never fails. An AvailableValue is
189 /// implicitly associated with a rematerialization point which is the
190 /// location of the instruction from which it was formed.
191 struct llvm::gvn::AvailableValue {
192 enum class ValType {
193 SimpleVal, // A simple offsetted value that is accessed.
194 LoadVal, // A value produced by a load.
195 MemIntrin, // A memory intrinsic which is loaded from.
196 UndefVal, // An UndefValue representing a value from a dead block (which
197 // is not yet physically removed from the CFG).
198 SelectVal, // A pointer select which is loaded from and for which the load
199 // can be replaced by a value select.
200 };
201
202 /// Val - The value that is live out of the block.
203 Value *Val;
204 /// Kind of the live-out value.
205 ValType Kind;
206
207 /// Offset - The byte offset in Val that is interesting for the load query.
208 unsigned Offset = 0;
209 /// V1, V2 - The dominating non-clobbered values of SelectVal.
210 Value *V1 = nullptr, *V2 = nullptr;
211
212 static AvailableValue get(Value *V, unsigned Offset = 0) {
213 AvailableValue Res;
214 Res.Val = V;
215 Res.Kind = ValType::SimpleVal;
216 Res.Offset = Offset;
217 return Res;
218 }
219
220 static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
221 AvailableValue Res;
222 Res.Val = MI;
223 Res.Kind = ValType::MemIntrin;
224 Res.Offset = Offset;
225 return Res;
226 }
227
228 static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
229 AvailableValue Res;
230 Res.Val = Load;
231 Res.Kind = ValType::LoadVal;
232 Res.Offset = Offset;
233 return Res;
234 }
235
236 static AvailableValue getUndef() {
237 AvailableValue Res;
238 Res.Val = nullptr;
239 Res.Kind = ValType::UndefVal;
240 Res.Offset = 0;
241 return Res;
242 }
243
244 static AvailableValue getSelect(SelectInst *Sel, Value *V1, Value *V2) {
245 AvailableValue Res;
246 Res.Val = Sel;
247 Res.Kind = ValType::SelectVal;
248 Res.Offset = 0;
249 Res.V1 = V1;
250 Res.V2 = V2;
251 return Res;
252 }
253
254 bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
255 bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
256 bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
257 bool isUndefValue() const { return Kind == ValType::UndefVal; }
258 bool isSelectValue() const { return Kind == ValType::SelectVal; }
259
260 Value *getSimpleValue() const {
261 assert(isSimpleValue() && "Wrong accessor");
262 return Val;
263 }
264
265 LoadInst *getCoercedLoadValue() const {
266 assert(isCoercedLoadValue() && "Wrong accessor");
267 return cast<LoadInst>(Val);
268 }
269
270 MemIntrinsic *getMemIntrinValue() const {
271 assert(isMemIntrinValue() && "Wrong accessor");
272 return cast<MemIntrinsic>(Val);
273 }
274
275 SelectInst *getSelectValue() const {
276 assert(isSelectValue() && "Wrong accessor");
277 return cast<SelectInst>(Val);
278 }
279
280 /// Emit code at the specified insertion point to adjust the value defined
281 /// here to the specified type. This handles various coercion cases.
282 Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt,
283 GVNPass &gvn) const;
284 };
285
286 /// Represents an AvailableValue which can be rematerialized at the end of
287 /// the associated BasicBlock.
288 struct llvm::gvn::AvailableValueInBlock {
289 /// BB - The basic block in question.
290 BasicBlock *BB = nullptr;
291
292 /// AV - The actual available value
293 AvailableValue AV;
294
295 static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
296 AvailableValueInBlock Res;
297 Res.BB = BB;
298 Res.AV = std::move(AV);
299 return Res;
300 }
301
302 static AvailableValueInBlock get(BasicBlock *BB, Value *V,
303 unsigned Offset = 0) {
304 return get(BB, AvailableValue::get(V, Offset));
305 }
306
307 static AvailableValueInBlock getUndef(BasicBlock *BB) {
308 return get(BB, AvailableValue::getUndef());
309 }
310
311 static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel,
312 Value *V1, Value *V2) {
313 return get(BB, AvailableValue::getSelect(Sel, V1, V2));
314 }
315
316 /// Emit code at the end of this block to adjust the value defined here to
317 /// the specified type. This handles various coercion cases.
318 Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const {
319 return AV.MaterializeAdjustedValue(Load, BB->getTerminator(), gvn);
320 }
321 };
322
323 //===----------------------------------------------------------------------===//
324 // ValueTable Internal Functions
325 //===----------------------------------------------------------------------===//
326
327 GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
328 Expression e;
329 e.type = I->getType();
330 e.opcode = I->getOpcode();
331 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) {
332 // gc.relocate is a 'special' call: its second and third operands are
333 // not real values, but indices into the statepoint's argument list.
334 // Use the referred-to values for purposes of identity.
335 e.varargs.push_back(lookupOrAdd(GCR->getOperand(0)));
336 e.varargs.push_back(lookupOrAdd(GCR->getBasePtr()));
337 e.varargs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
338 } else {
339 for (Use &Op : I->operands())
340 e.varargs.push_back(lookupOrAdd(Op));
341 }
342 if (I->isCommutative()) {
343 // Ensure that commutative instructions that only differ by a permutation
344 // of their operands get the same value number by sorting the operand value
345 // numbers. Since the commutative operands are the first two operands, it is more
346 // efficient to sort by hand rather than using, say, std::sort.
347 assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
348 if (e.varargs[0] > e.varargs[1])
349 std::swap(e.varargs[0], e.varargs[1]);
350 e.commutative = true;
351 }
352
353 if (auto *C = dyn_cast<CmpInst>(I)) {
354 // Sort the operand value numbers so x<y and y>x get the same value number.
355 CmpInst::Predicate Predicate = C->getPredicate();
356 if (e.varargs[0] > e.varargs[1]) {
357 std::swap(e.varargs[0], e.varargs[1]);
358 Predicate = CmpInst::getSwappedPredicate(Predicate);
359 }
360 e.opcode = (C->getOpcode() << 8) | Predicate;
361 e.commutative = true;
362 } else if (auto *E = dyn_cast<InsertValueInst>(I)) {
363 e.varargs.append(E->idx_begin(), E->idx_end());
364 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
365 ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
366 e.varargs.append(ShuffleMask.begin(), ShuffleMask.end());
367 }
368
369 return e;
370 }
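// For illustration: the canonicalization above means that, e.g.,
//   %x = add i32 %a, %b
//   %y = add i32 %b, %a
// build identical Expressions (same opcode, type, and sorted operand value
// numbers), so %x and %y receive the same value number.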
371
372 GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
373 unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
374 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
375 "Not a comparison!");
376 Expression e;
377 e.type = CmpInst::makeCmpResultType(LHS->getType());
378 e.varargs.push_back(lookupOrAdd(LHS));
379 e.varargs.push_back(lookupOrAdd(RHS));
380
381 // Sort the operand value numbers so x<y and y>x get the same value number.
382 if (e.varargs[0] > e.varargs[1]) {
383 std::swap(e.varargs[0], e.varargs[1]);
384 Predicate = CmpInst::getSwappedPredicate(Predicate);
385 }
386 e.opcode = (Opcode << 8) | Predicate;
387 e.commutative = true;
388 return e;
389 }
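// For example, a deduced "icmp slt %a, %b" and an existing "icmp sgt %b, %a"
// canonicalize to the same Expression here: the operand value numbers are
// sorted and the predicate is swapped to match, so both map to one value
// number.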
390
391 GVNPass::Expression
392 GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
393 assert(EI && "Not an ExtractValueInst?");
394 Expression e;
395 e.type = EI->getType();
396 e.opcode = 0;
397
398 WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
399 if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
400 // EI is an extract from one of our with.overflow intrinsics. Synthesize
401 // a semantically equivalent expression instead of an extract value
402 // expression.
403 e.opcode = WO->getBinaryOp();
404 e.varargs.push_back(lookupOrAdd(WO->getLHS()));
405 e.varargs.push_back(lookupOrAdd(WO->getRHS()));
406 return e;
407 }
408
409 // Not a recognised intrinsic. Fall back to producing an extract value
410 // expression.
411 e.opcode = EI->getOpcode();
412 for (Use &Op : EI->operands())
413 e.varargs.push_back(lookupOrAdd(Op));
414
415 append_range(e.varargs, EI->indices());
416
417 return e;
418 }
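// For example (illustrative IR), given
//   %s = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %v = extractvalue { i32, i1 } %s, 0
// the Expression built for %v uses the Add opcode and the value numbers of
// %a and %b, so %v can share a value number with a plain "add i32 %a, %b".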
419
420 GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
421 Expression E;
422 Type *PtrTy = GEP->getType()->getScalarType();
423 const DataLayout &DL = GEP->getDataLayout();
424 unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
425 MapVector<Value *, APInt> VariableOffsets;
426 APInt ConstantOffset(BitWidth, 0);
427 if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
428 // Convert into offset representation, to recognize equivalent address
429 // calculations that use different type encoding.
430 LLVMContext &Context = GEP->getContext();
431 E.opcode = GEP->getOpcode();
432 E.type = nullptr;
433 E.varargs.push_back(lookupOrAdd(GEP->getPointerOperand()));
434 for (const auto &Pair : VariableOffsets) {
435 E.varargs.push_back(lookupOrAdd(Pair.first));
436 E.varargs.push_back(lookupOrAdd(ConstantInt::get(Context, Pair.second)));
437 }
438 if (!ConstantOffset.isZero())
439 E.varargs.push_back(
440 lookupOrAdd(ConstantInt::get(Context, ConstantOffset)));
441 } else {
442 // If converting to offset representation fails (for scalable vectors),
443 // fall back to type-based implementation:
444 E.opcode = GEP->getOpcode();
445 E.type = GEP->getSourceElementType();
446 for (Use &Op : GEP->operands())
447 E.varargs.push_back(lookupOrAdd(Op));
448 }
449 return E;
450 }
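// For example, with a typical data layout (where i32 is 4 bytes),
//   getelementptr i32, ptr %p, i64 1
//   getelementptr i8, ptr %p, i64 4
// both collect to {%p, constant offset 4} and therefore build the same
// Expression, even though their source element types differ.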
451
452 //===----------------------------------------------------------------------===//
453 // ValueTable External Functions
454 //===----------------------------------------------------------------------===//
455
456 GVNPass::ValueTable::ValueTable() = default;
457 GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
458 GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
459 GVNPass::ValueTable::~ValueTable() = default;
460 GVNPass::ValueTable &
461 GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
462
463 /// add - Insert a value into the table with a specified value number.
464 void GVNPass::ValueTable::add(Value *V, uint32_t num) {
465 valueNumbering.insert(std::make_pair(V, num));
466 if (PHINode *PN = dyn_cast<PHINode>(V))
467 NumberingPhi[num] = PN;
468 }
469
470 uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
471 // FIXME: Currently, calls which may access the thread id may be
472 // considered as not accessing memory. But this is problematic for
473 // coroutines, since coroutines may resume in a different thread. So we
474 // disable the optimization here for correctness. However, it may block
475 // many other correct optimizations. Revert this once we can detect the
476 // memory-accessing kind more precisely.
478 if (C->getFunction()->isPresplitCoroutine()) {
479 valueNumbering[C] = nextValueNumber;
480 return nextValueNumber++;
481 }
482
483 // Do not combine convergent calls since they implicitly depend on the set of
484 // threads that is currently executing, and they might be in different basic
485 // blocks.
486 if (C->isConvergent()) {
487 valueNumbering[C] = nextValueNumber;
488 return nextValueNumber++;
489 }
490
491 if (AA->doesNotAccessMemory(C)) {
492 Expression exp = createExpr(C);
493 uint32_t e = assignExpNewValueNum(exp).first;
494 valueNumbering[C] = e;
495 return e;
496 }
497
498 if (MD && AA->onlyReadsMemory(C)) {
499 Expression exp = createExpr(C);
500 auto ValNum = assignExpNewValueNum(exp);
501 if (ValNum.second) {
502 valueNumbering[C] = ValNum.first;
503 return ValNum.first;
504 }
505
506 MemDepResult local_dep = MD->getDependency(C);
507
508 if (!local_dep.isDef() && !local_dep.isNonLocal()) {
509 valueNumbering[C] = nextValueNumber;
510 return nextValueNumber++;
511 }
512
513 if (local_dep.isDef()) {
514 // For masked load/store intrinsics, the local_dep may actually be
515 // a normal load or store instruction.
516 CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
517
518 if (!local_cdep || local_cdep->arg_size() != C->arg_size()) {
519 valueNumbering[C] = nextValueNumber;
520 return nextValueNumber++;
521 }
522
523 for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
524 uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
525 uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
526 if (c_vn != cd_vn) {
527 valueNumbering[C] = nextValueNumber;
528 return nextValueNumber++;
529 }
530 }
531
532 uint32_t v = lookupOrAdd(local_cdep);
533 valueNumbering[C] = v;
534 return v;
535 }
536
537 // Non-local case.
538 const MemoryDependenceResults::NonLocalDepInfo &deps =
539 MD->getNonLocalCallDependency(C);
540 // FIXME: Move the checking logic to MemDep!
541 CallInst* cdep = nullptr;
542
543 // Check to see if we have a single dominating call instruction that is
544 // identical to C.
545 for (const NonLocalDepEntry &I : deps) {
546 if (I.getResult().isNonLocal())
547 continue;
548
549 // We don't handle non-definitions. If we already have a call, reject
550 // instruction dependencies.
551 if (!I.getResult().isDef() || cdep != nullptr) {
552 cdep = nullptr;
553 break;
554 }
555
556 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I.getResult().getInst());
557 // FIXME: All duplicated with the local case.
558 if (NonLocalDepCall && DT->properlyDominates(I.getBB(), C->getParent())) {
559 cdep = NonLocalDepCall;
560 continue;
561 }
562
563 cdep = nullptr;
564 break;
565 }
566
567 if (!cdep) {
568 valueNumbering[C] = nextValueNumber;
569 return nextValueNumber++;
570 }
571
572 if (cdep->arg_size() != C->arg_size()) {
573 valueNumbering[C] = nextValueNumber;
574 return nextValueNumber++;
575 }
576 for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
577 uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
578 uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
579 if (c_vn != cd_vn) {
580 valueNumbering[C] = nextValueNumber;
581 return nextValueNumber++;
582 }
583 }
584
585 uint32_t v = lookupOrAdd(cdep);
586 valueNumbering[C] = v;
587 return v;
588 }
589
590 valueNumbering[C] = nextValueNumber;
591 return nextValueNumber++;
592 }
593
594 /// Returns true if a value number exists for the specified value.
595 bool GVNPass::ValueTable::exists(Value *V) const {
596 return valueNumbering.contains(V);
597 }
598
599 /// lookupOrAdd - Returns the value number for the specified value, assigning
600 /// it a new number if it did not have one before.
601 uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
602 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
603 if (VI != valueNumbering.end())
604 return VI->second;
605
606 auto *I = dyn_cast<Instruction>(V);
607 if (!I) {
608 valueNumbering[V] = nextValueNumber;
609 return nextValueNumber++;
610 }
611
612 Expression exp;
613 switch (I->getOpcode()) {
614 case Instruction::Call:
615 return lookupOrAddCall(cast<CallInst>(I));
616 case Instruction::FNeg:
617 case Instruction::Add:
618 case Instruction::FAdd:
619 case Instruction::Sub:
620 case Instruction::FSub:
621 case Instruction::Mul:
622 case Instruction::FMul:
623 case Instruction::UDiv:
624 case Instruction::SDiv:
625 case Instruction::FDiv:
626 case Instruction::URem:
627 case Instruction::SRem:
628 case Instruction::FRem:
629 case Instruction::Shl:
630 case Instruction::LShr:
631 case Instruction::AShr:
632 case Instruction::And:
633 case Instruction::Or:
634 case Instruction::Xor:
635 case Instruction::ICmp:
636 case Instruction::FCmp:
637 case Instruction::Trunc:
638 case Instruction::ZExt:
639 case Instruction::SExt:
640 case Instruction::FPToUI:
641 case Instruction::FPToSI:
642 case Instruction::UIToFP:
643 case Instruction::SIToFP:
644 case Instruction::FPTrunc:
645 case Instruction::FPExt:
646 case Instruction::PtrToInt:
647 case Instruction::IntToPtr:
648 case Instruction::AddrSpaceCast:
649 case Instruction::BitCast:
650 case Instruction::Select:
651 case Instruction::Freeze:
652 case Instruction::ExtractElement:
653 case Instruction::InsertElement:
654 case Instruction::ShuffleVector:
655 case Instruction::InsertValue:
656 exp = createExpr(I);
657 break;
658 case Instruction::GetElementPtr:
659 exp = createGEPExpr(cast<GetElementPtrInst>(I));
660 break;
661 case Instruction::ExtractValue:
662 exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
663 break;
664 case Instruction::PHI:
665 valueNumbering[V] = nextValueNumber;
666 NumberingPhi[nextValueNumber] = cast<PHINode>(V);
667 return nextValueNumber++;
668 default:
669 valueNumbering[V] = nextValueNumber;
670 return nextValueNumber++;
671 }
672
673 uint32_t e = assignExpNewValueNum(exp).first;
674 valueNumbering[V] = e;
675 return e;
676 }
677
678 /// Returns the value number of the specified value. Fails if
679 /// the value has not yet been numbered.
680 uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
681 DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
682 if (Verify) {
683 assert(VI != valueNumbering.end() && "Value not numbered?");
684 return VI->second;
685 }
686 return (VI != valueNumbering.end()) ? VI->second : 0;
687 }
688
689 /// Returns the value number of the given comparison,
690 /// assigning it a new number if it did not have one before. Useful when
691 /// we deduced the result of a comparison, but don't immediately have an
692 /// instruction realizing that comparison to hand.
693 uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
694 CmpInst::Predicate Predicate,
695 Value *LHS, Value *RHS) {
696 Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
697 return assignExpNewValueNum(exp).first;
698 }
699
700 /// Remove all entries from the ValueTable.
701 void GVNPass::ValueTable::clear() {
702 valueNumbering.clear();
703 expressionNumbering.clear();
704 NumberingPhi.clear();
705 PhiTranslateTable.clear();
706 nextValueNumber = 1;
707 Expressions.clear();
708 ExprIdx.clear();
709 nextExprNumber = 0;
710 }
711
712 /// Remove a value from the value numbering.
713 void GVNPass::ValueTable::erase(Value *V) {
714 uint32_t Num = valueNumbering.lookup(V);
715 valueNumbering.erase(V);
716 // If V is a PHINode, V <--> value number is a one-to-one mapping.
717 if (isa<PHINode>(V))
718 NumberingPhi.erase(Num);
719 }
720
721 /// verifyRemoved - Verify that the value is removed from all internal data
722 /// structures.
723 void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
724 assert(!valueNumbering.contains(V) &&
725 "Inst still occurs in value numbering map!");
726 }
727
728 //===----------------------------------------------------------------------===//
729 // LeaderMap External Functions
730 //===----------------------------------------------------------------------===//
731
732 /// Push a new Value to the LeaderTable onto the list for its value number.
733 void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
734 LeaderListNode &Curr = NumToLeaders[N];
735 if (!Curr.Entry.Val) {
736 Curr.Entry.Val = V;
737 Curr.Entry.BB = BB;
738 return;
739 }
740
741 LeaderListNode *Node = TableAllocator.Allocate<LeaderListNode>();
742 Node->Entry.Val = V;
743 Node->Entry.BB = BB;
744 Node->Next = Curr.Next;
745 Curr.Next = Node;
746 }
747
748 /// Scan the list of values corresponding to a given
749 /// value number, and remove the given instruction if encountered.
750 void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
751 const BasicBlock *BB) {
752 LeaderListNode *Prev = nullptr;
753 LeaderListNode *Curr = &NumToLeaders[N];
754
755 while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
756 Prev = Curr;
757 Curr = Curr->Next;
758 }
759
760 if (!Curr)
761 return;
762
763 if (Prev) {
764 Prev->Next = Curr->Next;
765 } else {
766 if (!Curr->Next) {
767 Curr->Entry.Val = nullptr;
768 Curr->Entry.BB = nullptr;
769 } else {
770 LeaderListNode *Next = Curr->Next;
771 Curr->Entry.Val = Next->Entry.Val;
772 Curr->Entry.BB = Next->Entry.BB;
773 Curr->Next = Next->Next;
774 }
775 }
776 }
777
778 void GVNPass::LeaderMap::verifyRemoved(const Value *V) const {
779 // Walk through the value number scope to make sure the instruction isn't
780 // ferreted away in it.
781 for (const auto &I : NumToLeaders) {
782 (void)I;
783 assert(I.second.Entry.Val != V && "Inst still in value numbering scope!");
784 assert(
785 std::none_of(leader_iterator(&I.second), leader_iterator(nullptr),
786 [=](const LeaderTableEntry &E) { return E.Val == V; }) &&
787 "Inst still in value numbering scope!");
788 }
789 }
790
791 //===----------------------------------------------------------------------===//
792 // GVN Pass
793 //===----------------------------------------------------------------------===//
794
795 bool GVNPass::isPREEnabled() const {
796 return Options.AllowPRE.value_or(GVNEnablePRE);
797 }
798
799 bool GVNPass::isLoadPREEnabled() const {
800 return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE);
801 }
802
803 bool GVNPass::isLoadInLoopPREEnabled() const {
804 return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE);
805 }
806
807 bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
808 return Options.AllowLoadPRESplitBackedge.value_or(
809 GVNEnableSplitBackedgeInLoadPRE);
810 }
811
812 bool GVNPass::isMemDepEnabled() const {
813 return Options.AllowMemDep.value_or(GVNEnableMemDep);
814 }
815
816 PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
817 // FIXME: The order of evaluation of these 'getResult' calls is very
818 // significant! Re-ordering these variables will cause GVN when run alone to
819 // be less effective! We should fix memdep and basic-aa to not exhibit this
820 // behavior, but until then don't change the order here.
821 auto &AC = AM.getResult<AssumptionAnalysis>(F);
822 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
823 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
824 auto &AA = AM.getResult<AAManager>(F);
825 auto *MemDep =
826 isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
827 auto &LI = AM.getResult<LoopAnalysis>(F);
828 auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
829 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
830 bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
831 MSSA ? &MSSA->getMSSA() : nullptr);
832 if (!Changed)
833 return PreservedAnalyses::all();
834 PreservedAnalyses PA;
835 PA.preserve<DominatorTreeAnalysis>();
836 PA.preserve<TargetLibraryAnalysis>();
837 if (MSSA)
838 PA.preserve<MemorySSAAnalysis>();
839 PA.preserve<LoopAnalysis>();
840 return PA;
841 }
842
843 void GVNPass::printPipeline(
844 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
845 static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
846 OS, MapClassName2PassName);
847
848 OS << '<';
849 if (Options.AllowPRE != std::nullopt)
850 OS << (*Options.AllowPRE ? "" : "no-") << "pre;";
851 if (Options.AllowLoadPRE != std::nullopt)
852 OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;";
853 if (Options.AllowLoadPRESplitBackedge != std::nullopt)
854 OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-")
855 << "split-backedge-load-pre;";
856 if (Options.AllowMemDep != std::nullopt)
857 OS << (*Options.AllowMemDep ? "" : "no-") << "memdep";
858 OS << '>';
859 }
860
861 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump(DenseMap<uint32_t,Value * > & d) const862 LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &d) const {
863 errs() << "{\n";
864 for (auto &I : d) {
865 errs() << I.first << "\n";
866 I.second->dump();
867 }
868 errs() << "}\n";
869 }
870 #endif
871
872 enum class AvailabilityState : char {
873 /// We know the block *is not* fully available. This is a fixpoint.
874 Unavailable = 0,
875 /// We know the block *is* fully available. This is a fixpoint.
876 Available = 1,
877 /// We do not know whether the block is fully available or not,
878 /// but we are currently speculating that it will be.
879 /// If it would have turned out that the block was, in fact, not fully
880 /// available, this would have been cleaned up into an Unavailable.
881 SpeculativelyAvailable = 2,
882 };
883
884 /// Return true if we can prove that the value
885 /// we're analyzing is fully available in the specified block. As we go, keep
886 /// track of which blocks we know are fully available in FullyAvailableBlocks. This
887 /// map is actually a tri-state map with the following values:
888 /// 0) we know the block *is not* fully available.
889 /// 1) we know the block *is* fully available.
890 /// 2) we do not know whether the block is fully available or not, but we are
891 /// currently speculating that it will be.
892 static bool IsValueFullyAvailableInBlock(
893 BasicBlock *BB,
894 DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
895 SmallVector<BasicBlock *, 32> Worklist;
896 std::optional<BasicBlock *> UnavailableBB;
897
898 // The number of times we didn't find an entry for a block in a map and
899 // optimistically inserted an entry marking the block as speculatively available.
900 unsigned NumNewNewSpeculativelyAvailableBBs = 0;
901
902 #ifndef NDEBUG
903 SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
904 SmallVector<BasicBlock *, 32> AvailableBBs;
905 #endif
906
907 Worklist.emplace_back(BB);
908 while (!Worklist.empty()) {
909 BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
910 // Optimistically assume that the block is Speculatively Available and check
911 // to see if we already know about this block in one lookup.
912 std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
913 FullyAvailableBlocks.try_emplace(
914 CurrBB, AvailabilityState::SpeculativelyAvailable);
915 AvailabilityState &State = IV.first->second;
916
917 // Did the entry already exist for this block?
918 if (!IV.second) {
919 if (State == AvailabilityState::Unavailable) {
920 UnavailableBB = CurrBB;
921 break; // Backpropagate unavailability info.
922 }
923
924 #ifndef NDEBUG
925 AvailableBBs.emplace_back(CurrBB);
926 #endif
927 continue; // Don't recurse further, but continue processing worklist.
928 }
929
930 // No entry found for block.
931 ++NumNewNewSpeculativelyAvailableBBs;
932 bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
933
934 // If we have exhausted our budget, mark this block as unavailable.
935 // Also, if this block has no predecessors, the value isn't live-in here.
936 if (OutOfBudget || pred_empty(CurrBB)) {
937 MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
938 State = AvailabilityState::Unavailable;
939 UnavailableBB = CurrBB;
940 break; // Backpropagate unavailability info.
941 }
942
943 // Tentatively consider this block as speculatively available.
944 #ifndef NDEBUG
945 NewSpeculativelyAvailableBBs.insert(CurrBB);
946 #endif
947 // And further recurse into block's predecessors, in depth-first order!
948 Worklist.append(pred_begin(CurrBB), pred_end(CurrBB));
949 }
950
951 #if LLVM_ENABLE_STATS
952 IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
953 NumNewNewSpeculativelyAvailableBBs);
954 #endif
955
956 // Helper: if the block isn't marked as a fixpoint yet (the Unavailable and
957 // Available states are fixpoints), mark it with the given state and enqueue its successors.
958 auto MarkAsFixpointAndEnqueueSuccessors =
959 [&](BasicBlock *BB, AvailabilityState FixpointState) {
960 auto It = FullyAvailableBlocks.find(BB);
961 if (It == FullyAvailableBlocks.end())
962 return; // Never queried this block, leave as-is.
963 switch (AvailabilityState &State = It->second) {
964 case AvailabilityState::Unavailable:
965 case AvailabilityState::Available:
966 return; // Don't backpropagate further, continue processing worklist.
967 case AvailabilityState::SpeculativelyAvailable: // Fix it!
968 State = FixpointState;
969 #ifndef NDEBUG
970 assert(NewSpeculativelyAvailableBBs.erase(BB) &&
971 "Found a speculatively available successor leftover?");
972 #endif
973 // Queue successors for further processing.
974 Worklist.append(succ_begin(BB), succ_end(BB));
975 return;
976 }
977 };
978
979 if (UnavailableBB) {
980 // Okay, we have encountered an unavailable block.
981 // Mark speculatively available blocks reachable from UnavailableBB as
982 // unavailable as well. Paths are terminated when they reach blocks not in
983 // FullyAvailableBlocks or they are not marked as speculatively available.
984 Worklist.clear();
985 Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB));
986 while (!Worklist.empty())
987 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
988 AvailabilityState::Unavailable);
989 }
990
991 #ifndef NDEBUG
992 Worklist.clear();
993 for (BasicBlock *AvailableBB : AvailableBBs)
994 Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
995 while (!Worklist.empty())
996 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
997 AvailabilityState::Available);
998
999 assert(NewSpeculativelyAvailableBBs.empty() &&
1000 "Must have fixed all the new speculatively available blocks.");
1001 #endif
1002
1003 return !UnavailableBB;
1004 }
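// As a sketch of the traversal (illustrative block names): querying
// availability in the join block D of a diamond CFG
//
//        A   (value available here, typically pre-marked Available by the caller)
//       / \
//      B   C
//       \ /
//        D   (query starts here)
//
// walks D's predecessors B and C and then A; A's existing Available entry
// stops the walk, no unavailable block is found, and the function returns
// true. If instead some path reached a block with no predecessors and no
// available value, that block would be marked Unavailable and the
// speculatively-available entries reachable from it would be flipped back
// to Unavailable.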
1005
1006 /// If the specified OldValue exists in ValuesPerBlock, replace its value with
1007 /// NewValue.
1008 static void replaceValuesPerBlockEntry(
1009 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
1010 Value *NewValue) {
1011 for (AvailableValueInBlock &V : ValuesPerBlock) {
1012 if (V.AV.Val == OldValue)
1013 V.AV.Val = NewValue;
1014 if (V.AV.isSelectValue()) {
1015 if (V.AV.V1 == OldValue)
1016 V.AV.V1 = NewValue;
1017 if (V.AV.V2 == OldValue)
1018 V.AV.V2 = NewValue;
1019 }
1020 }
1021 }
1022
1023 /// Given a set of loads specified by ValuesPerBlock,
1024 /// construct SSA form, allowing us to eliminate Load. This returns the value
1025 /// that should be used at Load's definition site.
1026 static Value *
1027 ConstructSSAForLoadSet(LoadInst *Load,
1028 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1029 GVNPass &gvn) {
1030 // Check for the fully redundant, dominating load case. In this case, we can
1031 // just use the dominating value directly.
1032 if (ValuesPerBlock.size() == 1 &&
1033 gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
1034 Load->getParent())) {
1035 assert(!ValuesPerBlock[0].AV.isUndefValue() &&
1036 "Dead BB dominate this block");
1037 return ValuesPerBlock[0].MaterializeAdjustedValue(Load, gvn);
1038 }
1039
1040 // Otherwise, we have to construct SSA form.
1041 SmallVector<PHINode*, 8> NewPHIs;
1042 SSAUpdater SSAUpdate(&NewPHIs);
1043 SSAUpdate.Initialize(Load->getType(), Load->getName());
1044
1045 for (const AvailableValueInBlock &AV : ValuesPerBlock) {
1046 BasicBlock *BB = AV.BB;
1047
1048 if (AV.AV.isUndefValue())
1049 continue;
1050
1051 if (SSAUpdate.HasValueForBlock(BB))
1052 continue;
1053
1054 // If the value is the load that we will be eliminating, and the block it's
1055 // available in is the block that the load is in, then don't add it, as
1056 // SSAUpdater will resolve the value to the relevant phi, which may let it
1057 // avoid phi construction entirely if there's actually only one value.
1058 if (BB == Load->getParent() &&
1059 ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
1060 (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
1061 continue;
1062
1063 SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(Load, gvn));
1064 }
1065
1066 // Perform PHI construction.
1067 return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
1068 }
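// For illustration (with made-up names): if the load's block has two
// predecessors and ValuesPerBlock provides %v1 in the first and %v2 in the
// second, SSAUpdater materializes
//   %phi = phi <ty> [ %v1, %pred1 ], [ %v2, %pred2 ]
// at the start of the load's block and returns it as the value to use at the
// load's definition site.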
1069
1070 Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
1071 Instruction *InsertPt,
1072 GVNPass &gvn) const {
1073 Value *Res;
1074 Type *LoadTy = Load->getType();
1075 const DataLayout &DL = Load->getDataLayout();
1076 if (isSimpleValue()) {
1077 Res = getSimpleValue();
1078 if (Res->getType() != LoadTy) {
1079 Res = getValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
1080
1081 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
1082 << " " << *getSimpleValue() << '\n'
1083 << *Res << '\n'
1084 << "\n\n\n");
1085 }
1086 } else if (isCoercedLoadValue()) {
1087 LoadInst *CoercedLoad = getCoercedLoadValue();
1088 if (CoercedLoad->getType() == LoadTy && Offset == 0) {
1089 Res = CoercedLoad;
1090 combineMetadataForCSE(CoercedLoad, Load, false);
1091 } else {
1092 Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL);
1093 // We are adding a new user for this load, for which the original
1094 // metadata may not hold. Additionally, the new load may have a different
1095 // size and type, so their metadata cannot be combined in any
1096 // straightforward way.
1097 // Drop all metadata that is not known to cause immediate UB on violation,
1098 // unless the load has !noundef, in which case all metadata violations
1099 // will be promoted to UB.
1100 // TODO: We can combine noalias/alias.scope metadata here, because it is
1101 // independent of the load type.
1102 if (!CoercedLoad->hasMetadata(LLVMContext::MD_noundef))
1103 CoercedLoad->dropUnknownNonDebugMetadata(
1104 {LLVMContext::MD_dereferenceable,
1105 LLVMContext::MD_dereferenceable_or_null,
1106 LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group});
1107 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
1108 << " " << *getCoercedLoadValue() << '\n'
1109 << *Res << '\n'
1110 << "\n\n\n");
1111 }
1112 } else if (isMemIntrinValue()) {
1113 Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
1114 InsertPt, DL);
1115 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1116 << " " << *getMemIntrinValue() << '\n'
1117 << *Res << '\n'
1118 << "\n\n\n");
1119 } else if (isSelectValue()) {
1120 // Introduce a new value select for a load from an eligible pointer select.
1121 SelectInst *Sel = getSelectValue();
1122 assert(V1 && V2 && "both value operands of the select must be present");
1123 Res =
1124 SelectInst::Create(Sel->getCondition(), V1, V2, "", Sel->getIterator());
1125 } else {
1126 llvm_unreachable("Should not materialize value from dead block");
1127 }
1128 assert(Res && "failed to materialize?");
1129 return Res;
1130 }
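// As an illustrative sketch of the coercion path (names are made up): for a
// simple available value coming from "store i32 %v, ptr %p" and a narrower
// "load i8" that reads offset 2 of the stored bits, getValueForLoad
// rematerializes the result roughly as a shift-and-truncate of %v; the exact
// sequence depends on the data layout's endianness.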
1131
1132 static bool isLifetimeStart(const Instruction *Inst) {
1133 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
1134 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1135 return false;
1136 }
1137
1138 /// Assuming To can be reached from both From and Between, does Between lie on
1139 /// every path from From to To?
1140 static bool liesBetween(const Instruction *From, Instruction *Between,
1141 const Instruction *To, DominatorTree *DT) {
1142 if (From->getParent() == Between->getParent())
1143 return DT->dominates(From, Between);
1144 SmallSet<BasicBlock *, 1> Exclusion;
1145 Exclusion.insert(Between->getParent());
1146 return !isPotentiallyReachable(From, To, &Exclusion, DT);
1147 }
1148
1149 /// Try to locate the three instructions involved in a missed
1150 /// load-elimination case that is due to an intervening store.
1151 static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
1152 DominatorTree *DT,
1153 OptimizationRemarkEmitter *ORE) {
1154 using namespace ore;
1155
1156 Instruction *OtherAccess = nullptr;
1157
1158 OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
1159 R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
1160 << setExtraArgs();
1161
1162 for (auto *U : Load->getPointerOperand()->users()) {
1163 if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1164 auto *I = cast<Instruction>(U);
1165 if (I->getFunction() == Load->getFunction() && DT->dominates(I, Load)) {
1166 // Use the most immediately dominating value
1167 if (OtherAccess) {
1168 if (DT->dominates(OtherAccess, I))
1169 OtherAccess = I;
1170 else
1171 assert(U == OtherAccess || DT->dominates(I, OtherAccess));
1172 } else
1173 OtherAccess = I;
1174 }
1175 }
1176 }
1177
1178 if (!OtherAccess) {
1179 // There is no dominating use; check if we can find the closest non-dominating
1180 // use that lies between any other potentially available use and Load.
1181 for (auto *U : Load->getPointerOperand()->users()) {
1182 if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) {
1183 auto *I = cast<Instruction>(U);
1184 if (I->getFunction() == Load->getFunction() &&
1185 isPotentiallyReachable(I, Load, nullptr, DT)) {
1186 if (OtherAccess) {
1187 if (liesBetween(OtherAccess, I, Load, DT)) {
1188 OtherAccess = I;
1189 } else if (!liesBetween(I, OtherAccess, Load, DT)) {
1190 // These uses are both partially available at Load were it not for
1191 // the clobber, but neither lies strictly after the other.
1192 OtherAccess = nullptr;
1193 break;
1194 } // else: keep current OtherAccess since it lies between U and Load
1195 } else {
1196 OtherAccess = I;
1197 }
1198 }
1199 }
1200 }
1201 }
1202
1203 if (OtherAccess)
1204 R << " in favor of " << NV("OtherAccess", OtherAccess);
1205
1206 R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
1207
1208 ORE->emit(R);
1209 }
1210
1211 // Find a non-clobbered value for the memory location Loc in the extended basic
1212 // block (a chain of basic blocks with single predecessors), starting from the instruction From.
1213 static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy,
1214 Instruction *From, AAResults *AA) {
1215 uint32_t NumVisitedInsts = 0;
1216 BasicBlock *FromBB = From->getParent();
1217 BatchAAResults BatchAA(*AA);
1218 for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
1219 for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
1220 Inst != nullptr; Inst = Inst->getPrevNonDebugInstruction()) {
1221 // Stop the search if limit is reached.
1222 if (++NumVisitedInsts > MaxNumVisitedInsts)
1223 return nullptr;
1224 if (isModSet(BatchAA.getModRefInfo(Inst, Loc)))
1225 return nullptr;
1226 if (auto *LI = dyn_cast<LoadInst>(Inst))
1227 if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy)
1228 return LI;
1229 }
1230 return nullptr;
1231 }
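// For illustration (with made-up names), given
//   %v1 = load i32, ptr %a
//   %v2 = load i32, ptr %b
//   %p  = select i1 %c, ptr %a, ptr %b
//   %v  = load i32, ptr %p
// with no clobbers in between, findDominatingValue locates %v1 and %v2, and
// the dependent load %v can later be materialized as
//   select i1 %c, i32 %v1, i32 %v2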
1232
1233 std::optional<AvailableValue>
1234 GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
1235 Value *Address) {
1236 assert(Load->isUnordered() && "rules below are incorrect for ordered access");
1237 assert(DepInfo.isLocal() && "expected a local dependence");
1238
1239 Instruction *DepInst = DepInfo.getInst();
1240
1241 const DataLayout &DL = Load->getDataLayout();
1242 if (DepInfo.isClobber()) {
1243 // If the dependence is to a store that writes to a superset of the bits
1244 // read by the load, we can extract the bits we need for the load from the
1245 // stored value.
1246 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1247 // Can't forward from non-atomic to atomic without violating memory model.
1248 if (Address && Load->isAtomic() <= DepSI->isAtomic()) {
1249 int Offset =
1250 analyzeLoadFromClobberingStore(Load->getType(), Address, DepSI, DL);
1251 if (Offset != -1)
1252 return AvailableValue::get(DepSI->getValueOperand(), Offset);
1253 }
1254 }
1255
1256 // Check to see if we have something like this:
1257 // load i32* P
1258 // load i8* (P+1)
1259 // if we have this, replace the latter with an extraction from the former.
1260 if (LoadInst *DepLoad = dyn_cast<LoadInst>(DepInst)) {
1261 // If this is a clobber and L is the first instruction in its block, then
1262 // we have the first instruction in the entry block.
1263 // Can't forward from non-atomic to atomic without violating memory model.
1264 if (DepLoad != Load && Address &&
1265 Load->isAtomic() <= DepLoad->isAtomic()) {
1266 Type *LoadType = Load->getType();
1267 int Offset = -1;
1268
1269 // If MD reported clobber, check it was nested.
1270 if (DepInfo.isClobber() &&
1271 canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
1272 const auto ClobberOff = MD->getClobberOffset(DepLoad);
1273 // GVN cannot deal with a negative offset.
1274 Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
1275 ? -1
1276 : *ClobberOff;
1277 }
1278 if (Offset == -1)
1279 Offset =
1280 analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL);
1281 if (Offset != -1)
1282 return AvailableValue::getLoad(DepLoad, Offset);
1283 }
1284 }
1285
1286 // If the clobbering value is a memset/memcpy/memmove, see if we can
1287 // forward a value on from it.
1288 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
1289 if (Address && !Load->isAtomic()) {
1290 int Offset = analyzeLoadFromClobberingMemInst(Load->getType(), Address,
1291 DepMI, DL);
1292 if (Offset != -1)
1293 return AvailableValue::getMI(DepMI, Offset);
1294 }
1295 }
1296
1297 // Nothing known about this clobber, have to be conservative
1298 LLVM_DEBUG(
1299 // fast print dep, using operator<< on instruction is too slow.
1300 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1301 dbgs() << " is clobbered by " << *DepInst << '\n';);
1302 if (ORE->allowExtraAnalysis(DEBUG_TYPE))
1303 reportMayClobberedLoad(Load, DepInfo, DT, ORE);
1304
1305 return std::nullopt;
1306 }
1307 assert(DepInfo.isDef() && "follows from above");
1308
1309 // Loading the alloca -> undef.
1310 // Loading immediately after lifetime begin -> undef.
1311 if (isa<AllocaInst>(DepInst) || isLifetimeStart(DepInst))
1312 return AvailableValue::get(UndefValue::get(Load->getType()));
1313
1314 if (Constant *InitVal =
1315 getInitialValueOfAllocation(DepInst, TLI, Load->getType()))
1316 return AvailableValue::get(InitVal);
1317
1318 if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
1319 // Reject loads and stores that are to the same address but are of
1320 // different types if we have to. If the stored value is convertible to
1321 // the loaded value, we can reuse it.
1322 if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), Load->getType(),
1323 DL))
1324 return std::nullopt;
1325
1326 // Can't forward from non-atomic to atomic without violating memory model.
1327 if (S->isAtomic() < Load->isAtomic())
1328 return std::nullopt;
1329
1330 return AvailableValue::get(S->getValueOperand());
1331 }
1332
1333 if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
1334 // If the types mismatch and we can't handle it, reject reuse of the load.
1335 // If the previously loaded value is larger than or equal to the loaded
1336 // value, we can reuse it.
1337 if (!canCoerceMustAliasedValueToLoad(LD, Load->getType(), DL))
1338 return std::nullopt;
1339
1340 // Can't forward from non-atomic to atomic without violating memory model.
1341 if (LD->isAtomic() < Load->isAtomic())
1342 return std::nullopt;
1343
1344 return AvailableValue::getLoad(LD);
1345 }
1346
1347 // Check if a load whose address depends on a select can be converted to a
1348 // select between load values. There must be no instructions between the found
1349 // loads and DepInst that may clobber the loads.
1350 if (auto *Sel = dyn_cast<SelectInst>(DepInst)) {
1351 assert(Sel->getType() == Load->getPointerOperandType());
1352 auto Loc = MemoryLocation::get(Load);
1353 Value *V1 =
1354 findDominatingValue(Loc.getWithNewPtr(Sel->getTrueValue()),
1355 Load->getType(), DepInst, getAliasAnalysis());
1356 if (!V1)
1357 return std::nullopt;
1358 Value *V2 =
1359 findDominatingValue(Loc.getWithNewPtr(Sel->getFalseValue()),
1360 Load->getType(), DepInst, getAliasAnalysis());
1361 if (!V2)
1362 return std::nullopt;
1363 return AvailableValue::getSelect(Sel, V1, V2);
1364 }
1365
1366 // Unknown def - must be conservative
1367 LLVM_DEBUG(
1368 // fast print dep, using operator<< on instruction is too slow.
1369 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1370 dbgs() << " has unknown def " << *DepInst << '\n';);
1371 return std::nullopt;
1372 }
1373
1374 void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
1375 AvailValInBlkVect &ValuesPerBlock,
1376 UnavailBlkVect &UnavailableBlocks) {
1377 // Filter out useless results (non-locals, etc). Keep track of the blocks
1378 // where we have a value available, and also keep track of whether we see
1379 // dependencies that produce an unknown value for the load (such as a call
1380 // that could potentially clobber the load).
1381 for (const auto &Dep : Deps) {
1382 BasicBlock *DepBB = Dep.getBB();
1383 MemDepResult DepInfo = Dep.getResult();
1384
1385 if (DeadBlocks.count(DepBB)) {
1386 // A dead dependent mem-op is treated as a load evaluating the same value
1387 // as the load in question.
1388 ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
1389 continue;
1390 }
1391
1392 if (!DepInfo.isLocal()) {
1393 UnavailableBlocks.push_back(DepBB);
1394 continue;
1395 }
1396
1397 // The address being loaded in this non-local block may not be the same as
1398 // the pointer operand of the load if PHI translation occurs. Make sure
1399 // to consider the right address.
1400 if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Dep.getAddress())) {
1401 // Subtlety: because we know this was a non-local dependency, we know
1402 // it's safe to materialize anywhere between the instruction within
1403 // DepInfo and the end of its block.
1404 ValuesPerBlock.push_back(
1405 AvailableValueInBlock::get(DepBB, std::move(*AV)));
1406 } else {
1407 UnavailableBlocks.push_back(DepBB);
1408 }
1409 }
1410
1411 assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1412 "post condition violation");
1413 }
1414
1415 /// Given the following code, v1 is partially available on some edges, but not
1416 /// available on the edge from PredBB. This function tries to find if there is
1417 /// another identical load in the other successor of PredBB.
1418 ///
1419 /// v0 = load %addr
1420 /// br %LoadBB
1421 ///
1422 /// LoadBB:
1423 /// v1 = load %addr
1424 /// ...
1425 ///
1426 /// PredBB:
1427 /// ...
1428 /// br %cond, label %LoadBB, label %SuccBB
1429 ///
1430 /// SuccBB:
1431 /// v2 = load %addr
1432 /// ...
1433 ///
1434 LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
1435 LoadInst *Load) {
1436 // For simplicity, we only handle the case where Pred has two successors.
1437 auto *Term = Pred->getTerminator();
1438 if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
1439 return nullptr;
1440 auto *SuccBB = Term->getSuccessor(0);
1441 if (SuccBB == LoadBB)
1442 SuccBB = Term->getSuccessor(1);
1443 if (!SuccBB->getSinglePredecessor())
1444 return nullptr;
1445
1446 unsigned int NumInsts = MaxNumInsnsPerBlock;
1447 for (Instruction &Inst : *SuccBB) {
1448 if (Inst.isDebugOrPseudoInst())
1449 continue;
1450 if (--NumInsts == 0)
1451 return nullptr;
1452
1453 if (!Inst.isIdenticalTo(Load))
1454 continue;
1455
1456 MemDepResult Dep = MD->getDependency(&Inst);
1457 // If an identical load doesn't depend on any local instructions, it can
1458 // be safely moved to PredBB.
1459 // Also check for the implicit control flow instructions. See the comments
1460 // in PerformLoadPRE for details.
1461 if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(&Inst))
1462 return cast<LoadInst>(&Inst);
1463
1464     // Otherwise something in the same BB clobbers the memory, so we can't
1465     // move this load and the later load to PredBB.
1466 return nullptr;
1467 }
1468
1469 return nullptr;
1470 }
1471
1472 void GVNPass::eliminatePartiallyRedundantLoad(
1473 LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1474 MapVector<BasicBlock *, Value *> &AvailableLoads,
1475 MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
1476 for (const auto &AvailableLoad : AvailableLoads) {
1477 BasicBlock *UnavailableBlock = AvailableLoad.first;
1478 Value *LoadPtr = AvailableLoad.second;
1479
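    // Create the reload in the unavailable predecessor, just before its
    // terminator, preserving the original load's volatility, alignment,
    // ordering, and sync scope.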
1480 auto *NewLoad = new LoadInst(
1481 Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
1482 Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
1483 UnavailableBlock->getTerminator()->getIterator());
1484 NewLoad->setDebugLoc(Load->getDebugLoc());
1485 if (MSSAU) {
1486 auto *NewAccess = MSSAU->createMemoryAccessInBB(
1487 NewLoad, nullptr, NewLoad->getParent(), MemorySSA::BeforeTerminator);
1488 if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
1489 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
1490 else
1491 MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);
1492 }
1493
1494 // Transfer the old load's AA tags to the new load.
1495 AAMDNodes Tags = Load->getAAMetadata();
1496 if (Tags)
1497 NewLoad->setAAMetadata(Tags);
1498
1499 if (auto *MD = Load->getMetadata(LLVMContext::MD_invariant_load))
1500 NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD);
1501 if (auto *InvGroupMD = Load->getMetadata(LLVMContext::MD_invariant_group))
1502 NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD);
1503 if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range))
1504 NewLoad->setMetadata(LLVMContext::MD_range, RangeMD);
1505 if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group))
1506 if (LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock))
1507 NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD);
1508
1509 // We do not propagate the old load's debug location, because the new
1510 // load now lives in a different BB, and we want to avoid a jumpy line
1511 // table.
1512 // FIXME: How do we retain source locations without causing poor debugging
1513 // behavior?
1514
1515 // Add the newly created load.
1516 ValuesPerBlock.push_back(
1517 AvailableValueInBlock::get(UnavailableBlock, NewLoad));
1518 MD->invalidateCachedPointerInfo(LoadPtr);
1519 LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1520
1521     // For each PredBB in CriticalEdgePredAndLoad we need to replace the uses
1522     // of the old load instruction with the newly created load instruction.
1523 if (CriticalEdgePredAndLoad) {
1524 auto I = CriticalEdgePredAndLoad->find(UnavailableBlock);
1525 if (I != CriticalEdgePredAndLoad->end()) {
1526 ++NumPRELoadMoved2CEPred;
1527 ICF->insertInstructionTo(NewLoad, UnavailableBlock);
1528 LoadInst *OldLoad = I->second;
1529 combineMetadataForCSE(NewLoad, OldLoad, false);
1530 OldLoad->replaceAllUsesWith(NewLoad);
1531 replaceValuesPerBlockEntry(ValuesPerBlock, OldLoad, NewLoad);
1532 if (uint32_t ValNo = VN.lookup(OldLoad, false))
1533 LeaderTable.erase(ValNo, OldLoad, OldLoad->getParent());
1534 VN.erase(OldLoad);
1535 removeInstruction(OldLoad);
1536 }
1537 }
1538 }
1539
1540 // Perform PHI construction.
1541 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
1542 // ConstructSSAForLoadSet is responsible for combining metadata.
1543 ICF->removeUsersOf(Load);
1544 Load->replaceAllUsesWith(V);
1545 if (isa<PHINode>(V))
1546 V->takeName(Load);
1547 if (Instruction *I = dyn_cast<Instruction>(V))
1548 I->setDebugLoc(Load->getDebugLoc());
1549 if (V->getType()->isPtrOrPtrVectorTy())
1550 MD->invalidateCachedPointerInfo(V);
1551 markInstructionForDeletion(Load);
1552 ORE->emit([&]() {
1553 return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
1554 << "load eliminated by PRE";
1555 });
1556 }
1557
1558 bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1559 UnavailBlkVect &UnavailableBlocks) {
1560 // Okay, we have *some* definitions of the value. This means that the value
1561   // is available in some of our (transitive) predecessors. Let's think about
1562 // doing PRE of this load. This will involve inserting a new load into the
1563 // predecessor when it's not available. We could do this in general, but
1564 // prefer to not increase code size. As such, we only do this when we know
1565 // that we only have to insert *one* load (which means we're basically moving
1566 // the load, not inserting a new one).
1567
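  // The blocks in which the load is unavailable; reaching one of them while
  // walking backwards through single-predecessor blocks below stops the walk.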
1568 SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(),
1569 UnavailableBlocks.end());
1570
1571 // Let's find the first basic block with more than one predecessor. Walk
1572 // backwards through predecessors if needed.
1573 BasicBlock *LoadBB = Load->getParent();
1574 BasicBlock *TmpBB = LoadBB;
1575
1576   // Check that there are no implicit control flow instructions above our load in
1577 // its block. If there is an instruction that doesn't always pass the
1578 // execution to the following instruction, then moving through it may become
1579 // invalid. For example:
1580 //
1581 // int arr[LEN];
1582 // int index = ???;
1583 // ...
1584 // guard(0 <= index && index < LEN);
1585 // use(arr[index]);
1586 //
1587 // It is illegal to move the array access to any point above the guard,
1588 // because if the index is out of bounds we should deoptimize rather than
1589 // access the array.
1590 // Check that there is no guard in this block above our instruction.
1591 bool MustEnsureSafetyOfSpeculativeExecution =
1592 ICF->isDominatedByICFIFromSameBlock(Load);
1593
1594 while (TmpBB->getSinglePredecessor()) {
1595 TmpBB = TmpBB->getSinglePredecessor();
1596 if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1597 return false;
1598 if (Blockers.count(TmpBB))
1599 return false;
1600
1601 // If any of these blocks has more than one successor (i.e. if the edge we
1602 // just traversed was critical), then there are other paths through this
1603 // block along which the load may not be anticipated. Hoisting the load
1604 // above this block would be adding the load to execution paths along
1605 // which it was not previously executed.
1606 if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1607 return false;
1608
1609 // Check that there is no implicit control flow in a block above.
1610 MustEnsureSafetyOfSpeculativeExecution =
1611 MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
1612 }
1613
1614 assert(TmpBB);
1615 LoadBB = TmpBB;
1616
1617 // Check to see how many predecessors have the loaded value fully
1618 // available.
1619 MapVector<BasicBlock *, Value *> PredLoads;
1620 DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
1621 for (const AvailableValueInBlock &AV : ValuesPerBlock)
1622 FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
1623 for (BasicBlock *UnavailableBB : UnavailableBlocks)
1624 FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
1625
1626   // The edge from Pred to LoadBB is a critical edge that will be split.
1627 SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
1628   // The edge from Pred to LoadBB is a critical edge, and another successor of
1629   // Pred contains a load that can be moved to Pred. This data structure maps
1630   // Pred to the movable load.
1631 MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
1632 for (BasicBlock *Pred : predecessors(LoadBB)) {
1633 // If any predecessor block is an EH pad that does not allow non-PHI
1634 // instructions before the terminator, we can't PRE the load.
1635 if (Pred->getTerminator()->isEHPad()) {
1636 LLVM_DEBUG(
1637 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1638 << Pred->getName() << "': " << *Load << '\n');
1639 return false;
1640 }
1641
1642 if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
1643 continue;
1644 }
1645
1646 if (Pred->getTerminator()->getNumSuccessors() != 1) {
1647 if (isa<IndirectBrInst>(Pred->getTerminator())) {
1648 LLVM_DEBUG(
1649 dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1650 << Pred->getName() << "': " << *Load << '\n');
1651 return false;
1652 }
1653
1654 if (LoadBB->isEHPad()) {
1655 LLVM_DEBUG(
1656 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1657 << Pred->getName() << "': " << *Load << '\n');
1658 return false;
1659 }
1660
1661 // Do not split backedge as it will break the canonical loop form.
1662 if (!isLoadPRESplitBackedgeEnabled())
1663 if (DT->dominates(LoadBB, Pred)) {
1664 LLVM_DEBUG(
1665 dbgs()
1666 << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
1667 << Pred->getName() << "': " << *Load << '\n');
1668 return false;
1669 }
1670
1671 if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
1672 CriticalEdgePredAndLoad[Pred] = LI;
1673 else
1674 CriticalEdgePredSplit.push_back(Pred);
1675 } else {
1676 // Only add the predecessors that will not be split for now.
1677 PredLoads[Pred] = nullptr;
1678 }
1679 }
1680
1681 // Decide whether PRE is profitable for this load.
1682 unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
1683 unsigned NumUnavailablePreds = NumInsertPreds +
1684 CriticalEdgePredAndLoad.size();
1685 assert(NumUnavailablePreds != 0 &&
1686 "Fully available value should already be eliminated!");
1687 (void)NumUnavailablePreds;
1688
1689   // If we need to insert a new load in multiple predecessors, reject it.
1690 // FIXME: If we could restructure the CFG, we could make a common pred with
1691 // all the preds that don't have an available Load and insert a new load into
1692 // that one block.
1693 if (NumInsertPreds > 1)
1694 return false;
1695
1696   // Now we know where we will insert the load. We must ensure that it is safe
1697   // to speculatively execute the load at those points.
1698 if (MustEnsureSafetyOfSpeculativeExecution) {
1699 if (CriticalEdgePredSplit.size())
1700 if (!isSafeToSpeculativelyExecute(Load, LoadBB->getFirstNonPHI(), AC, DT))
1701 return false;
1702 for (auto &PL : PredLoads)
1703 if (!isSafeToSpeculativelyExecute(Load, PL.first->getTerminator(), AC,
1704 DT))
1705 return false;
1706 for (auto &CEP : CriticalEdgePredAndLoad)
1707 if (!isSafeToSpeculativelyExecute(Load, CEP.first->getTerminator(), AC,
1708 DT))
1709 return false;
1710 }
1711
1712 // Split critical edges, and update the unavailable predecessors accordingly.
1713 for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
1714 BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
1715 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1716 PredLoads[NewPred] = nullptr;
1717 LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1718 << LoadBB->getName() << '\n');
1719 }
1720
1721 for (auto &CEP : CriticalEdgePredAndLoad)
1722 PredLoads[CEP.first] = nullptr;
1723
1724 // Check if the load can safely be moved to all the unavailable predecessors.
1725 bool CanDoPRE = true;
1726 const DataLayout &DL = Load->getDataLayout();
1727 SmallVector<Instruction*, 8> NewInsts;
1728 for (auto &PredLoad : PredLoads) {
1729 BasicBlock *UnavailablePred = PredLoad.first;
1730
1731 // Do PHI translation to get its value in the predecessor if necessary. The
1732 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1733 // We do the translation for each edge we skipped by going from Load's block
1734 // to LoadBB, otherwise we might miss pieces needing translation.
1735
1736 // If all preds have a single successor, then we know it is safe to insert
1737 // the load on the pred (?!?), so we can insert code to materialize the
1738 // pointer if it is not available.
1739 Value *LoadPtr = Load->getPointerOperand();
1740 BasicBlock *Cur = Load->getParent();
1741 while (Cur != LoadBB) {
1742 PHITransAddr Address(LoadPtr, DL, AC);
1743 LoadPtr = Address.translateWithInsertion(Cur, Cur->getSinglePredecessor(),
1744 *DT, NewInsts);
1745 if (!LoadPtr) {
1746 CanDoPRE = false;
1747 break;
1748 }
1749 Cur = Cur->getSinglePredecessor();
1750 }
1751
1752 if (LoadPtr) {
1753 PHITransAddr Address(LoadPtr, DL, AC);
1754 LoadPtr = Address.translateWithInsertion(LoadBB, UnavailablePred, *DT,
1755 NewInsts);
1756 }
1757 // If we couldn't find or insert a computation of this phi translated value,
1758 // we fail PRE.
1759 if (!LoadPtr) {
1760 LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1761 << *Load->getPointerOperand() << "\n");
1762 CanDoPRE = false;
1763 break;
1764 }
1765
1766 PredLoad.second = LoadPtr;
1767 }
1768
1769 if (!CanDoPRE) {
1770 while (!NewInsts.empty()) {
1771 // Erase instructions generated by the failed PHI translation before
1772 // trying to number them. PHI translation might insert instructions
1773 // in basic blocks other than the current one, and we delete them
1774 // directly, as markInstructionForDeletion only allows removing from the
1775 // current basic block.
1776 NewInsts.pop_back_val()->eraseFromParent();
1777 }
1778     // HINT: Don't revert the edge-splitting, as the following transformation
1779     // may also need to split these critical edges.
1780 return !CriticalEdgePredSplit.empty();
1781 }
1782
1783 // Okay, we can eliminate this load by inserting a reload in the predecessor
1784 // and using PHI construction to get the value in the other predecessors, do
1785 // it.
1786 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
1787 LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
1788 << " INSTS: " << *NewInsts.back()
1789 << '\n');
1790
1791 // Assign value numbers to the new instructions.
1792 for (Instruction *I : NewInsts) {
1793 // Instructions that have been inserted in predecessor(s) to materialize
1794 // the load address do not retain their original debug locations. Doing
1795 // so could lead to confusing (but correct) source attributions.
1796 I->updateLocationAfterHoist();
1797
1798 // FIXME: We really _ought_ to insert these value numbers into their
1799 // parent's availability map. However, in doing so, we risk getting into
1800 // ordering issues. If a block hasn't been processed yet, we would be
1801 // marking a value as AVAIL-IN, which isn't what we intend.
1802 VN.lookupOrAdd(I);
1803 }
1804
1805 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads,
1806 &CriticalEdgePredAndLoad);
1807 ++NumPRELoad;
1808 return true;
1809 }
1810
1811 bool GVNPass::performLoopLoadPRE(LoadInst *Load,
1812 AvailValInBlkVect &ValuesPerBlock,
1813 UnavailBlkVect &UnavailableBlocks) {
1814 const Loop *L = LI->getLoopFor(Load->getParent());
1815 // TODO: Generalize to other loop blocks that dominate the latch.
1816 if (!L || L->getHeader() != Load->getParent())
1817 return false;
1818
1819 BasicBlock *Preheader = L->getLoopPreheader();
1820 BasicBlock *Latch = L->getLoopLatch();
1821 if (!Preheader || !Latch)
1822 return false;
1823
1824 Value *LoadPtr = Load->getPointerOperand();
1825 // Must be available in preheader.
1826 if (!L->isLoopInvariant(LoadPtr))
1827 return false;
1828
1829   // We plan to hoist the load to the preheader without introducing a new fault.
1830   // In order to do that, we need to prove that we cannot side-exit the loop
1831   // between first entering the loop header and executing the load.
1832 if (ICF->isDominatedByICFIFromSameBlock(Load))
1833 return false;
1834
1835 BasicBlock *LoopBlock = nullptr;
1836 for (auto *Blocker : UnavailableBlocks) {
1837 // Blockers from outside the loop are handled in preheader.
1838 if (!L->contains(Blocker))
1839 continue;
1840
1841     // Only allow one loop block. The loop header is executed at least as
1842     // frequently as any other loop block, and likely much more frequently. But
1843 // in case of multiple loop blocks, we need extra information (such as block
1844 // frequency info) to understand whether it is profitable to PRE into
1845 // multiple loop blocks.
1846 if (LoopBlock)
1847 return false;
1848
1849 // Do not sink into inner loops. This may be non-profitable.
1850 if (L != LI->getLoopFor(Blocker))
1851 return false;
1852
1853 // Blocks that dominate the latch execute on every single iteration, maybe
1854 // except the last one. So PREing into these blocks doesn't make much sense
1855 // in most cases. But the blocks that do not necessarily execute on each
1856 // iteration are sometimes much colder than the header, and this is when
1857 // PRE is potentially profitable.
1858 if (DT->dominates(Blocker, Latch))
1859 return false;
1860
1861 // Make sure that the terminator itself doesn't clobber.
1862 if (Blocker->getTerminator()->mayWriteToMemory())
1863 return false;
1864
1865 LoopBlock = Blocker;
1866 }
1867
1868 if (!LoopBlock)
1869 return false;
1870
1871   // Make sure the memory at this pointer cannot be freed, so that we can
1872   // safely reload from it after a clobber.
1873 if (LoadPtr->canBeFreed())
1874 return false;
1875
1876   // TODO: Support critical edge splitting if the blocker has more than one successor.
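  // Insert the reload both in the preheader (covering entry into the loop) and
  // in the single in-loop blocker block found above.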
1877 MapVector<BasicBlock *, Value *> AvailableLoads;
1878 AvailableLoads[LoopBlock] = LoadPtr;
1879 AvailableLoads[Preheader] = LoadPtr;
1880
1881 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
1882 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
1883 /*CriticalEdgePredAndLoad*/ nullptr);
1884 ++NumPRELoopLoad;
1885 return true;
1886 }
1887
1888 static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
1889 OptimizationRemarkEmitter *ORE) {
1890 using namespace ore;
1891
1892 ORE->emit([&]() {
1893 return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
1894 << "load of type " << NV("Type", Load->getType()) << " eliminated"
1895 << setExtraArgs() << " in favor of "
1896 << NV("InfavorOfValue", AvailableValue);
1897 });
1898 }
1899
1900 /// Attempt to eliminate a load whose dependencies are
1901 /// non-local by performing PHI construction.
1902 bool GVNPass::processNonLocalLoad(LoadInst *Load) {
1903   // Non-local speculations are not allowed under ASan.
1904 if (Load->getParent()->getParent()->hasFnAttribute(
1905 Attribute::SanitizeAddress) ||
1906 Load->getParent()->getParent()->hasFnAttribute(
1907 Attribute::SanitizeHWAddress))
1908 return false;
1909
1910 // Step 1: Find the non-local dependencies of the load.
1911 LoadDepVect Deps;
1912 MD->getNonLocalPointerDependency(Load, Deps);
1913
1914 // If we had to process more than one hundred blocks to find the
1915 // dependencies, this load isn't worth worrying about. Optimizing
1916 // it will be too expensive.
1917 unsigned NumDeps = Deps.size();
1918 if (NumDeps > MaxNumDeps)
1919 return false;
1920
1921 // If we had a phi translation failure, we'll have a single entry which is a
1922 // clobber in the current block. Reject this early.
1923 if (NumDeps == 1 &&
1924 !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
1925 LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
1926 dbgs() << " has unknown dependencies\n";);
1927 return false;
1928 }
1929
1930 bool Changed = false;
1931 // If this load follows a GEP, see if we can PRE the indices before analyzing.
1932 if (GetElementPtrInst *GEP =
1933 dyn_cast<GetElementPtrInst>(Load->getOperand(0))) {
1934 for (Use &U : GEP->indices())
1935 if (Instruction *I = dyn_cast<Instruction>(U.get()))
1936 Changed |= performScalarPRE(I);
1937 }
1938
1939 // Step 2: Analyze the availability of the load
1940 AvailValInBlkVect ValuesPerBlock;
1941 UnavailBlkVect UnavailableBlocks;
1942 AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);
1943
1944 // If we have no predecessors that produce a known value for this load, exit
1945 // early.
1946 if (ValuesPerBlock.empty())
1947 return Changed;
1948
1949   // Step 3: Eliminate full redundancy.
1950 //
1951 // If all of the instructions we depend on produce a known value for this
1952 // load, then it is fully redundant and we can use PHI insertion to compute
1953 // its value. Insert PHIs and remove the fully redundant value now.
1954 if (UnavailableBlocks.empty()) {
1955 LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');
1956
1957 // Perform PHI construction.
1958 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
1959 // ConstructSSAForLoadSet is responsible for combining metadata.
1960 ICF->removeUsersOf(Load);
1961 Load->replaceAllUsesWith(V);
1962
1963 if (isa<PHINode>(V))
1964 V->takeName(Load);
1965 if (Instruction *I = dyn_cast<Instruction>(V))
1966 // If instruction I has debug info, then we should not update it.
1967 // Also, if I has a null DebugLoc, then it is still potentially incorrect
1968 // to propagate Load's DebugLoc because Load may not post-dominate I.
1969 if (Load->getDebugLoc() && Load->getParent() == I->getParent())
1970 I->setDebugLoc(Load->getDebugLoc());
1971 if (V->getType()->isPtrOrPtrVectorTy())
1972 MD->invalidateCachedPointerInfo(V);
1973 markInstructionForDeletion(Load);
1974 ++NumGVNLoad;
1975 reportLoadElim(Load, V, ORE);
1976 return true;
1977 }
1978
1979 // Step 4: Eliminate partial redundancy.
1980 if (!isPREEnabled() || !isLoadPREEnabled())
1981 return Changed;
1982 if (!isLoadInLoopPREEnabled() && LI->getLoopFor(Load->getParent()))
1983 return Changed;
1984
1985 if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
1986 PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
1987 return true;
1988
1989 return Changed;
1990 }
1991
1992 static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
1993 if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_EQ)
1994 return true;
1995
1996 // Floating point comparisons can be equal, but not equivalent. Cases:
1997 // NaNs for unordered operators
1998 // +0.0 vs 0.0 for all operators
1999 if (Cmp->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
2000 (Cmp->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
2001 Cmp->getFastMathFlags().noNaNs())) {
2002 Value *LHS = Cmp->getOperand(0);
2003 Value *RHS = Cmp->getOperand(1);
2004 // If we can prove either side non-zero, then equality must imply
2005 // equivalence.
2006 // FIXME: We should do this optimization if 'no signed zeros' is
2007 // applicable via an instruction-level fast-math-flag or some other
2008 // indicator that relaxed FP semantics are being used.
2009 if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
2010 return true;
2011 if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
2012 return true;
2013 // TODO: Handle vector floating point constants
2014 }
2015 return false;
2016 }
2017
2018 static bool impliesEquivalanceIfFalse(CmpInst* Cmp) {
2019 if (Cmp->getPredicate() == CmpInst::Predicate::ICMP_NE)
2020 return true;
2021
2022   // Floating point comparisons can be equal, but not equivalent. Cases:
2023 // NaNs for unordered operators
2024 // +0.0 vs 0.0 for all operators
2025 if ((Cmp->getPredicate() == CmpInst::Predicate::FCMP_ONE &&
2026 Cmp->getFastMathFlags().noNaNs()) ||
2027 Cmp->getPredicate() == CmpInst::Predicate::FCMP_UNE) {
2028 Value *LHS = Cmp->getOperand(0);
2029 Value *RHS = Cmp->getOperand(1);
2030 // If we can prove either side non-zero, then equality must imply
2031 // equivalence.
2032 // FIXME: We should do this optimization if 'no signed zeros' is
2033 // applicable via an instruction-level fast-math-flag or some other
2034 // indicator that relaxed FP semantics are being used.
2035 if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
2036 return true;
2037 if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
2038 return true;
2039 // TODO: Handle vector floating point constants
2040 }
2041 return false;
2042 }
2043
2044
2045 static bool hasUsersIn(Value *V, BasicBlock *BB) {
2046 return llvm::any_of(V->users(), [BB](User *U) {
2047 auto *I = dyn_cast<Instruction>(U);
2048 return I && I->getParent() == BB;
2049 });
2050 }
2051
2052 bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
2053 Value *V = IntrinsicI->getArgOperand(0);
2054
2055 if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
2056 if (Cond->isZero()) {
2057 Type *Int8Ty = Type::getInt8Ty(V->getContext());
2058 Type *PtrTy = PointerType::get(V->getContext(), 0);
2059       // Insert a new store-to-null instruction before the assume intrinsic to
2060       // indicate that this code is not reachable. FIXME: We could insert an
2061       // unreachable instruction directly because we can modify the CFG.
2062 auto *NewS =
2063 new StoreInst(PoisonValue::get(Int8Ty), Constant::getNullValue(PtrTy),
2064 IntrinsicI->getIterator());
2065 if (MSSAU) {
2066 const MemoryUseOrDef *FirstNonDom = nullptr;
2067 const auto *AL =
2068 MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent());
2069
2070 // If there are accesses in the current basic block, find the first one
2071 // that does not come before NewS. The new memory access is inserted
2072 // after the found access or before the terminator if no such access is
2073 // found.
2074 if (AL) {
2075 for (const auto &Acc : *AL) {
2076 if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
2077 if (!Current->getMemoryInst()->comesBefore(NewS)) {
2078 FirstNonDom = Current;
2079 break;
2080 }
2081 }
2082 }
2083
2084 auto *NewDef =
2085 FirstNonDom ? MSSAU->createMemoryAccessBefore(
2086 NewS, nullptr,
2087 const_cast<MemoryUseOrDef *>(FirstNonDom))
2088 : MSSAU->createMemoryAccessInBB(
2089 NewS, nullptr,
2090 NewS->getParent(), MemorySSA::BeforeTerminator);
2091
2092 MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
2093 }
2094 }
2095 if (isAssumeWithEmptyBundle(*IntrinsicI)) {
2096 markInstructionForDeletion(IntrinsicI);
2097 return true;
2098 }
2099 return false;
2100 }
2101
2102 if (isa<Constant>(V)) {
2103     // If it's a constant and not false, it must evaluate to true. This means
2104     // our assume is assume(true) and thus pointless, and we don't want to do
2105     // anything more here.
2106 return false;
2107 }
2108
2109 Constant *True = ConstantInt::getTrue(V->getContext());
2110 bool Changed = false;
2111
2112 for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
2113 BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
2114
2115     // This property is only true in dominated successors; propagateEquality
2116     // will check dominance for us.
2117 Changed |= propagateEquality(V, True, Edge, false);
2118 }
2119
2120 // We can replace assume value with true, which covers cases like this:
2121 // call void @llvm.assume(i1 %cmp)
2122 // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
2123 ReplaceOperandsWithMap[V] = True;
2124
2125 // Similarly, after assume(!NotV) we know that NotV == false.
2126 Value *NotV;
2127 if (match(V, m_Not(m_Value(NotV))))
2128 ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());
2129
2130   // If we find an equality fact, canonicalize all dominated uses in this block
2131   // to one of the two values. We heuristically choose the "oldest" of the
2132   // two, where age is determined by value number. (Note that propagateEquality
2133   // above handles the cross-block case.)
2134 //
2135   // Key cases to cover are:
2136 // 1)
2137 // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
2138 // call void @llvm.assume(i1 %cmp)
2139 // ret float %0 ; will change it to ret float 3.000000e+00
2140 // 2)
2141 // %load = load float, float* %addr
2142 // %cmp = fcmp oeq float %load, %0
2143 // call void @llvm.assume(i1 %cmp)
2144 // ret float %load ; will change it to ret float %0
2145 if (auto *CmpI = dyn_cast<CmpInst>(V)) {
2146 if (impliesEquivalanceIfTrue(CmpI)) {
2147 Value *CmpLHS = CmpI->getOperand(0);
2148 Value *CmpRHS = CmpI->getOperand(1);
2149 // Heuristically pick the better replacement -- the choice of heuristic
2150 // isn't terribly important here, but the fact we canonicalize on some
2151 // replacement is for exposing other simplifications.
2152 // TODO: pull this out as a helper function and reuse w/existing
2153 // (slightly different) logic.
2154 if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS))
2155 std::swap(CmpLHS, CmpRHS);
2156 if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))
2157 std::swap(CmpLHS, CmpRHS);
2158 if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) ||
2159 (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) {
2160 // Move the 'oldest' value to the right-hand side, using the value
2161 // number as a proxy for age.
2162 uint32_t LVN = VN.lookupOrAdd(CmpLHS);
2163 uint32_t RVN = VN.lookupOrAdd(CmpRHS);
2164 if (LVN < RVN)
2165 std::swap(CmpLHS, CmpRHS);
2166 }
2167
2168       // Handle the degenerate case where we either haven't pruned a dead path
2169       // or removed a trivial assume yet.
2170 if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
2171 return Changed;
2172
2173 LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
2174 << *CmpLHS << " with "
2175 << *CmpRHS << " in block "
2176 << IntrinsicI->getParent()->getName() << "\n");
2177
2178
2179 // Setup the replacement map - this handles uses within the same block
2180 if (hasUsersIn(CmpLHS, IntrinsicI->getParent()))
2181 ReplaceOperandsWithMap[CmpLHS] = CmpRHS;
2182
2183       // NOTE: The non-block-local cases are handled by the call to
2184       // propagateEquality above; this block is just about handling the
2185       // block-local cases. TODO: There's a bunch of logic in propagateEquality
2186       // which isn't duplicated for the block-local case; can we share it somehow?
2187 }
2188 }
2189 return Changed;
2190 }
2191
2192 static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
2193 patchReplacementInstruction(I, Repl);
2194 I->replaceAllUsesWith(Repl);
2195 }
2196
2197 /// Attempt to eliminate a load, first by eliminating it
2198 /// locally, and then attempting non-local elimination if that fails.
2199 bool GVNPass::processLoad(LoadInst *L) {
2200 if (!MD)
2201 return false;
2202
2203 // This code hasn't been audited for ordered or volatile memory access
2204 if (!L->isUnordered())
2205 return false;
2206
2207 if (L->use_empty()) {
2208 markInstructionForDeletion(L);
2209 return true;
2210 }
2211
2212 // ... to a pointer that has been loaded from before...
2213 MemDepResult Dep = MD->getDependency(L);
2214
2215 // If it is defined in another block, try harder.
2216 if (Dep.isNonLocal())
2217 return processNonLocalLoad(L);
2218
2219 // Only handle the local case below
2220 if (!Dep.isLocal()) {
2221 // This might be a NonFuncLocal or an Unknown
2222 LLVM_DEBUG(
2223 // fast print dep, using operator<< on instruction is too slow.
2224 dbgs() << "GVN: load "; L->printAsOperand(dbgs());
2225 dbgs() << " has unknown dependence\n";);
2226 return false;
2227 }
2228
2229 auto AV = AnalyzeLoadAvailability(L, Dep, L->getPointerOperand());
2230 if (!AV)
2231 return false;
2232
2233 Value *AvailableValue = AV->MaterializeAdjustedValue(L, L, *this);
2234
2235 // MaterializeAdjustedValue is responsible for combining metadata.
2236 ICF->removeUsersOf(L);
2237 L->replaceAllUsesWith(AvailableValue);
2238 markInstructionForDeletion(L);
2239 if (MSSAU)
2240 MSSAU->removeMemoryAccess(L);
2241 ++NumGVNLoad;
2242 reportLoadElim(L, AvailableValue, ORE);
2243 // Tell MDA to reexamine the reused pointer since we might have more
2244 // information after forwarding it.
2245 if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
2246 MD->invalidateCachedPointerInfo(AvailableValue);
2247 return true;
2248 }
2249
2250 /// Return a pair whose first field is the value number of \p Exp and whose
2251 /// second field indicates whether that value number was newly created.
2252 std::pair<uint32_t, bool>
2253 GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
2254 uint32_t &e = expressionNumbering[Exp];
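  // The map default-constructs a zero entry for an unseen expression, so a
  // value of zero here means no value number has been assigned yet.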
2255 bool CreateNewValNum = !e;
2256 if (CreateNewValNum) {
2257 Expressions.push_back(Exp);
2258 if (ExprIdx.size() < nextValueNumber + 1)
2259 ExprIdx.resize(nextValueNumber * 2);
2260 e = nextValueNumber;
2261 ExprIdx[nextValueNumber++] = nextExprNumber++;
2262 }
2263 return {e, CreateNewValNum};
2264 }
2265
2266 /// Return whether all the values related to the same \p Num are
2267 /// defined in \p BB.
2268 bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
2269 GVNPass &Gvn) {
2270 return all_of(
2271 Gvn.LeaderTable.getLeaders(Num),
2272 [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
2273 }
2274
2275 /// Wrap phiTranslateImpl to provide caching functionality.
2276 uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
2277 const BasicBlock *PhiBlock,
2278 uint32_t Num, GVNPass &Gvn) {
2279 auto FindRes = PhiTranslateTable.find({Num, Pred});
2280 if (FindRes != PhiTranslateTable.end())
2281 return FindRes->second;
2282 uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
2283 PhiTranslateTable.insert({{Num, Pred}, NewNum});
2284 return NewNum;
2285 }
2286
2287 // Return true if the value numbers \p Num and \p NewNum represent equal values.
2288 // Return false if the result is unknown.
2289 bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
2290 const BasicBlock *Pred,
2291 const BasicBlock *PhiBlock,
2292 GVNPass &Gvn) {
2293 CallInst *Call = nullptr;
2294 auto Leaders = Gvn.LeaderTable.getLeaders(Num);
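  // Find a call instruction with value number Num that lives in PhiBlock,
  // if there is one.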
2295 for (const auto &Entry : Leaders) {
2296 Call = dyn_cast<CallInst>(Entry.Val);
2297 if (Call && Call->getParent() == PhiBlock)
2298 break;
2299 }
2300
2301 if (AA->doesNotAccessMemory(Call))
2302 return true;
2303
2304 if (!MD || !AA->onlyReadsMemory(Call))
2305 return false;
2306
2307 MemDepResult local_dep = MD->getDependency(Call);
2308 if (!local_dep.isNonLocal())
2309 return false;
2310
2311 const MemoryDependenceResults::NonLocalDepInfo &deps =
2312 MD->getNonLocalCallDependency(Call);
2313
2314 // Check to see if the Call has no function local clobber.
2315 for (const NonLocalDepEntry &D : deps) {
2316 if (D.getResult().isNonFuncLocal())
2317 return true;
2318 }
2319 return false;
2320 }
2321
2322 /// Translate value number \p Num using phis, so that it has the values of
2323 /// the phis in BB.
2324 uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
2325 const BasicBlock *PhiBlock,
2326 uint32_t Num, GVNPass &Gvn) {
2327 if (PHINode *PN = NumberingPhi[Num]) {
2328 for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
2329 if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
2330 if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
2331 return TransVal;
2332 }
2333 return Num;
2334 }
2335
2336   // If any value related to Num is defined in a BB other than PhiBlock,
2337   // it cannot depend on a phi in PhiBlock without going through a backedge.
2338   // We can do an early exit in that case to save compile time.
2339 if (!areAllValsInBB(Num, PhiBlock, Gvn))
2340 return Num;
2341
2342 if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
2343 return Num;
2344 Expression Exp = Expressions[ExprIdx[Num]];
2345
2346 for (unsigned i = 0; i < Exp.varargs.size(); i++) {
2347 // For InsertValue and ExtractValue, some varargs are index numbers
2348 // instead of value numbers. Those index numbers should not be
2349 // translated.
2350 if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
2351 (i > 0 && Exp.opcode == Instruction::ExtractValue) ||
2352 (i > 1 && Exp.opcode == Instruction::ShuffleVector))
2353 continue;
2354 Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
2355 }
2356
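  // Re-canonicalize commutative expressions after translation: keep the
  // smaller value number first, swapping the compare predicate accordingly.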
2357 if (Exp.commutative) {
2358 assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!");
2359 if (Exp.varargs[0] > Exp.varargs[1]) {
2360 std::swap(Exp.varargs[0], Exp.varargs[1]);
2361 uint32_t Opcode = Exp.opcode >> 8;
2362 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
2363 Exp.opcode = (Opcode << 8) |
2364 CmpInst::getSwappedPredicate(
2365 static_cast<CmpInst::Predicate>(Exp.opcode & 255));
2366 }
2367 }
2368
2369 if (uint32_t NewNum = expressionNumbering[Exp]) {
2370 if (Exp.opcode == Instruction::Call && NewNum != Num)
2371 return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num;
2372 return NewNum;
2373 }
2374 return Num;
2375 }
2376
2377 /// Erase stale entry from phiTranslate cache so phiTranslate can be computed
2378 /// again.
2379 void GVNPass::ValueTable::eraseTranslateCacheEntry(
2380 uint32_t Num, const BasicBlock &CurrBlock) {
2381 for (const BasicBlock *Pred : predecessors(&CurrBlock))
2382 PhiTranslateTable.erase({Num, Pred});
2383 }
2384
2385 // In order to find a leader for a given value number at a
2386 // specific basic block, we first obtain the list of all Values for that number,
2387 // and then scan the list to find one whose block dominates the block in
2388 // question. This is fast because dominator tree queries consist of only
2389 // a few comparisons of DFS numbers.
2390 Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) {
2391 auto Leaders = LeaderTable.getLeaders(num);
2392 if (Leaders.empty())
2393 return nullptr;
2394
2395 Value *Val = nullptr;
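  // Prefer a constant leader if one dominates BB; otherwise return the last
  // dominating leader seen.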
2396 for (const auto &Entry : Leaders) {
2397 if (DT->dominates(Entry.BB, BB)) {
2398 Val = Entry.Val;
2399 if (isa<Constant>(Val))
2400 return Val;
2401 }
2402 }
2403
2404 return Val;
2405 }
2406
2407 /// There is an edge from 'Src' to 'Dst'. Return
2408 /// true if every path from the entry block to 'Dst' passes via this edge. In
2409 /// particular 'Dst' must not be reachable via another edge from 'Src'.
2410 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
2411 DominatorTree *DT) {
2412 // While in theory it is interesting to consider the case in which Dst has
2413 // more than one predecessor, because Dst might be part of a loop which is
2414 // only reachable from Src, in practice it is pointless since at the time
2415 // GVN runs all such loops have preheaders, which means that Dst will have
2416 // been changed to have only one predecessor, namely Src.
2417 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
2418 assert((!Pred || Pred == E.getStart()) &&
2419 "No edge between these basic blocks!");
2420 return Pred != nullptr;
2421 }
2422
2423 void GVNPass::assignBlockRPONumber(Function &F) {
2424 BlockRPONumber.clear();
2425 uint32_t NextBlockNumber = 1;
2426 ReversePostOrderTraversal<Function *> RPOT(&F);
2427 for (BasicBlock *BB : RPOT)
2428 BlockRPONumber[BB] = NextBlockNumber++;
2429 InvalidBlockRPONumbers = false;
2430 }
2431
2432 bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
2433 bool Changed = false;
2434 for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
2435 Value *Operand = Instr->getOperand(OpNum);
2436 auto it = ReplaceOperandsWithMap.find(Operand);
2437 if (it != ReplaceOperandsWithMap.end()) {
2438 LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with "
2439 << *it->second << " in instruction " << *Instr << '\n');
2440 Instr->setOperand(OpNum, it->second);
2441 Changed = true;
2442 }
2443 }
2444 return Changed;
2445 }
2446
2447 /// The given values are known to be equal in every block
2448 /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
2449 /// 'RHS' everywhere in the scope. Returns whether a change was made.
2450 /// If DominatesByEdge is false, then it means that we will propagate the RHS
2451 /// value starting from the end of Root.Start.
2452 bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
2453 const BasicBlockEdge &Root,
2454 bool DominatesByEdge) {
2455 SmallVector<std::pair<Value*, Value*>, 4> Worklist;
2456 Worklist.push_back(std::make_pair(LHS, RHS));
2457 bool Changed = false;
2458 // For speed, compute a conservative fast approximation to
2459 // DT->dominates(Root, Root.getEnd());
2460 const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
2461
2462 while (!Worklist.empty()) {
2463 std::pair<Value*, Value*> Item = Worklist.pop_back_val();
2464 LHS = Item.first; RHS = Item.second;
2465
2466 if (LHS == RHS)
2467 continue;
2468 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
2469
2470 // Don't try to propagate equalities between constants.
2471 if (isa<Constant>(LHS) && isa<Constant>(RHS))
2472 continue;
2473
2474 // Prefer a constant on the right-hand side, or an Argument if no constants.
2475 if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
2476 std::swap(LHS, RHS);
2477 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
2478 const DataLayout &DL =
2479 isa<Argument>(LHS)
2480 ? cast<Argument>(LHS)->getParent()->getDataLayout()
2481 : cast<Instruction>(LHS)->getDataLayout();
2482
2483 // If there is no obvious reason to prefer the left-hand side over the
2484 // right-hand side, ensure the longest lived term is on the right-hand side,
2485 // so the shortest lived term will be replaced by the longest lived.
2486 // This tends to expose more simplifications.
2487 uint32_t LVN = VN.lookupOrAdd(LHS);
2488 if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
2489 (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
2490 // Move the 'oldest' value to the right-hand side, using the value number
2491 // as a proxy for age.
2492 uint32_t RVN = VN.lookupOrAdd(RHS);
2493 if (LVN < RVN) {
2494 std::swap(LHS, RHS);
2495 LVN = RVN;
2496 }
2497 }
2498
2499 // If value numbering later sees that an instruction in the scope is equal
2500 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
2501 // the invariant that instructions only occur in the leader table for their
2502 // own value number (this is used by removeFromLeaderTable), do not do this
2503 // if RHS is an instruction (if an instruction in the scope is morphed into
2504 // LHS then it will be turned into RHS by the next GVN iteration anyway, so
2505 // using the leader table is about compiling faster, not optimizing better).
2506     // The leader table only tracks basic blocks, not edges. Only add to it if
2507     // we have the simple case where the edge dominates the end.
2508 if (RootDominatesEnd && !isa<Instruction>(RHS) &&
2509 canReplacePointersIfEqual(LHS, RHS, DL))
2510 LeaderTable.insert(LVN, RHS, Root.getEnd());
2511
2512 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
2513 // LHS always has at least one use that is not dominated by Root, this will
2514 // never do anything if LHS has only one use.
2515 if (!LHS->hasOneUse()) {
2516 // Create a callback that captures the DL.
2517 auto canReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
2518 return canReplacePointersInUseIfEqual(U, To, DL);
2519 };
2520 unsigned NumReplacements =
2521 DominatesByEdge
2522 ? replaceDominatedUsesWithIf(LHS, RHS, *DT, Root,
2523 canReplacePointersCallBack)
2524 : replaceDominatedUsesWithIf(LHS, RHS, *DT, Root.getStart(),
2525 canReplacePointersCallBack);
2526
2527 if (NumReplacements > 0) {
2528 Changed = true;
2529 NumGVNEqProp += NumReplacements;
2530 // Cached information for anything that uses LHS will be invalid.
2531 if (MD)
2532 MD->invalidateCachedPointerInfo(LHS);
2533 }
2534 }
2535
2536 // Now try to deduce additional equalities from this one. For example, if
2537 // the known equality was "(A != B)" == "false" then it follows that A and B
2538 // are equal in the scope. Only boolean equalities with an explicit true or
2539 // false RHS are currently supported.
2540 if (!RHS->getType()->isIntegerTy(1))
2541 // Not a boolean equality - bail out.
2542 continue;
2543 ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
2544 if (!CI)
2545 // RHS neither 'true' nor 'false' - bail out.
2546 continue;
2547 // Whether RHS equals 'true'. Otherwise it equals 'false'.
2548 bool isKnownTrue = CI->isMinusOne();
2549 bool isKnownFalse = !isKnownTrue;
2550
2551 // If "A && B" is known true then both A and B are known true. If "A || B"
2552 // is known false then both A and B are known false.
2553 Value *A, *B;
2554 if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
2555 (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
2556 Worklist.push_back(std::make_pair(A, RHS));
2557 Worklist.push_back(std::make_pair(B, RHS));
2558 continue;
2559 }
2560
2561 // If we are propagating an equality like "(A == B)" == "true" then also
2562 // propagate the equality A == B. When propagating a comparison such as
2563 // "(A >= B)" == "true", replace all instances of "A < B" with "false".
2564 if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
2565 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
2566
2567 // If "A == B" is known true, or "A != B" is known false, then replace
2568 // A with B everywhere in the scope. For floating point operations, we
2569       // have to be careful since equality does not always imply equivalence.
2570 if ((isKnownTrue && impliesEquivalanceIfTrue(Cmp)) ||
2571 (isKnownFalse && impliesEquivalanceIfFalse(Cmp)))
2572 Worklist.push_back(std::make_pair(Op0, Op1));
2573
2574 // If "A >= B" is known true, replace "A < B" with false everywhere.
2575 CmpInst::Predicate NotPred = Cmp->getInversePredicate();
2576 Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
2577 // Since we don't have the instruction "A < B" immediately to hand, work
2578 // out the value number that it would have and use that to find an
2579 // appropriate instruction (if any).
2580 uint32_t NextNum = VN.getNextUnusedValueNumber();
2581 uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
2582 // If the number we were assigned was brand new then there is no point in
2583 // looking for an instruction realizing it: there cannot be one!
2584 if (Num < NextNum) {
2585 Value *NotCmp = findLeader(Root.getEnd(), Num);
2586 if (NotCmp && isa<Instruction>(NotCmp)) {
2587 unsigned NumReplacements =
2588 DominatesByEdge
2589 ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
2590 : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
2591 Root.getStart());
2592 Changed |= NumReplacements > 0;
2593 NumGVNEqProp += NumReplacements;
2594 // Cached information for anything that uses NotCmp will be invalid.
2595 if (MD)
2596 MD->invalidateCachedPointerInfo(NotCmp);
2597 }
2598 }
2599 // Ensure that any instruction in scope that gets the "A < B" value number
2600 // is replaced with false.
2601       // The leader table only tracks basic blocks, not edges. Only add to it if
2602       // we have the simple case where the edge dominates the end.
2603 if (RootDominatesEnd)
2604 LeaderTable.insert(Num, NotVal, Root.getEnd());
2605
2606 continue;
2607 }
2608 }
2609
2610 return Changed;
2611 }
2612
2613 /// When calculating availability, handle an instruction
2614 /// by inserting it into the appropriate sets
2615 bool GVNPass::processInstruction(Instruction *I) {
2616 // Ignore dbg info intrinsics.
2617 if (isa<DbgInfoIntrinsic>(I))
2618 return false;
2619
2620 // If the instruction can be easily simplified then do so now in preference
2621 // to value numbering it. Value numbering often exposes redundancies, for
2622 // example if it determines that %y is equal to %x then the instruction
2623 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
2624 const DataLayout &DL = I->getDataLayout();
2625 if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) {
2626 bool Changed = false;
2627 if (!I->use_empty()) {
2628 // Simplification can cause a special instruction to become not special.
2629 // For example, devirtualization to a willreturn function.
2630 ICF->removeUsersOf(I);
2631 I->replaceAllUsesWith(V);
2632 Changed = true;
2633 }
2634 if (isInstructionTriviallyDead(I, TLI)) {
2635 markInstructionForDeletion(I);
2636 Changed = true;
2637 }
2638 if (Changed) {
2639 if (MD && V->getType()->isPtrOrPtrVectorTy())
2640 MD->invalidateCachedPointerInfo(V);
2641 ++NumGVNSimpl;
2642 return true;
2643 }
2644 }
2645
2646 if (auto *Assume = dyn_cast<AssumeInst>(I))
2647 return processAssumeIntrinsic(Assume);
2648
2649 if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
2650 if (processLoad(Load))
2651 return true;
2652
2653 unsigned Num = VN.lookupOrAdd(Load);
2654 LeaderTable.insert(Num, Load, Load->getParent());
2655 return false;
2656 }
2657
2658 // For conditional branches, we can perform simple conditional propagation on
2659 // the condition value itself.
2660 if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
2661 if (!BI->isConditional())
2662 return false;
2663
2664 if (isa<Constant>(BI->getCondition()))
2665 return processFoldableCondBr(BI);
2666
2667 Value *BranchCond = BI->getCondition();
2668 BasicBlock *TrueSucc = BI->getSuccessor(0);
2669 BasicBlock *FalseSucc = BI->getSuccessor(1);
2670 // Avoid multiple edges early.
2671 if (TrueSucc == FalseSucc)
2672 return false;
2673
2674 BasicBlock *Parent = BI->getParent();
2675 bool Changed = false;
2676
2677 Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
2678 BasicBlockEdge TrueE(Parent, TrueSucc);
2679 Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
2680
2681 Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
2682 BasicBlockEdge FalseE(Parent, FalseSucc);
2683 Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
2684
2685 return Changed;
2686 }
2687
2688 // For switches, propagate the case values into the case destinations.
2689 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
2690 Value *SwitchCond = SI->getCondition();
2691 BasicBlock *Parent = SI->getParent();
2692 bool Changed = false;
2693
2694 // Remember how many outgoing edges there are to every successor.
2695 SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
2696 for (BasicBlock *Succ : successors(Parent))
2697 ++SwitchEdges[Succ];
2698
2699 for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
2700 i != e; ++i) {
2701 BasicBlock *Dst = i->getCaseSuccessor();
2702 // If there is only a single edge, propagate the case value into it.
2703 if (SwitchEdges.lookup(Dst) == 1) {
2704 BasicBlockEdge E(Parent, Dst);
2705 Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true);
2706 }
2707 }
2708 return Changed;
2709 }
2710
2711 // Instructions with void type don't return a value, so there's
2712 // no point in trying to find redundancies in them.
2713 if (I->getType()->isVoidTy())
2714 return false;
2715
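  // Remember the next unused value number so we can tell below whether
  // lookupOrAdd assigned a brand new number to this instruction.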
2716 uint32_t NextNum = VN.getNextUnusedValueNumber();
2717 unsigned Num = VN.lookupOrAdd(I);
2718
2719 // Allocations are always uniquely numbered, so we can save time and memory
2720   // by fast-failing them.
2721 if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
2722 LeaderTable.insert(Num, I, I->getParent());
2723 return false;
2724 }
2725
2726 // If the number we were assigned was a brand new VN, then we don't
2727 // need to do a lookup to see if the number already exists
2728 // somewhere in the domtree: it can't!
2729 if (Num >= NextNum) {
2730 LeaderTable.insert(Num, I, I->getParent());
2731 return false;
2732 }
2733
2734 // Perform fast-path value-number based elimination of values inherited from
2735 // dominators.
2736 Value *Repl = findLeader(I->getParent(), Num);
2737 if (!Repl) {
2738 // Failure, just remember this instance for future use.
2739 LeaderTable.insert(Num, I, I->getParent());
2740 return false;
2741 }
2742
2743 if (Repl == I) {
2744 // If I was the result of a shortcut PRE, it might already be in the table
2745 // and the best replacement for itself. Nothing to do.
2746 return false;
2747 }
2748
2749 // Remove it!
2750 patchAndReplaceAllUsesWith(I, Repl);
2751 if (MD && Repl->getType()->isPtrOrPtrVectorTy())
2752 MD->invalidateCachedPointerInfo(Repl);
2753 markInstructionForDeletion(I);
2754 return true;
2755 }
2756
2757 /// runOnFunction - This is the main transformation entry point for a function.
2758 bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
2759 const TargetLibraryInfo &RunTLI, AAResults &RunAA,
2760 MemoryDependenceResults *RunMD, LoopInfo &LI,
2761 OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
2762 AC = &RunAC;
2763 DT = &RunDT;
2764 VN.setDomTree(DT);
2765 TLI = &RunTLI;
2766 VN.setAliasAnalysis(&RunAA);
2767 MD = RunMD;
2768 ImplicitControlFlowTracking ImplicitCFT;
2769 ICF = &ImplicitCFT;
2770 this->LI = &LI;
2771 VN.setMemDep(MD);
2772 ORE = RunORE;
2773 InvalidBlockRPONumbers = true;
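  // Keep the updater on the stack; MSSAU is only non-null when a MemorySSA
  // analysis was actually provided.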
2774 MemorySSAUpdater Updater(MSSA);
2775 MSSAU = MSSA ? &Updater : nullptr;
2776
2777 bool Changed = false;
2778 bool ShouldContinue = true;
2779
2780 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2781 // Merge unconditional branches, allowing PRE to catch more
2782 // optimization opportunities.
2783 for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
2784 bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, &LI, MSSAU, MD);
2785 if (removedBlock)
2786 ++NumGVNBlocks;
2787
2788 Changed |= removedBlock;
2789 }
2790 DTU.flush();
2791
2792 unsigned Iteration = 0;
2793 while (ShouldContinue) {
2794 LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
2795 (void) Iteration;
2796 ShouldContinue = iterateOnFunction(F);
2797 Changed |= ShouldContinue;
2798 ++Iteration;
2799 }
2800
2801 if (isPREEnabled()) {
2802 // Fabricate val-num for dead-code in order to suppress assertion in
2803 // performPRE().
2804 assignValNumForDeadCode();
2805 bool PREChanged = true;
2806 while (PREChanged) {
2807 PREChanged = performPRE(F);
2808 Changed |= PREChanged;
2809 }
2810 }
2811
2812 // FIXME: Should perform GVN again after PRE does something. PRE can move
2813 // computations into blocks where they become fully redundant. Note that
2814 // we can't do this until PRE's critical edge splitting updates memdep.
2815 // Actually, when this happens, we should just fully integrate PRE into GVN.
2816
2817 cleanupGlobalSets();
2818 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
2819 // iteration.
2820 DeadBlocks.clear();
2821
2822 if (MSSA && VerifyMemorySSA)
2823 MSSA->verifyMemorySSA();
2824
2825 return Changed;
2826 }
2827
2828 bool GVNPass::processBlock(BasicBlock *BB) {
2829 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
2830 // (and incrementing BI before processing an instruction).
2831 assert(InstrsToErase.empty() &&
2832 "We expect InstrsToErase to be empty across iterations");
2833 if (DeadBlocks.count(BB))
2834 return false;
2835
2836   // Clear the map before every BB because it can only be used within a single BB.
2837 ReplaceOperandsWithMap.clear();
2838 bool ChangedFunction = false;
2839
2840 // Since we may not have visited the input blocks of the phis, we can't
2841 // use our normal hash approach for phis. Instead, simply look for
2842 // obvious duplicates. The first pass of GVN will tend to create
2843 // identical phis, and the second or later passes can eliminate them.
2844 SmallPtrSet<PHINode *, 8> PHINodesToRemove;
2845 ChangedFunction |= EliminateDuplicatePHINodes(BB, PHINodesToRemove);
2846 for (PHINode *PN : PHINodesToRemove) {
2847 VN.erase(PN);
2848 removeInstruction(PN);
2849 }
2850
2851 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
2852 BI != BE;) {
2853 if (!ReplaceOperandsWithMap.empty())
2854 ChangedFunction |= replaceOperandsForInBlockEquality(&*BI);
2855 ChangedFunction |= processInstruction(&*BI);
2856
2857 if (InstrsToErase.empty()) {
2858 ++BI;
2859 continue;
2860 }
2861
2862 // If we need some instructions deleted, do it now.
2863 NumGVNInstr += InstrsToErase.size();
2864
2865 // Avoid iterator invalidation.
2866 bool AtStart = BI == BB->begin();
2867 if (!AtStart)
2868 --BI;
2869
2870 for (auto *I : InstrsToErase) {
2871 assert(I->getParent() == BB && "Removing instruction from wrong block?");
2872 LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n');
2873 salvageKnowledge(I, AC);
2874 salvageDebugInfo(*I);
2875 removeInstruction(I);
2876 }
2877 InstrsToErase.clear();
2878
2879 if (AtStart)
2880 BI = BB->begin();
2881 else
2882 ++BI;
2883 }
2884
2885 return ChangedFunction;
2886 }
2887
// Instantiate an expression in a predecessor that lacked it.
bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
                                        BasicBlock *Curr, unsigned int ValNo) {
  // Because we are going top-down through the block, all value numbers
  // will be available in the predecessor by the time we need them. Any
  // that weren't originally present will have been instantiated earlier
  // in this loop.
  bool success = true;
  for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) {
    Value *Op = Instr->getOperand(i);
    if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
      continue;
    // This could be a newly inserted instruction, in which case, we won't
    // find a value number, and should give up before we hurt ourselves.
    // FIXME: Rewrite the infrastructure to make it easier to value number
    // and process newly inserted instructions.
    if (!VN.exists(Op)) {
      success = false;
      break;
    }
    uint32_t TValNo =
        VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
    if (Value *V = findLeader(Pred, TValNo)) {
      Instr->setOperand(i, V);
    } else {
      success = false;
      break;
    }
  }

  // Fail out if we encounter an operand that is not available in
  // the PRE predecessor. This is typically because of loads which
  // are not value numbered precisely.
  if (!success)
    return false;

  Instr->insertBefore(Pred->getTerminator());
  Instr->setName(Instr->getName() + ".pre");
  Instr->setDebugLoc(Instr->getDebugLoc());

  ICF->insertInstructionTo(Instr, Pred);

  unsigned Num = VN.lookupOrAdd(Instr);
  VN.add(Instr, Num);

  // Update the availability map to include the new instruction.
  LeaderTable.insert(Num, Instr, Pred);
  return true;
}

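/// Attempt scalar PRE on a single instruction. If CurInst's value is available
/// in all predecessors of its block, or in all but one, a ".pre-phi" PHI is
/// created to merge the per-predecessor values; in the latter case the
/// computation is first cloned (suffixed ".pre") into the missing predecessor.
/// A purely illustrative sketch of the diamond case this targets:
///
///   if.then:                      if.else:
///     %v = add i32 %a, %b           ; no leader here; a ".pre" clone is
///                                   ; inserted by performScalarPREInsertion()
///   join:
///     %x = add i32 %a, %b           ; replaced by %x.pre-phi = phi i32 ...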
bool GVNPass::performScalarPRE(Instruction *CurInst) {
  if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
      isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
      CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
      isa<DbgInfoIntrinsic>(CurInst))
    return false;

  // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
  // sinking the compare again, and it would force the code generator to
  // move the i1 from processor flags or predicate registers into a general
  // purpose register.
  if (isa<CmpInst>(CurInst))
    return false;

  // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
  // sinking the addressing mode computation back to its uses. Extending the
  // GEP's live range increases the register pressure, and therefore it can
  // introduce unnecessary spills.
  //
  // This doesn't prevent Load PRE. PHI translation will make the GEP available
  // to the load by moving it to the predecessor block if necessary.
  if (isa<GetElementPtrInst>(CurInst))
    return false;

  if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
    // We don't currently value number ANY inline asm calls.
    if (CallB->isInlineAsm())
      return false;
  }

  uint32_t ValNo = VN.lookup(CurInst);

  // Look for the predecessors for PRE opportunities. We're
  // only trying to solve the basic diamond case, where
  // a value is computed in the successor and one predecessor,
  // but not the other. We also explicitly disallow cases
  // where the successor is its own predecessor, because they're
  // more complicated to get right.
  unsigned NumWith = 0;
  unsigned NumWithout = 0;
  BasicBlock *PREPred = nullptr;
  BasicBlock *CurrentBlock = CurInst->getParent();

  // Update the RPO numbers for this function.
  if (InvalidBlockRPONumbers)
    assignBlockRPONumber(*CurrentBlock->getParent());

  SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
  for (BasicBlock *P : predecessors(CurrentBlock)) {
    // We're not interested in PRE when a predecessor is not reachable.
    if (!DT->isReachableFromEntry(P)) {
      NumWithout = 2;
      break;
    }
    // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
    assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
           "Invalid BlockRPONumber map.");
    if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
      NumWithout = 2;
      break;
    }

    uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
    Value *predV = findLeader(P, TValNo);
    if (!predV) {
      predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
      PREPred = P;
      ++NumWithout;
    } else if (predV == CurInst) {
      /* CurInst dominates this predecessor. */
      NumWithout = 2;
      break;
    } else {
      predMap.push_back(std::make_pair(predV, P));
      ++NumWith;
    }
  }

  // Don't do PRE when it might increase code size, i.e. when
  // we would need to insert instructions in more than one pred.
  if (NumWithout > 1 || NumWith == 0)
    return false;

  // We may have a case where all predecessors have the instruction,
  // and we just need to insert a phi node. Otherwise, perform
  // insertion.
  Instruction *PREInstr = nullptr;

  if (NumWithout != 0) {
    if (!isSafeToSpeculativelyExecute(CurInst)) {
      // It is only valid to insert a new instruction if the current instruction
      // is always executed. An instruction with implicit control flow could
      // prevent us from doing it. If we cannot speculate the execution, then
      // PRE should be prohibited.
      if (ICF->isDominatedByICFIFromSameBlock(CurInst))
        return false;
    }

    // Don't do PRE across an indirect branch.
    if (isa<IndirectBrInst>(PREPred->getTerminator()))
      return false;

    // We can't do PRE safely on a critical edge, so instead we schedule
    // the edge to be split and perform the PRE the next time we iterate
    // on the function.
    unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
    if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
      toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
      return false;
    }
    // We need to insert somewhere, so let's give it a shot.
    PREInstr = CurInst->clone();
    if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
      // If we failed insertion, make sure we remove the instruction.
#ifndef NDEBUG
      verifyRemoved(PREInstr);
#endif
      PREInstr->deleteValue();
      return false;
    }
  }

  // Either we should have filled in the PRE instruction, or we should
  // not have needed insertions.
  assert(PREInstr != nullptr || NumWithout == 0);

  ++NumGVNPRE;

  // Create a PHI to make the value available in this block.
  PHINode *Phi = PHINode::Create(CurInst->getType(), predMap.size(),
                                 CurInst->getName() + ".pre-phi");
  Phi->insertBefore(CurrentBlock->begin());
  for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
    if (Value *V = predMap[i].first) {
      // If we use an existing value in this phi, we have to patch the original
      // value because the phi will be used to replace a later value.
      patchReplacementInstruction(CurInst, V);
      Phi->addIncoming(V, predMap[i].second);
    } else
      Phi->addIncoming(PREInstr, PREPred);
  }

  VN.add(Phi, ValNo);
  // After creating a new PHI for ValNo, the phi translate result for ValNo will
  // be changed, so erase the related stale entries in phi translate cache.
  VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
  LeaderTable.insert(ValNo, Phi, CurrentBlock);
  Phi->setDebugLoc(CurInst->getDebugLoc());
  CurInst->replaceAllUsesWith(Phi);
  if (MD && Phi->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(Phi);
  VN.erase(CurInst);
  LeaderTable.erase(ValNo, CurInst, CurrentBlock);

  LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
  removeInstruction(CurInst);
  ++NumGVNInstr;

  return true;
}

/// Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVNPass::performPRE(Function &F) {
  bool Changed = false;
  for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock())
      continue;

    // Don't perform PRE on an EH pad.
    if (CurrentBlock->isEHPad())
      continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
                              BE = CurrentBlock->end();
         BI != BE;) {
      Instruction *CurInst = &*BI++;
      Changed |= performScalarPRE(CurInst);
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// Split the critical edge connecting the given two blocks, and return
/// the block inserted on the critical edge.
BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
  // GVN does not require loop-simplify; do not try to preserve it if that is
  // not possible.
  BasicBlock *BB = SplitCriticalEdge(
      Pred, Succ,
      CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
  if (BB) {
    if (MD)
      MD->invalidateCachedPredecessors();
    InvalidBlockRPONumbers = true;
  }
  return BB;
}

/// Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVNPass::splitCriticalEdges() {
  if (toSplit.empty())
    return false;

  bool Changed = false;
  do {
    std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
    Changed |= SplitCriticalEdge(Edge.first, Edge.second,
                                 CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
               nullptr;
  } while (!toSplit.empty());
  if (Changed) {
    if (MD)
      MD->invalidateCachedPredecessors();
    InvalidBlockRPONumbers = true;
  }
  return Changed;
}

/// Executes one iteration of GVN.
bool GVNPass::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the function in reverse post-order.
  bool Changed = false;
  // Needed for value numbering with phi construction to work.
  // RPOT walks the graph in its constructor and will not be invalidated during
  // processBlock.
  ReversePostOrderTraversal<Function *> RPOT(&F);

  for (BasicBlock *BB : RPOT)
    Changed |= processBlock(BB);

  return Changed;
}

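/// Reset the per-iteration state (value numbers, leader table, block RPO
/// numbers and implicit control flow tracking). DeadBlocks is intentionally
/// not cleared here; it is cleared once at the end of runImpl().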
void GVNPass::cleanupGlobalSets() {
  VN.clear();
  LeaderTable.clear();
  BlockRPONumber.clear();
  ICF->clear();
  InvalidBlockRPONumbers = true;
}

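/// Erase an instruction from the function and drop it from the analyses GVN
/// keeps up to date (MemoryDependence, MemorySSA, and implicit control flow
/// tracking).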
void GVNPass::removeInstruction(Instruction *I) {
  if (MD) MD->removeInstruction(I);
  if (MSSAU)
    MSSAU->removeMemoryAccess(I);
#ifndef NDEBUG
  verifyRemoved(I);
#endif
  ICF->removeInstruction(I);
  I->eraseFromParent();
}

/// Verify that the specified instruction does not occur in our
/// internal data structures.
void GVNPass::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);
  LeaderTable.verifyRemoved(Inst);
}

/// BB is declared dead, which implies other blocks become dead as well. This
/// function adds all these blocks to "DeadBlocks". For the dead blocks'
/// live successors, update their phi nodes by replacing the operands
/// corresponding to dead blocks with a poison value.
void GVNPass::addDeadBlock(BasicBlock *BB) {
  SmallVector<BasicBlock *, 4> NewDead;
  SmallSetVector<BasicBlock *, 4> DF;

  NewDead.push_back(BB);
  while (!NewDead.empty()) {
    BasicBlock *D = NewDead.pop_back_val();
    if (DeadBlocks.count(D))
      continue;

    // All blocks dominated by D are dead.
    SmallVector<BasicBlock *, 8> Dom;
    DT->getDescendants(D, Dom);
    DeadBlocks.insert(Dom.begin(), Dom.end());

    // Figure out the dominance-frontier(D).
    for (BasicBlock *B : Dom) {
      for (BasicBlock *S : successors(B)) {
        if (DeadBlocks.count(S))
          continue;

        bool AllPredDead = true;
        for (BasicBlock *P : predecessors(S))
          if (!DeadBlocks.count(P)) {
            AllPredDead = false;
            break;
          }

        if (!AllPredDead) {
          // S could be proved dead later on. That is why we don't update phi
          // operands at this moment.
          DF.insert(S);
        } else {
          // While S is not dominated by D, it is dead by now. This could happen
          // if S already had a dead predecessor before D was declared dead.
          NewDead.push_back(S);
        }
      }
    }
  }

  // For the dead blocks' live successors, update their phi nodes by replacing
  // the operands corresponding to dead blocks with a poison value.
  for (BasicBlock *B : DF) {
    if (DeadBlocks.count(B))
      continue;

    // First, split the critical edges. This might also create additional blocks
    // to preserve LoopSimplify form and adjust edges accordingly.
    SmallVector<BasicBlock *, 4> Preds(predecessors(B));
    for (BasicBlock *P : Preds) {
      if (!DeadBlocks.count(P))
        continue;

      if (llvm::is_contained(successors(P), B) &&
          isCriticalEdge(P->getTerminator(), B)) {
        if (BasicBlock *S = splitCriticalEdges(P, B))
          DeadBlocks.insert(P = S);
      }
    }

    // Now poison the incoming values from the dead predecessors.
    for (BasicBlock *P : predecessors(B)) {
      if (!DeadBlocks.count(P))
        continue;
      for (PHINode &Phi : B->phis()) {
        Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType()));
        if (MD)
          MD->invalidateCachedPointerInfo(&Phi);
      }
    }
  }
}

// If the given branch is recognized as a foldable branch (i.e. a conditional
// branch with a constant condition), it will perform the following analyses
// and transformations.
//  1) If the dead outgoing edge is a critical edge, split it. Let R be the
//     target of the dead outgoing edge.
//  2) Identify the set of dead blocks implied by the branch's dead outgoing
//     edge. The result of this step will be {X | X is dominated by R}.
//  3) Identify those blocks which have at least one dead predecessor. The
//     result of this step will be dominance-frontier(R).
//  4) Update the PHIs in DF(R) by replacing the operands corresponding to
//     dead blocks with a poison value, in the hope that these PHIs will be
//     optimized away.
//
// Return true iff *NEW* dead code is found.
bool GVNPass::processFoldableCondBr(BranchInst *BI) {
  if (!BI || BI->isUnconditional())
    return false;

  // If a branch has two identical successors, we cannot declare either dead.
  if (BI->getSuccessor(0) == BI->getSuccessor(1))
    return false;

  ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
  if (!Cond)
    return false;

  BasicBlock *DeadRoot =
      Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
  if (DeadBlocks.count(DeadRoot))
    return false;

  if (!DeadRoot->getSinglePredecessor())
    DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);

  addDeadBlock(DeadRoot);
  return true;
}

// performPRE() will trigger an assert if it comes across an instruction without
// an associated val-num. As there are normally far more live instructions than
// dead instructions, it makes more sense just to "fabricate" a val-number for
// the dead code than to check whether each instruction involved is dead or not.
void GVNPass::assignValNumForDeadCode() {
  for (BasicBlock *BB : DeadBlocks) {
    for (Instruction &Inst : *BB) {
      unsigned ValNum = VN.lookupOrAdd(&Inst);
      LeaderTable.insert(ValNum, &Inst, BB);
    }
  }
}

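/// Legacy pass-manager wrapper that gathers the required analyses and forwards
/// them to GVNPass::runImpl().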
class llvm::gvn::GVNLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  explicit GVNLegacyPass(bool NoMemDepAnalysis = !GVNEnableMemDep)
      : FunctionPass(ID), Impl(GVNOptions().setMemDep(!NoMemDepAnalysis)) {
    initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    return Impl.runImpl(
        F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
        getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
        getAnalysis<AAResultsWrapperPass>().getAAResults(),
        Impl.isMemDepEnabled()
            ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
            : nullptr,
        getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
        &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
        MSSAWP ? &MSSAWP->getMSSA() : nullptr);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    if (Impl.isMemDepEnabled())
      AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
  }

private:
  GVNPass Impl;
};

char GVNLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false,
                    false)

// The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoMemDepAnalysis) {
  return new GVNLegacyPass(NoMemDepAnalysis);
}