xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- GVNSink.cpp - sink expressions into successors ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file GVNSink.cpp
10 /// This pass attempts to sink instructions into successors, reducing static
11 /// instruction count and enabling if-conversion.
12 ///
13 /// We use a variant of global value numbering to decide what can be sunk.
14 /// Consider:
15 ///
16 /// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
17 /// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
18 ///                  \           /
19 ///            [ %e = phi i32 %a2, %c2 ]
20 ///            [ add i32 %e, 4         ]
21 ///
22 ///
23 /// GVN would number %a1 and %c1 differently because they compute different
24 /// results - the VN of an instruction is a function of its opcode and the
25 /// transitive closure of its operands. This is the key property for hoisting
26 /// and CSE.
27 ///
28 /// What we want when sinking however is for a numbering that is a function of
29 /// the *uses* of an instruction, which allows us to answer the question "if I
30 /// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The ValueTable class in this file provides this
/// mapping.
33 //
34 //===----------------------------------------------------------------------===//
35 
36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/DenseSet.h"
39 #include "llvm/ADT/Hashing.h"
40 #include "llvm/ADT/PostOrderIterator.h"
41 #include "llvm/ADT/STLExtras.h"
42 #include "llvm/ADT/SmallPtrSet.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/ADT/Statistic.h"
45 #include "llvm/Analysis/GlobalsModRef.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/CFG.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/PassManager.h"
54 #include "llvm/IR/Type.h"
55 #include "llvm/IR/Use.h"
56 #include "llvm/IR/Value.h"
57 #include "llvm/Support/Allocator.h"
58 #include "llvm/Support/ArrayRecycler.h"
59 #include "llvm/Support/AtomicOrdering.h"
60 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/Compiler.h"
62 #include "llvm/Support/Debug.h"
63 #include "llvm/Support/raw_ostream.h"
64 #include "llvm/Transforms/Scalar/GVN.h"
65 #include "llvm/Transforms/Scalar/GVNExpression.h"
66 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
67 #include "llvm/Transforms/Utils/Local.h"
68 #include "llvm/Transforms/Utils/LockstepReverseIterator.h"
69 #include <cassert>
70 #include <cstddef>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 #define DEBUG_TYPE "gvn-sink"
78 
79 STATISTIC(NumRemoved, "Number of instructions removed");
80 
81 namespace llvm {
82 namespace GVNExpression {
83 
dump() const84 LLVM_DUMP_METHOD void Expression::dump() const {
85   print(dbgs());
86   dbgs() << "\n";
87 }
88 
89 } // end namespace GVNExpression
90 } // end namespace llvm
91 
92 namespace {
93 
isMemoryInst(const Instruction * I)94 static bool isMemoryInst(const Instruction *I) {
95   return isa<LoadInst>(I) || isa<StoreInst>(I) ||
96          (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
97          (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
98 }
99 
100 //===----------------------------------------------------------------------===//
101 
102 /// Candidate solution for sinking. There may be different ways to
103 /// sink instructions, differing in the number of instructions sunk,
104 /// the number of predecessors sunk from and the number of PHIs
105 /// required.
106 struct SinkingInstructionCandidate {
107   unsigned NumBlocks;
108   unsigned NumInstructions;
109   unsigned NumPHIs;
110   unsigned NumMemoryInsts;
111   int Cost = -1;
112   SmallVector<BasicBlock *, 4> Blocks;
113 
calculateCost__anonac6bc9b30111::SinkingInstructionCandidate114   void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
115     unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
116     unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
117     Cost = (NumInstructions * (NumBlocks - 1)) -
118            (NumExtraPHIs *
119             NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
120            - SplitEdgeCost;
121   }
122 
operator >__anonac6bc9b30111::SinkingInstructionCandidate123   bool operator>(const SinkingInstructionCandidate &Other) const {
124     return Cost > Other.Cost;
125   }
126 };
127 
128 #ifndef NDEBUG
operator <<(raw_ostream & OS,const SinkingInstructionCandidate & C)129 raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
130   OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
131      << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
132   return OS;
133 }
134 #endif
135 
136 //===----------------------------------------------------------------------===//
137 
138 /// Describes a PHI node that may or may not exist. These track the PHIs
139 /// that must be created if we sunk a sequence of instructions. It provides
140 /// a hash function for efficient equality comparisons.
141 class ModelledPHI {
142   SmallVector<Value *, 4> Values;
143   SmallVector<BasicBlock *, 4> Blocks;
144 
145 public:
146   ModelledPHI() = default;
147 
ModelledPHI(const PHINode * PN,const DenseMap<const BasicBlock *,unsigned> & BlockOrder)148   ModelledPHI(const PHINode *PN,
149               const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
150     // BasicBlock comes first so we sort by basic block pointer order,
151     // then by value pointer order. No need to call `verifyModelledPHI`
152     // As the Values and Blocks are populated in a deterministic order.
153     using OpsType = std::pair<BasicBlock *, Value *>;
154     SmallVector<OpsType, 4> Ops;
155     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
156       Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
157 
158     auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) {
159       return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first);
160     };
161     // Sort in a deterministic order.
162     llvm::sort(Ops, ComesBefore);
163 
164     for (auto &P : Ops) {
165       Blocks.push_back(P.first);
166       Values.push_back(P.second);
167     }
168   }
169 
170   /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
171   /// without the same ID.
172   /// \note This is specifically for DenseMapInfo - do not use this!
createDummy(size_t ID)173   static ModelledPHI createDummy(size_t ID) {
174     ModelledPHI M;
175     M.Values.push_back(reinterpret_cast<Value*>(ID));
176     return M;
177   }
178 
179   void
verifyModelledPHI(const DenseMap<const BasicBlock *,unsigned> & BlockOrder)180   verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
181     assert(Values.size() > 1 && Blocks.size() > 1 &&
182            "Modelling PHI with less than 2 values");
183     auto ComesBefore = [BlockOrder](const BasicBlock *BB1,
184                                     const BasicBlock *BB2) {
185       return BlockOrder.lookup(BB1) < BlockOrder.lookup(BB2);
186     };
187     assert(llvm::is_sorted(Blocks, ComesBefore));
188     int C = 0;
189     for (const Value *V : Values) {
190       if (!isa<UndefValue>(V)) {
191         assert(cast<Instruction>(V)->getParent() == Blocks[C]);
192         (void)C;
193       }
194       C++;
195     }
196   }
197   /// Create a PHI from an array of incoming values and incoming blocks.
ModelledPHI(SmallVectorImpl<Instruction * > & V,SmallSetVector<BasicBlock *,4> & B,const DenseMap<const BasicBlock *,unsigned> & BlockOrder)198   ModelledPHI(SmallVectorImpl<Instruction *> &V,
199               SmallSetVector<BasicBlock *, 4> &B,
200               const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
201     // The order of Values and Blocks are already ordered by the caller.
202     llvm::append_range(Values, V);
203     llvm::append_range(Blocks, B);
204     verifyModelledPHI(BlockOrder);
205   }
206 
207   /// Create a PHI from [I[OpNum] for I in Insts].
208   /// TODO: Figure out a way to verifyModelledPHI in this constructor.
ModelledPHI(ArrayRef<Instruction * > Insts,unsigned OpNum,SmallSetVector<BasicBlock *,4> & B)209   ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
210               SmallSetVector<BasicBlock *, 4> &B) {
211     llvm::append_range(Blocks, B);
212     for (auto *I : Insts)
213       Values.push_back(I->getOperand(OpNum));
214   }
215 
216   /// Restrict the PHI's contents down to only \c NewBlocks.
217   /// \c NewBlocks must be a subset of \c this->Blocks.
restrictToBlocks(const SmallSetVector<BasicBlock *,4> & NewBlocks)218   void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
219     auto BI = Blocks.begin();
220     auto VI = Values.begin();
221     while (BI != Blocks.end()) {
222       assert(VI != Values.end());
223       if (!NewBlocks.contains(*BI)) {
224         BI = Blocks.erase(BI);
225         VI = Values.erase(VI);
226       } else {
227         ++BI;
228         ++VI;
229       }
230     }
231     assert(Blocks.size() == NewBlocks.size());
232   }
233 
getValues() const234   ArrayRef<Value *> getValues() const { return Values; }
235 
areAllIncomingValuesSame() const236   bool areAllIncomingValuesSame() const {
237     return llvm::all_equal(Values);
238   }
239 
areAllIncomingValuesSameType() const240   bool areAllIncomingValuesSameType() const {
241     return llvm::all_of(
242         Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
243   }
244 
areAnyIncomingValuesConstant() const245   bool areAnyIncomingValuesConstant() const {
246     return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
247   }
248 
249   // Hash functor
hash() const250   unsigned hash() const {
251     // Is deterministic because Values are saved in a specific order.
252     return (unsigned)hash_combine_range(Values);
253   }
254 
operator ==(const ModelledPHI & Other) const255   bool operator==(const ModelledPHI &Other) const {
256     return Values == Other.Values && Blocks == Other.Blocks;
257   }
258 };
259 
260 template <typename ModelledPHI> struct DenseMapInfo {
getEmptyKey__anonac6bc9b30111::DenseMapInfo261   static inline ModelledPHI &getEmptyKey() {
262     static ModelledPHI Dummy = ModelledPHI::createDummy(0);
263     return Dummy;
264   }
265 
getTombstoneKey__anonac6bc9b30111::DenseMapInfo266   static inline ModelledPHI &getTombstoneKey() {
267     static ModelledPHI Dummy = ModelledPHI::createDummy(1);
268     return Dummy;
269   }
270 
getHashValue__anonac6bc9b30111::DenseMapInfo271   static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
272 
isEqual__anonac6bc9b30111::DenseMapInfo273   static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
274     return LHS == RHS;
275   }
276 };
277 
278 using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
279 
280 //===----------------------------------------------------------------------===//
281 //                             ValueTable
282 //===----------------------------------------------------------------------===//
283 // This is a value number table where the value number is a function of the
284 // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
285 // that the program would be equivalent if we replaced A with PHI(A, B).
286 //===----------------------------------------------------------------------===//
287 
288 /// A GVN expression describing how an instruction is used. The operands
289 /// field of BasicExpression is used to store uses, not operands.
290 ///
291 /// This class also contains fields for discriminators used when determining
292 /// equivalence of instructions with sideeffects.
293 class InstructionUseExpr : public GVNExpression::BasicExpression {
294   unsigned MemoryUseOrder = -1;
295   bool Volatile = false;
296   ArrayRef<int> ShuffleMask;
297 
298 public:
InstructionUseExpr(Instruction * I,ArrayRecycler<Value * > & R,BumpPtrAllocator & A)299   InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
300                      BumpPtrAllocator &A)
301       : GVNExpression::BasicExpression(I->getNumUses()) {
302     allocateOperands(R, A);
303     setOpcode(I->getOpcode());
304     setType(I->getType());
305 
306     if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
307       ShuffleMask = SVI->getShuffleMask().copy(A);
308 
309     for (auto &U : I->uses())
310       op_push_back(U.getUser());
311     llvm::sort(op_begin(), op_end());
312   }
313 
setMemoryUseOrder(unsigned MUO)314   void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
setVolatile(bool V)315   void setVolatile(bool V) { Volatile = V; }
316 
getHashValue() const317   hash_code getHashValue() const override {
318     return hash_combine(GVNExpression::BasicExpression::getHashValue(),
319                         MemoryUseOrder, Volatile, ShuffleMask);
320   }
321 
getHashValue(Function MapFn)322   template <typename Function> hash_code getHashValue(Function MapFn) {
323     hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
324                                ShuffleMask);
325     for (auto *V : operands())
326       H = hash_combine(H, MapFn(V));
327     return H;
328   }
329 };
330 
331 using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
332 
333 class ValueTable {
334   DenseMap<Value *, uint32_t> ValueNumbering;
335   DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
336   DenseMap<size_t, uint32_t> HashNumbering;
337   BumpPtrAllocator Allocator;
338   ArrayRecycler<Value *> Recycler;
339   uint32_t nextValueNumber = 1;
340   BasicBlocksSet ReachableBBs;
341 
342   /// Create an expression for I based on its opcode and its uses. If I
343   /// touches or reads memory, the expression is also based upon its memory
344   /// order - see \c getMemoryUseOrder().
createExpr(Instruction * I)345   InstructionUseExpr *createExpr(Instruction *I) {
346     InstructionUseExpr *E =
347         new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
348     if (isMemoryInst(I))
349       E->setMemoryUseOrder(getMemoryUseOrder(I));
350 
351     if (CmpInst *C = dyn_cast<CmpInst>(I)) {
352       CmpInst::Predicate Predicate = C->getPredicate();
353       E->setOpcode((C->getOpcode() << 8) | Predicate);
354     }
355     return E;
356   }
357 
358   /// Helper to compute the value number for a memory instruction
359   /// (LoadInst/StoreInst), including checking the memory ordering and
360   /// volatility.
createMemoryExpr(Inst * I)361   template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
362     if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
363       return nullptr;
364     InstructionUseExpr *E = createExpr(I);
365     E->setVolatile(I->isVolatile());
366     return E;
367   }
368 
369 public:
370   ValueTable() = default;
371 
372   /// Set basic blocks reachable from entry block.
setReachableBBs(const BasicBlocksSet & ReachableBBs)373   void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
374     this->ReachableBBs = ReachableBBs;
375   }
376 
377   /// Returns the value number for the specified value, assigning
378   /// it a new number if it did not have one before.
lookupOrAdd(Value * V)379   uint32_t lookupOrAdd(Value *V) {
380     auto VI = ValueNumbering.find(V);
381     if (VI != ValueNumbering.end())
382       return VI->second;
383 
384     if (!isa<Instruction>(V)) {
385       ValueNumbering[V] = nextValueNumber;
386       return nextValueNumber++;
387     }
388 
389     Instruction *I = cast<Instruction>(V);
390     if (!ReachableBBs.contains(I->getParent()))
391       return ~0U;
392 
393     InstructionUseExpr *exp = nullptr;
394     switch (I->getOpcode()) {
395     case Instruction::Load:
396       exp = createMemoryExpr(cast<LoadInst>(I));
397       break;
398     case Instruction::Store:
399       exp = createMemoryExpr(cast<StoreInst>(I));
400       break;
401     case Instruction::Call:
402     case Instruction::Invoke:
403     case Instruction::FNeg:
404     case Instruction::Add:
405     case Instruction::FAdd:
406     case Instruction::Sub:
407     case Instruction::FSub:
408     case Instruction::Mul:
409     case Instruction::FMul:
410     case Instruction::UDiv:
411     case Instruction::SDiv:
412     case Instruction::FDiv:
413     case Instruction::URem:
414     case Instruction::SRem:
415     case Instruction::FRem:
416     case Instruction::Shl:
417     case Instruction::LShr:
418     case Instruction::AShr:
419     case Instruction::And:
420     case Instruction::Or:
421     case Instruction::Xor:
422     case Instruction::ICmp:
423     case Instruction::FCmp:
424     case Instruction::Trunc:
425     case Instruction::ZExt:
426     case Instruction::SExt:
427     case Instruction::FPToUI:
428     case Instruction::FPToSI:
429     case Instruction::UIToFP:
430     case Instruction::SIToFP:
431     case Instruction::FPTrunc:
432     case Instruction::FPExt:
433     case Instruction::PtrToInt:
434     case Instruction::IntToPtr:
435     case Instruction::BitCast:
436     case Instruction::AddrSpaceCast:
437     case Instruction::Select:
438     case Instruction::ExtractElement:
439     case Instruction::InsertElement:
440     case Instruction::ShuffleVector:
441     case Instruction::InsertValue:
442     case Instruction::GetElementPtr:
443       exp = createExpr(I);
444       break;
445     default:
446       break;
447     }
448 
449     if (!exp) {
450       ValueNumbering[V] = nextValueNumber;
451       return nextValueNumber++;
452     }
453 
454     uint32_t e = ExpressionNumbering[exp];
455     if (!e) {
456       hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
457       auto [I, Inserted] = HashNumbering.try_emplace(H, nextValueNumber);
458       e = I->second;
459       if (Inserted)
460         ExpressionNumbering[exp] = nextValueNumber++;
461     }
462     ValueNumbering[V] = e;
463     return e;
464   }
465 
466   /// Returns the value number of the specified value. Fails if the value has
467   /// not yet been numbered.
lookup(Value * V) const468   uint32_t lookup(Value *V) const {
469     auto VI = ValueNumbering.find(V);
470     assert(VI != ValueNumbering.end() && "Value not numbered?");
471     return VI->second;
472   }
473 
474   /// Removes all value numberings and resets the value table.
clear()475   void clear() {
476     ValueNumbering.clear();
477     ExpressionNumbering.clear();
478     HashNumbering.clear();
479     Recycler.clear(Allocator);
480     nextValueNumber = 1;
481   }
482 
483   /// \c Inst uses or touches memory. Return an ID describing the memory state
484   /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
485   /// the exact same memory operations happen after I1 and I2.
486   ///
487   /// This is a very hard problem in general, so we use domain-specific
488   /// knowledge that we only ever check for equivalence between blocks sharing a
489   /// single immediate successor that is common, and when determining if I1 ==
490   /// I2 we will have already determined that next(I1) == next(I2). This
491   /// inductive property allows us to simply return the value number of the next
492   /// instruction that defines memory.
getMemoryUseOrder(Instruction * Inst)493   uint32_t getMemoryUseOrder(Instruction *Inst) {
494     auto *BB = Inst->getParent();
495     for (auto I = std::next(Inst->getIterator()), E = BB->end();
496          I != E && !I->isTerminator(); ++I) {
497       if (!isMemoryInst(&*I))
498         continue;
499       if (isa<LoadInst>(&*I))
500         continue;
501       CallInst *CI = dyn_cast<CallInst>(&*I);
502       if (CI && CI->onlyReadsMemory())
503         continue;
504       InvokeInst *II = dyn_cast<InvokeInst>(&*I);
505       if (II && II->onlyReadsMemory())
506         continue;
507       return lookupOrAdd(&*I);
508     }
509     return 0;
510   }
511 };
512 
513 //===----------------------------------------------------------------------===//
514 
515 class GVNSink {
516 public:
GVNSink()517   GVNSink() {}
518 
run(Function & F)519   bool run(Function &F) {
520     LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
521                       << "\n");
522 
523     unsigned NumSunk = 0;
524     ReversePostOrderTraversal<Function*> RPOT(&F);
525     VN.setReachableBBs(BasicBlocksSet(llvm::from_range, RPOT));
526     // Populate reverse post-order to order basic blocks in deterministic
527     // order. Any arbitrary ordering will work in this case as long as they are
528     // deterministic. The node ordering of newly created basic blocks
529     // are irrelevant because RPOT(for computing sinkable candidates) is also
530     // obtained ahead of time and only their order are relevant for this pass.
531     unsigned NodeOrdering = 0;
532     RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
533     for (auto *BB : RPOT)
534       if (!pred_empty(BB))
535         RPOTOrder[BB] = ++NodeOrdering;
536     for (auto *N : RPOT)
537       NumSunk += sinkBB(N);
538 
539     return NumSunk > 0;
540   }
541 
542 private:
543   ValueTable VN;
544   DenseMap<const BasicBlock *, unsigned> RPOTOrder;
545 
shouldAvoidSinkingInstruction(Instruction * I)546   bool shouldAvoidSinkingInstruction(Instruction *I) {
547     // These instructions may change or break semantics if moved.
548     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
549         I->getType()->isTokenTy())
550       return true;
551     return false;
552   }
553 
554   /// The main heuristic function. Analyze the set of instructions pointed to by
555   /// LRI and return a candidate solution if these instructions can be sunk, or
556   /// std::nullopt otherwise.
557   std::optional<SinkingInstructionCandidate>
558   analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
559                                unsigned &InstNum, unsigned &MemoryInstNum,
560                                ModelledPHISet &NeededPHIs,
561                                SmallPtrSetImpl<Value *> &PHIContents);
562 
563   /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
analyzeInitialPHIs(BasicBlock * BB,ModelledPHISet & PHIs,SmallPtrSetImpl<Value * > & PHIContents)564   void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
565                           SmallPtrSetImpl<Value *> &PHIContents) {
566     for (PHINode &PN : BB->phis()) {
567       auto MPHI = ModelledPHI(&PN, RPOTOrder);
568       PHIs.insert(MPHI);
569       PHIContents.insert_range(MPHI.getValues());
570     }
571   }
572 
573   /// The main instruction sinking driver. Set up state and try and sink
574   /// instructions into BBEnd from its predecessors.
575   unsigned sinkBB(BasicBlock *BBEnd);
576 
577   /// Perform the actual mechanics of sinking an instruction from Blocks into
578   /// BBEnd, which is their only successor.
579   void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
580 
581   /// Remove PHIs that all have the same incoming value.
foldPointlessPHINodes(BasicBlock * BB)582   void foldPointlessPHINodes(BasicBlock *BB) {
583     auto I = BB->begin();
584     while (PHINode *PN = dyn_cast<PHINode>(I++)) {
585       if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
586             return V == PN->getIncomingValue(0);
587           }))
588         continue;
589       if (PN->getIncomingValue(0) != PN)
590         PN->replaceAllUsesWith(PN->getIncomingValue(0));
591       else
592         PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
593       PN->eraseFromParent();
594     }
595   }
596 };
597 
598 std::optional<SinkingInstructionCandidate>
analyzeInstructionForSinking(LockstepReverseIterator<false> & LRI,unsigned & InstNum,unsigned & MemoryInstNum,ModelledPHISet & NeededPHIs,SmallPtrSetImpl<Value * > & PHIContents)599 GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
600                                       unsigned &InstNum,
601                                       unsigned &MemoryInstNum,
602                                       ModelledPHISet &NeededPHIs,
603                                       SmallPtrSetImpl<Value *> &PHIContents) {
604   auto Insts = *LRI;
605   LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
606                                                                   : Insts) {
607     I->dump();
608   } dbgs() << " ]\n";);
609 
610   DenseMap<uint32_t, unsigned> VNums;
611   for (auto *I : Insts) {
612     uint32_t N = VN.lookupOrAdd(I);
613     LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
614     if (N == ~0U)
615       return std::nullopt;
616     VNums[N]++;
617   }
618   unsigned VNumToSink = llvm::max_element(VNums, llvm::less_second())->first;
619 
620   if (VNums[VNumToSink] == 1)
621     // Can't sink anything!
622     return std::nullopt;
623 
624   // Now restrict the number of incoming blocks down to only those with
625   // VNumToSink.
626   auto &ActivePreds = LRI.getActiveBlocks();
627   unsigned InitialActivePredSize = ActivePreds.size();
628   SmallVector<Instruction *, 4> NewInsts;
629   for (auto *I : Insts) {
630     if (VN.lookup(I) != VNumToSink)
631       ActivePreds.remove(I->getParent());
632     else
633       NewInsts.push_back(I);
634   }
635   for (auto *I : NewInsts)
636     if (shouldAvoidSinkingInstruction(I))
637       return std::nullopt;
638 
639   // If we've restricted the incoming blocks, restrict all needed PHIs also
640   // to that set.
641   bool RecomputePHIContents = false;
642   if (ActivePreds.size() != InitialActivePredSize) {
643     ModelledPHISet NewNeededPHIs;
644     for (auto P : NeededPHIs) {
645       P.restrictToBlocks(ActivePreds);
646       NewNeededPHIs.insert(P);
647     }
648     NeededPHIs = NewNeededPHIs;
649     LRI.restrictToBlocks(ActivePreds);
650     RecomputePHIContents = true;
651   }
652 
653   // The sunk instruction's results.
654   ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);
655 
656   // Does sinking this instruction render previous PHIs redundant?
657   if (NeededPHIs.erase(NewPHI))
658     RecomputePHIContents = true;
659 
660   if (RecomputePHIContents) {
661     // The needed PHIs have changed, so recompute the set of all needed
662     // values.
663     PHIContents.clear();
664     for (auto &PHI : NeededPHIs)
665       PHIContents.insert_range(PHI.getValues());
666   }
667 
668   // Is this instruction required by a later PHI that doesn't match this PHI?
669   // if so, we can't sink this instruction.
670   for (auto *V : NewPHI.getValues())
671     if (PHIContents.count(V))
672       // V exists in this PHI, but the whole PHI is different to NewPHI
673       // (else it would have been removed earlier). We cannot continue
674       // because this isn't representable.
675       return std::nullopt;
676 
677   // Which operands need PHIs?
678   // FIXME: If any of these fail, we should partition up the candidates to
679   // try and continue making progress.
680   Instruction *I0 = NewInsts[0];
681 
682   auto isNotSameOperation = [&I0](Instruction *I) {
683     return !I0->isSameOperationAs(I);
684   };
685 
686   if (any_of(NewInsts, isNotSameOperation))
687     return std::nullopt;
688 
689   for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
690     ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
691     if (PHI.areAllIncomingValuesSame())
692       continue;
693     if (!canReplaceOperandWithVariable(I0, OpNum))
694       // We can 't create a PHI from this instruction!
695       return std::nullopt;
696     if (NeededPHIs.count(PHI))
697       continue;
698     if (!PHI.areAllIncomingValuesSameType())
699       return std::nullopt;
700     // Don't create indirect calls! The called value is the final operand.
701     if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
702         PHI.areAnyIncomingValuesConstant())
703       return std::nullopt;
704 
705     NeededPHIs.reserve(NeededPHIs.size());
706     NeededPHIs.insert(PHI);
707     PHIContents.insert_range(PHI.getValues());
708   }
709 
710   if (isMemoryInst(NewInsts[0]))
711     ++MemoryInstNum;
712 
713   SinkingInstructionCandidate Cand;
714   Cand.NumInstructions = ++InstNum;
715   Cand.NumMemoryInsts = MemoryInstNum;
716   Cand.NumBlocks = ActivePreds.size();
717   Cand.NumPHIs = NeededPHIs.size();
718   append_range(Cand.Blocks, ActivePreds);
719 
720   return Cand;
721 }
722 
sinkBB(BasicBlock * BBEnd)723 unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
724   LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
725              BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
726   SmallVector<BasicBlock *, 4> Preds;
727   for (auto *B : predecessors(BBEnd)) {
728     // Bailout on basic blocks without predecessor(PR42346).
729     if (!RPOTOrder.count(B))
730       return 0;
731     auto *T = B->getTerminator();
732     if (isa<BranchInst>(T) || isa<SwitchInst>(T))
733       Preds.push_back(B);
734     else
735       return 0;
736   }
737   if (Preds.size() < 2)
738     return 0;
739   auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
740     return RPOTOrder.lookup(BB1) < RPOTOrder.lookup(BB2);
741   };
742   // Sort in a deterministic order.
743   llvm::sort(Preds, ComesBefore);
744 
745   unsigned NumOrigPreds = Preds.size();
746   // We can only sink instructions through unconditional branches.
747   llvm::erase_if(Preds, [](BasicBlock *BB) {
748     return BB->getTerminator()->getNumSuccessors() != 1;
749   });
750 
751   LockstepReverseIterator<false> LRI(Preds);
752   SmallVector<SinkingInstructionCandidate, 4> Candidates;
753   unsigned InstNum = 0, MemoryInstNum = 0;
754   ModelledPHISet NeededPHIs;
755   SmallPtrSet<Value *, 4> PHIContents;
756   analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
757   unsigned NumOrigPHIs = NeededPHIs.size();
758 
759   while (LRI.isValid()) {
760     auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
761                                              NeededPHIs, PHIContents);
762     if (!Cand)
763       break;
764     Cand->calculateCost(NumOrigPHIs, Preds.size());
765     Candidates.emplace_back(*Cand);
766     --LRI;
767   }
768 
769   llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
770   LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
771                                                          : Candidates) dbgs()
772                                                     << "  " << C << "\n";);
773 
774   // Pick the top candidate, as long it is positive!
775   if (Candidates.empty() || Candidates.front().Cost <= 0)
776     return 0;
777   auto C = Candidates.front();
778 
779   LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
780   BasicBlock *InsertBB = BBEnd;
781   if (C.Blocks.size() < NumOrigPreds) {
782     LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
783                BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
784     InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
785     if (!InsertBB) {
786       LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
787       // Edge couldn't be split.
788       return 0;
789     }
790   }
791 
792   for (unsigned I = 0; I < C.NumInstructions; ++I)
793     sinkLastInstruction(C.Blocks, InsertBB);
794 
795   return C.NumInstructions;
796 }
797 
sinkLastInstruction(ArrayRef<BasicBlock * > Blocks,BasicBlock * BBEnd)798 void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
799                                   BasicBlock *BBEnd) {
800   SmallVector<Instruction *, 4> Insts;
801   for (BasicBlock *BB : Blocks)
802     Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
803   Instruction *I0 = Insts.front();
804 
805   SmallVector<Value *, 4> NewOperands;
806   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
807     bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
808       return I->getOperand(O) != I0->getOperand(O);
809     });
810     if (!NeedPHI) {
811       NewOperands.push_back(I0->getOperand(O));
812       continue;
813     }
814 
815     // Create a new PHI in the successor block and populate it.
816     auto *Op = I0->getOperand(O);
817     assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
818     auto *PN =
819         PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
820     PN->insertBefore(BBEnd->begin());
821     for (auto *I : Insts)
822       PN->addIncoming(I->getOperand(O), I->getParent());
823     NewOperands.push_back(PN);
824   }
825 
826   // Arbitrarily use I0 as the new "common" instruction; remap its operands
827   // and move it to the start of the successor block.
828   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
829     I0->getOperandUse(O).set(NewOperands[O]);
830   I0->moveBefore(BBEnd->getFirstInsertionPt());
831 
832   // Update metadata and IR flags.
833   for (auto *I : Insts)
834     if (I != I0) {
835       combineMetadataForCSE(I0, I, true);
836       I0->andIRFlags(I);
837     }
838 
839   for (auto *I : Insts)
840     if (I != I0) {
841       I->replaceAllUsesWith(I0);
842       I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
843     }
844   foldPointlessPHINodes(BBEnd);
845 
846   // Finally nuke all instructions apart from the common instruction.
847   for (auto *I : Insts)
848     if (I != I0)
849       I->eraseFromParent();
850 
851   NumRemoved += Insts.size() - 1;
852 }
853 
854 } // end anonymous namespace
855 
run(Function & F,FunctionAnalysisManager & AM)856 PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
857   GVNSink G;
858   if (!G.run(F))
859     return PreservedAnalyses::all();
860 
861   return PreservedAnalyses::none();
862 }
863