xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp (revision f976241773df2260e6170317080761d1c5814fe5)
1 //===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //! \file
10 //! This pass performs merges of loads and stores on both sides of a
11 //  diamond (hammock). It hoists the loads and sinks the stores.
12 //
13 // The algorithm iteratively hoists two loads to the same address out of a
14 // diamond (hammock) and merges them into a single load in the header. Similar
15 // it sinks and merges two stores to the tail block (footer). The algorithm
16 // iterates over the instructions of one side of the diamond and attempts to
17 // find a matching load/store on the other side. It hoists / sinks when it
18 // thinks it safe to do so.  This optimization helps with eg. hiding load
19 // latencies, triggering if-conversion, and reducing static code size.
20 //
21 // NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist.
22 //
23 //===----------------------------------------------------------------------===//
24 //
25 //
26 // Example:
27 // Diamond shaped code before merge:
28 //
29 //            header:
30 //                     br %cond, label %if.then, label %if.else
31 //                        +                    +
32 //                       +                      +
33 //                      +                        +
34 //            if.then:                         if.else:
35 //               %lt = load %addr_l               %le = load %addr_l
36 //               <use %lt>                        <use %le>
37 //               <...>                            <...>
38 //               store %st, %addr_s               store %se, %addr_s
39 //               br label %if.end                 br label %if.end
40 //                     +                         +
41 //                      +                       +
42 //                       +                     +
43 //            if.end ("footer"):
44 //                     <...>
45 //
46 // Diamond shaped code after merge:
47 //
48 //            header:
49 //                     %l = load %addr_l
50 //                     br %cond, label %if.then, label %if.else
51 //                        +                    +
52 //                       +                      +
53 //                      +                        +
54 //            if.then:                         if.else:
55 //               <use %l>                         <use %l>
56 //               <...>                            <...>
57 //               br label %if.end                 br label %if.end
58 //                      +                        +
59 //                       +                      +
60 //                        +                    +
61 //            if.end ("footer"):
62 //                     %s.sink = phi [%st, if.then], [%se, if.else]
63 //                     <...>
64 //                     store %s.sink, %addr_s
65 //                     <...>
66 //
67 //
68 //===----------------------- TODO -----------------------------------------===//
69 //
70 // 1) Generalize to regions other than diamonds
71 // 2) Be more aggressive merging memory operations
72 // Note that both changes require register pressure control
73 //
74 //===----------------------------------------------------------------------===//
75 
76 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
77 #include "llvm/ADT/Statistic.h"
78 #include "llvm/Analysis/AliasAnalysis.h"
79 #include "llvm/Analysis/CFG.h"
80 #include "llvm/Analysis/GlobalsModRef.h"
81 #include "llvm/Analysis/Loads.h"
82 #include "llvm/Analysis/ValueTracking.h"
83 #include "llvm/IR/Metadata.h"
84 #include "llvm/Support/Debug.h"
85 #include "llvm/Support/raw_ostream.h"
86 #include "llvm/Transforms/Scalar.h"
87 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
88 
89 using namespace llvm;
90 
91 #define DEBUG_TYPE "mldst-motion"
92 
93 namespace {
94 //===----------------------------------------------------------------------===//
95 //                         MergedLoadStoreMotion Pass
96 //===----------------------------------------------------------------------===//
97 class MergedLoadStoreMotion {
98   AliasAnalysis *AA = nullptr;
99 
100   // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
101   // where Size0 and Size1 are the #instructions on the two sides of
102   // the diamond. The constant chosen here is arbitrary. Compiler Time
103   // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
104   const int MagicCompileTimeControl = 250;
105 
106 public:
107   bool run(Function &F, AliasAnalysis &AA);
108 
109 private:
110   BasicBlock *getDiamondTail(BasicBlock *BB);
111   bool isDiamondHead(BasicBlock *BB);
112   // Routines for sinking stores
113   StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
114   PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
115   bool isStoreSinkBarrierInRange(const Instruction &Start,
116                                  const Instruction &End, MemoryLocation Loc);
117   bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
118   bool mergeStores(BasicBlock *BB);
119 };
120 } // end anonymous namespace
121 
122 ///
123 /// Return tail block of a diamond.
124 ///
125 BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
126   assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
127   return BB->getTerminator()->getSuccessor(0)->getSingleSuccessor();
128 }
129 
130 ///
131 /// True when BB is the head of a diamond (hammock)
132 ///
133 bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
134   if (!BB)
135     return false;
136   auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
137   if (!BI || !BI->isConditional())
138     return false;
139 
140   BasicBlock *Succ0 = BI->getSuccessor(0);
141   BasicBlock *Succ1 = BI->getSuccessor(1);
142 
143   if (!Succ0->getSinglePredecessor())
144     return false;
145   if (!Succ1->getSinglePredecessor())
146     return false;
147 
148   BasicBlock *Succ0Succ = Succ0->getSingleSuccessor();
149   BasicBlock *Succ1Succ = Succ1->getSingleSuccessor();
150   // Ignore triangles.
151   if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ)
152     return false;
153   return true;
154 }
155 
156 
157 ///
158 /// True when instruction is a sink barrier for a store
159 /// located in Loc
160 ///
161 /// Whenever an instruction could possibly read or modify the
162 /// value being stored or protect against the store from
163 /// happening it is considered a sink barrier.
164 ///
165 bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
166                                                       const Instruction &End,
167                                                       MemoryLocation Loc) {
168   for (const Instruction &Inst :
169        make_range(Start.getIterator(), End.getIterator()))
170     if (Inst.mayThrow())
171       return true;
172   return AA->canInstructionRangeModRef(Start, End, Loc, ModRefInfo::ModRef);
173 }
174 
175 ///
176 /// Check if \p BB contains a store to the same address as \p SI
177 ///
178 /// \return The store in \p  when it is safe to sink. Otherwise return Null.
179 ///
180 StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
181                                                    StoreInst *Store0) {
182   LLVM_DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
183   BasicBlock *BB0 = Store0->getParent();
184   for (Instruction &Inst : reverse(*BB1)) {
185     auto *Store1 = dyn_cast<StoreInst>(&Inst);
186     if (!Store1)
187       continue;
188 
189     MemoryLocation Loc0 = MemoryLocation::get(Store0);
190     MemoryLocation Loc1 = MemoryLocation::get(Store1);
191     if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
192         !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
193         !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
194       return Store1;
195     }
196   }
197   return nullptr;
198 }
199 
200 ///
201 /// Create a PHI node in BB for the operands of S0 and S1
202 ///
203 PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
204                                               StoreInst *S1) {
205   // Create a phi if the values mismatch.
206   Value *Opd1 = S0->getValueOperand();
207   Value *Opd2 = S1->getValueOperand();
208   if (Opd1 == Opd2)
209     return nullptr;
210 
211   auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
212                                 &BB->front());
213   NewPN->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
214   NewPN->addIncoming(Opd1, S0->getParent());
215   NewPN->addIncoming(Opd2, S1->getParent());
216   return NewPN;
217 }
218 
219 ///
220 /// Merge two stores to same address and sink into \p BB
221 ///
222 /// Also sinks GEP instruction computing the store address
223 ///
224 bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
225                                       StoreInst *S1) {
226   // Only one definition?
227   auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
228   auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
229   if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
230       (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
231       (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0)) {
232     LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
233                dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
234                dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
235     // Hoist the instruction.
236     BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
237     // Intersect optional metadata.
238     S0->andIRFlags(S1);
239     S0->dropUnknownNonDebugMetadata();
240 
241     // Create the new store to be inserted at the join point.
242     StoreInst *SNew = cast<StoreInst>(S0->clone());
243     Instruction *ANew = A0->clone();
244     SNew->insertBefore(&*InsertPt);
245     ANew->insertBefore(SNew);
246 
247     assert(S0->getParent() == A0->getParent());
248     assert(S1->getParent() == A1->getParent());
249 
250     // New PHI operand? Use it.
251     if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
252       SNew->setOperand(0, NewPN);
253     S0->eraseFromParent();
254     S1->eraseFromParent();
255     A0->replaceAllUsesWith(ANew);
256     A0->eraseFromParent();
257     A1->replaceAllUsesWith(ANew);
258     A1->eraseFromParent();
259     return true;
260   }
261   return false;
262 }
263 
264 ///
265 /// True when two stores are equivalent and can sink into the footer
266 ///
267 /// Starting from a diamond tail block, iterate over the instructions in one
268 /// predecessor block and try to match a store in the second predecessor.
269 ///
270 bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
271 
272   bool MergedStores = false;
273   assert(T && "Footer of a diamond cannot be empty");
274 
275   pred_iterator PI = pred_begin(T), E = pred_end(T);
276   assert(PI != E);
277   BasicBlock *Pred0 = *PI;
278   ++PI;
279   BasicBlock *Pred1 = *PI;
280   ++PI;
281   // tail block  of a diamond/hammock?
282   if (Pred0 == Pred1)
283     return false; // No.
284   if (PI != E)
285     return false; // No. More than 2 predecessors.
286 
287   // #Instructions in Succ1 for Compile Time Control
288   auto InstsNoDbg = Pred1->instructionsWithoutDebug();
289   int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end());
290   int NStores = 0;
291 
292   for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
293        RBI != RBE;) {
294 
295     Instruction *I = &*RBI;
296     ++RBI;
297 
298     // Don't sink non-simple (atomic, volatile) stores.
299     auto *S0 = dyn_cast<StoreInst>(I);
300     if (!S0 || !S0->isSimple())
301       continue;
302 
303     ++NStores;
304     if (NStores * Size1 >= MagicCompileTimeControl)
305       break;
306     if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
307       bool Res = sinkStore(T, S0, S1);
308       MergedStores |= Res;
309       // Don't attempt to sink below stores that had to stick around
310       // But after removal of a store and some of its feeding
311       // instruction search again from the beginning since the iterator
312       // is likely stale at this point.
313       if (!Res)
314         break;
315       RBI = Pred0->rbegin();
316       RBE = Pred0->rend();
317       LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
318     }
319   }
320   return MergedStores;
321 }
322 
323 bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) {
324   this->AA = &AA;
325 
326   bool Changed = false;
327   LLVM_DEBUG(dbgs() << "Instruction Merger\n");
328 
329   // Merge unconditional branches, allowing PRE to catch more
330   // optimization opportunities.
331   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
332     BasicBlock *BB = &*FI++;
333 
334     // Hoist equivalent loads and sink stores
335     // outside diamonds when possible
336     if (isDiamondHead(BB)) {
337       Changed |= mergeStores(getDiamondTail(BB));
338     }
339   }
340   return Changed;
341 }
342 
343 namespace {
344 class MergedLoadStoreMotionLegacyPass : public FunctionPass {
345 public:
346   static char ID; // Pass identification, replacement for typeid
347   MergedLoadStoreMotionLegacyPass() : FunctionPass(ID) {
348     initializeMergedLoadStoreMotionLegacyPassPass(
349         *PassRegistry::getPassRegistry());
350   }
351 
352   ///
353   /// Run the transformation for each function
354   ///
355   bool runOnFunction(Function &F) override {
356     if (skipFunction(F))
357       return false;
358     MergedLoadStoreMotion Impl;
359     return Impl.run(F, getAnalysis<AAResultsWrapperPass>().getAAResults());
360   }
361 
362 private:
363   void getAnalysisUsage(AnalysisUsage &AU) const override {
364     AU.setPreservesCFG();
365     AU.addRequired<AAResultsWrapperPass>();
366     AU.addPreserved<GlobalsAAWrapperPass>();
367   }
368 };
369 
370 char MergedLoadStoreMotionLegacyPass::ID = 0;
371 } // anonymous namespace
372 
373 ///
374 /// createMergedLoadStoreMotionPass - The public interface to this file.
375 ///
376 FunctionPass *llvm::createMergedLoadStoreMotionPass() {
377   return new MergedLoadStoreMotionLegacyPass();
378 }
379 
380 INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion",
381                       "MergedLoadStoreMotion", false, false)
382 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
383 INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion",
384                     "MergedLoadStoreMotion", false, false)
385 
386 PreservedAnalyses
387 MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
388   MergedLoadStoreMotion Impl;
389   auto &AA = AM.getResult<AAManager>(F);
390   if (!Impl.run(F, AA))
391     return PreservedAnalyses::all();
392 
393   PreservedAnalyses PA;
394   PA.preserveSet<CFGAnalyses>();
395   PA.preserve<GlobalsAA>();
396   return PA;
397 }
398