xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp (revision 6966ac055c3b7a39266fb982493330df7a097997)
1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a pass that removes irreducible control flow.
11 /// Irreducible control flow means multiple-entry loops, which this pass
12 /// transforms to have a single entry.
13 ///
14 /// Note that LLVM has a generic pass that lowers irreducible control flow, but
15 /// it linearizes control flow, turning diamonds into two triangles, which is
16 /// both unnecessary and undesirable for WebAssembly.
17 ///
18 /// The big picture: We recursively process each "region", defined as a group
19 /// of blocks with a single entry and no branches back to that entry. A region
20 /// may be the entire function body, or the inner part of a loop, i.e., the
21 /// loop's body without branches back to the loop entry. In each region we fix
22 /// up multi-entry loops by adding a new block that can dispatch to each of the
23 /// loop entries, based on the value of a label "helper" variable, and we
24 /// replace direct branches to the entries with assignments to the label
25 /// variable and a branch to the dispatch block. Then the dispatch block is the
26 /// single entry in the loop containing the previous multiple entries. After
27 /// ensuring all the loops in a region are reducible, we recurse into them. The
28 /// total time complexity of this pass is:
29 ///
30 ///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31 ///     NumLoops * NumLoops)
32 ///
33 /// This pass is similar to what the Relooper [1] does. Both identify looping
34 /// code that requires multiple entries, and resolve it in a similar way (in
35 /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36 /// also that like the Relooper, we implement a "minimal" intervention: we only
37 /// use the "label" helper for the blocks we absolutely must and no others. We
38 /// also prioritize code size and do not duplicate code in order to resolve
39 /// irreducibility. The graph algorithms for finding loops and entries and so
40 /// forth are also similar to the Relooper. The main differences between this
41 /// pass and the Relooper are:
42 ///
43 ///  * We just care about irreducibility, so we just look at loops.
44 ///  * The Relooper emits structured control flow (with ifs etc.), while we
45 ///    emit a CFG.
46 ///
47 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48 /// Proceedings of the ACM international conference companion on Object oriented
49 /// programming systems languages and applications companion (SPLASH '11). ACM,
50 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51 /// http://doi.acm.org/10.1145/2048147.2048224
52 ///
53 //===----------------------------------------------------------------------===//
54 
55 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
56 #include "WebAssembly.h"
57 #include "WebAssemblySubtarget.h"
58 #include "llvm/CodeGen/MachineInstrBuilder.h"
59 using namespace llvm;
60 
61 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
62 
63 namespace {
64 
// Sequence of machine basic block pointers; used below for work lists and for
// the sorted list of loop entries.
using BlockVector = SmallVector<MachineBasicBlock *, 4>;
// Set of machine basic block pointers; used below for region membership tests
// and for the loop/entry/enterer sets.
using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
67 
68 // Calculates reachability in a region. Ignores branches to blocks outside of
69 // the region, and ignores branches to the region entry (for the case where
70 // the region is the inner part of a loop).
71 class ReachabilityGraph {
72 public:
73   ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
74       : Entry(Entry), Blocks(Blocks) {
75 #ifndef NDEBUG
76     // The region must have a single entry.
77     for (auto *MBB : Blocks) {
78       if (MBB != Entry) {
79         for (auto *Pred : MBB->predecessors()) {
80           assert(inRegion(Pred));
81         }
82       }
83     }
84 #endif
85     calculate();
86   }
87 
88   bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
89     assert(inRegion(From) && inRegion(To));
90     auto I = Reachable.find(From);
91     if (I == Reachable.end())
92       return false;
93     return I->second.count(To);
94   }
95 
96   // "Loopers" are blocks that are in a loop. We detect these by finding blocks
97   // that can reach themselves.
98   const BlockSet &getLoopers() const { return Loopers; }
99 
100   // Get all blocks that are loop entries.
101   const BlockSet &getLoopEntries() const { return LoopEntries; }
102 
103   // Get all blocks that enter a particular loop from outside.
104   const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
105     assert(inRegion(LoopEntry));
106     auto I = LoopEnterers.find(LoopEntry);
107     assert(I != LoopEnterers.end());
108     return I->second;
109   }
110 
111 private:
112   MachineBasicBlock *Entry;
113   const BlockSet &Blocks;
114 
115   BlockSet Loopers, LoopEntries;
116   DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
117 
118   bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
119 
120   // Maps a block to all the other blocks it can reach.
121   DenseMap<MachineBasicBlock *, BlockSet> Reachable;
122 
123   void calculate() {
124     // Reachability computation work list. Contains pairs of recent additions
125     // (A, B) where we just added a link A => B.
126     using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
127     SmallVector<BlockPair, 4> WorkList;
128 
129     // Add all relevant direct branches.
130     for (auto *MBB : Blocks) {
131       for (auto *Succ : MBB->successors()) {
132         if (Succ != Entry && inRegion(Succ)) {
133           Reachable[MBB].insert(Succ);
134           WorkList.emplace_back(MBB, Succ);
135         }
136       }
137     }
138 
139     while (!WorkList.empty()) {
140       MachineBasicBlock *MBB, *Succ;
141       std::tie(MBB, Succ) = WorkList.pop_back_val();
142       assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
143       if (MBB != Entry) {
144         // We recently added MBB => Succ, and that means we may have enabled
145         // Pred => MBB => Succ.
146         for (auto *Pred : MBB->predecessors()) {
147           if (Reachable[Pred].insert(Succ).second) {
148             WorkList.emplace_back(Pred, Succ);
149           }
150         }
151       }
152     }
153 
154     // Blocks that can return to themselves are in a loop.
155     for (auto *MBB : Blocks) {
156       if (canReach(MBB, MBB)) {
157         Loopers.insert(MBB);
158       }
159     }
160     assert(!Loopers.count(Entry));
161 
162     // Find the loop entries - loopers reachable from blocks not in that loop -
163     // and those outside blocks that reach them, the "loop enterers".
164     for (auto *Looper : Loopers) {
165       for (auto *Pred : Looper->predecessors()) {
166         // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
167         // otherwise, it is a block that enters into the loop.
168         if (!canReach(Looper, Pred)) {
169           LoopEntries.insert(Looper);
170           LoopEnterers[Looper].insert(Pred);
171         }
172       }
173     }
174   }
175 };
176 
177 // Finds the blocks in a single-entry loop, given the loop entry and the
178 // list of blocks that enter the loop.
179 class LoopBlocks {
180 public:
181   LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
182       : Entry(Entry), Enterers(Enterers) {
183     calculate();
184   }
185 
186   BlockSet &getBlocks() { return Blocks; }
187 
188 private:
189   MachineBasicBlock *Entry;
190   const BlockSet &Enterers;
191 
192   BlockSet Blocks;
193 
194   void calculate() {
195     // Going backwards from the loop entry, if we ignore the blocks entering
196     // from outside, we will traverse all the blocks in the loop.
197     BlockVector WorkList;
198     BlockSet AddedToWorkList;
199     Blocks.insert(Entry);
200     for (auto *Pred : Entry->predecessors()) {
201       if (!Enterers.count(Pred)) {
202         WorkList.push_back(Pred);
203         AddedToWorkList.insert(Pred);
204       }
205     }
206 
207     while (!WorkList.empty()) {
208       auto *MBB = WorkList.pop_back_val();
209       assert(!Enterers.count(MBB));
210       if (Blocks.insert(MBB).second) {
211         for (auto *Pred : MBB->predecessors()) {
212           if (!AddedToWorkList.count(Pred)) {
213             WorkList.push_back(Pred);
214             AddedToWorkList.insert(Pred);
215           }
216         }
217       }
218     }
219   }
220 };
221 
222 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
223   StringRef getPassName() const override {
224     return "WebAssembly Fix Irreducible Control Flow";
225   }
226 
227   bool runOnMachineFunction(MachineFunction &MF) override;
228 
229   bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
230                      MachineFunction &MF);
231 
232   void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
233                            MachineFunction &MF, const ReachabilityGraph &Graph);
234 
235 public:
236   static char ID; // Pass identification, replacement for typeid
237   WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
238 };
239 
240 bool WebAssemblyFixIrreducibleControlFlow::processRegion(
241     MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
242   bool Changed = false;
243 
244   // Remove irreducibility before processing child loops, which may take
245   // multiple iterations.
246   while (true) {
247     ReachabilityGraph Graph(Entry, Blocks);
248 
249     bool FoundIrreducibility = false;
250 
251     for (auto *LoopEntry : Graph.getLoopEntries()) {
252       // Find mutual entries - all entries which can reach this one, and
253       // are reached by it (that always includes LoopEntry itself). All mutual
254       // entries must be in the same loop, so if we have more than one, then we
255       // have irreducible control flow.
256       //
257       // Note that irreducibility may involve inner loops, e.g. imagine A
258       // starts one loop, and it has B inside it which starts an inner loop.
259       // If we add a branch from all the way on the outside to B, then in a
260       // sense B is no longer an "inner" loop, semantically speaking. We will
261       // fix that irreducibility by adding a block that dispatches to either
262       // either A or B, so B will no longer be an inner loop in our output.
263       // (A fancier approach might try to keep it as such.)
264       //
265       // Note that we still need to recurse into inner loops later, to handle
266       // the case where the irreducibility is entirely nested - we would not
267       // be able to identify that at this point, since the enclosing loop is
268       // a group of blocks all of whom can reach each other. (We'll see the
269       // irreducibility after removing branches to the top of that enclosing
270       // loop.)
271       BlockSet MutualLoopEntries;
272       MutualLoopEntries.insert(LoopEntry);
273       for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
274         if (OtherLoopEntry != LoopEntry &&
275             Graph.canReach(LoopEntry, OtherLoopEntry) &&
276             Graph.canReach(OtherLoopEntry, LoopEntry)) {
277           MutualLoopEntries.insert(OtherLoopEntry);
278         }
279       }
280 
281       if (MutualLoopEntries.size() > 1) {
282         makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
283         FoundIrreducibility = true;
284         Changed = true;
285         break;
286       }
287     }
288     // Only go on to actually process the inner loops when we are done
289     // removing irreducible control flow and changing the graph. Modifying
290     // the graph as we go is possible, and that might let us avoid looking at
291     // the already-fixed loops again if we are careful, but all that is
292     // complex and bug-prone. Since irreducible loops are rare, just starting
293     // another iteration is best.
294     if (FoundIrreducibility) {
295       continue;
296     }
297 
298     for (auto *LoopEntry : Graph.getLoopEntries()) {
299       LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
300       // Each of these calls to processRegion may change the graph, but are
301       // guaranteed not to interfere with each other. The only changes we make
302       // to the graph are to add blocks on the way to a loop entry. As the
303       // loops are disjoint, that means we may only alter branches that exit
304       // another loop, which are ignored when recursing into that other loop
305       // anyhow.
306       if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
307         Changed = true;
308       }
309     }
310 
311     return Changed;
312   }
313 }
314 
315 // Given a set of entries to a single loop, create a single entry for that
316 // loop by creating a dispatch block for them, routing control flow using
317 // a helper variable. Also updates Blocks with any new blocks created, so
318 // that we properly track all the blocks in the region. But this does not update
319 // ReachabilityGraph; this will be updated in the caller of this function as
320 // needed.
321 void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
322     BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
323     const ReachabilityGraph &Graph) {
324   assert(Entries.size() >= 2);
325 
326   // Sort the entries to ensure a deterministic build.
327   BlockVector SortedEntries(Entries.begin(), Entries.end());
328   llvm::sort(SortedEntries,
329              [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
330                auto ANum = A->getNumber();
331                auto BNum = B->getNumber();
332                return ANum < BNum;
333              });
334 
335 #ifndef NDEBUG
336   for (auto Block : SortedEntries)
337     assert(Block->getNumber() != -1);
338   if (SortedEntries.size() > 1) {
339     for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
340          ++I) {
341       auto ANum = (*I)->getNumber();
342       auto BNum = (*(std::next(I)))->getNumber();
343       assert(ANum != BNum);
344     }
345   }
346 #endif
347 
348   // Create a dispatch block which will contain a jump table to the entries.
349   MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
350   MF.insert(MF.end(), Dispatch);
351   Blocks.insert(Dispatch);
352 
353   // Add the jump table.
354   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
355   MachineInstrBuilder MIB =
356       BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
357 
358   // Add the register which will be used to tell the jump table which block to
359   // jump to.
360   MachineRegisterInfo &MRI = MF.getRegInfo();
361   unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
362   MIB.addReg(Reg);
363 
364   // Compute the indices in the superheader, one for each bad block, and
365   // add them as successors.
366   DenseMap<MachineBasicBlock *, unsigned> Indices;
367   for (auto *Entry : SortedEntries) {
368     auto Pair = Indices.insert(std::make_pair(Entry, 0));
369     assert(Pair.second);
370 
371     unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
372     Pair.first->second = Index;
373 
374     MIB.addMBB(Entry);
375     Dispatch->addSuccessor(Entry);
376   }
377 
378   // Rewrite the problematic successors for every block that wants to reach
379   // the bad blocks. For simplicity, we just introduce a new block for every
380   // edge we need to rewrite. (Fancier things are possible.)
381 
382   BlockVector AllPreds;
383   for (auto *Entry : SortedEntries) {
384     for (auto *Pred : Entry->predecessors()) {
385       if (Pred != Dispatch) {
386         AllPreds.push_back(Pred);
387       }
388     }
389   }
390 
391   // This set stores predecessors within this loop.
392   DenseSet<MachineBasicBlock *> InLoop;
393   for (auto *Pred : AllPreds) {
394     for (auto *Entry : Pred->successors()) {
395       if (!Entries.count(Entry))
396         continue;
397       if (Graph.canReach(Entry, Pred)) {
398         InLoop.insert(Pred);
399         break;
400       }
401     }
402   }
403 
404   // Record if each entry has a layout predecessor. This map stores
405   // <<Predecessor is within the loop?, loop entry>, layout predecessor>
406   std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *>
407       EntryToLayoutPred;
408   for (auto *Pred : AllPreds)
409     for (auto *Entry : Pred->successors())
410       if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
411         EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred;
412 
413   // We need to create at most two routing blocks per entry: one for
414   // predecessors outside the loop and one for predecessors inside the loop.
415   // This map stores
416   // <<Predecessor is within the loop?, loop entry>, routing block>
417   std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map;
418   for (auto *Pred : AllPreds) {
419     bool PredInLoop = InLoop.count(Pred);
420     for (auto *Entry : Pred->successors()) {
421       if (!Entries.count(Entry) ||
422           Map.count(std::make_pair(InLoop.count(Pred), Entry)))
423         continue;
424       // If there exists a layout predecessor of this entry and this predecessor
425       // is not that, we rather create a routing block after that layout
426       // predecessor to save a branch.
427       if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) &&
428           EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred)
429         continue;
430 
431       // This is a successor we need to rewrite.
432       MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
433       MF.insert(Pred->isLayoutSuccessor(Entry)
434                     ? MachineFunction::iterator(Entry)
435                     : MF.end(),
436                 Routing);
437       Blocks.insert(Routing);
438 
439       // Set the jump table's register of the index of the block we wish to
440       // jump to, and jump to the jump table.
441       BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
442           .addImm(Indices[Entry]);
443       BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
444       Routing->addSuccessor(Dispatch);
445       Map[std::make_pair(PredInLoop, Entry)] = Routing;
446     }
447   }
448 
449   for (auto *Pred : AllPreds) {
450     bool PredInLoop = InLoop.count(Pred);
451     // Remap the terminator operands and the successor list.
452     for (MachineInstr &Term : Pred->terminators())
453       for (auto &Op : Term.explicit_uses())
454         if (Op.isMBB() && Indices.count(Op.getMBB()))
455           Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]);
456 
457     for (auto *Succ : Pred->successors()) {
458       if (!Entries.count(Succ))
459         continue;
460       auto *Routing = Map[std::make_pair(PredInLoop, Succ)];
461       Pred->replaceSuccessor(Succ, Routing);
462     }
463   }
464 
465   // Create a fake default label, because br_table requires one.
466   MIB.addMBB(MIB.getInstr()
467                  ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
468                  .getMBB());
469 }
470 
471 } // end anonymous namespace
472 
// Definition of the static pass identifier declared inside the pass class.
char WebAssemblyFixIrreducibleControlFlow::ID = 0;
// Registers the pass under the DEBUG_TYPE name together with a short
// description. Both trailing flags are false -- presumably "CFG only" and
// "is analysis"; confirm against the INITIALIZE_PASS macro definition.
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
                "Removes irreducible control flow", false, false)
476 
477 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
478   return new WebAssemblyFixIrreducibleControlFlow();
479 }
480 
481 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
482     MachineFunction &MF) {
483   LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
484                        "********** Function: "
485                     << MF.getName() << '\n');
486 
487   // Start the recursive process on the entire function body.
488   BlockSet AllBlocks;
489   for (auto &MBB : MF) {
490     AllBlocks.insert(&MBB);
491   }
492 
493   if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
494     // We rewrote part of the function; recompute relevant things.
495     MF.getRegInfo().invalidateLiveness();
496     MF.RenumberBlocks();
497     return true;
498   }
499 
500   return false;
501 }
502