xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp (revision 39ae24e3bf1c8e7d053d0249a6bc88f65eff6de1)
1  //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  ///
9  /// \file
10  /// This file implements a pass that removes irreducible control flow.
11  /// Irreducible control flow means multiple-entry loops, which this pass
12  /// transforms to have a single entry.
13  ///
14  /// Note that LLVM has a generic pass that lowers irreducible control flow, but
15  /// it linearizes control flow, turning diamonds into two triangles, which is
16  /// both unnecessary and undesirable for WebAssembly.
17  ///
18  /// The big picture: We recursively process each "region", defined as a group
19  /// of blocks with a single entry and no branches back to that entry. A region
20  /// may be the entire function body, or the inner part of a loop, i.e., the
21  /// loop's body without branches back to the loop entry. In each region we fix
22  /// up multi-entry loops by adding a new block that can dispatch to each of the
23  /// loop entries, based on the value of a label "helper" variable, and we
24  /// replace direct branches to the entries with assignments to the label
25  /// variable and a branch to the dispatch block. Then the dispatch block is the
26  /// single entry in the loop containing the previous multiple entries. After
27  /// ensuring all the loops in a region are reducible, we recurse into them. The
28  /// total time complexity of this pass is:
29  ///
30  ///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31  ///     NumLoops * NumLoops)
32  ///
33  /// This pass is similar to what the Relooper [1] does. Both identify looping
34  /// code that requires multiple entries, and resolve it in a similar way (in
35  /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36  /// also that like the Relooper, we implement a "minimal" intervention: we only
37  /// use the "label" helper for the blocks we absolutely must and no others. We
38  /// also prioritize code size and do not duplicate code in order to resolve
39  /// irreducibility. The graph algorithms for finding loops and entries and so
40  /// forth are also similar to the Relooper. The main differences between this
41  /// pass and the Relooper are:
42  ///
43  ///  * We just care about irreducibility, so we just look at loops.
44  ///  * The Relooper emits structured control flow (with ifs etc.), while we
45  ///    emit a CFG.
46  ///
47  /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48  /// Proceedings of the ACM international conference companion on Object oriented
49  /// programming systems languages and applications companion (SPLASH '11). ACM,
50  /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51  /// http://doi.acm.org/10.1145/2048147.2048224
52  ///
53  //===----------------------------------------------------------------------===//
54  
55  #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
56  #include "WebAssembly.h"
57  #include "WebAssemblySubtarget.h"
58  #include "llvm/CodeGen/MachineFunctionPass.h"
59  #include "llvm/CodeGen/MachineInstrBuilder.h"
60  #include "llvm/Support/Debug.h"
61  using namespace llvm;
62  
63  #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
64  
65  namespace {
66  
67  using BlockVector = SmallVector<MachineBasicBlock *, 4>;
68  using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
69  
70  static BlockVector getSortedEntries(const BlockSet &Entries) {
71    BlockVector SortedEntries(Entries.begin(), Entries.end());
72    llvm::sort(SortedEntries,
73               [](const MachineBasicBlock *A, const MachineBasicBlock *B) {
74                 auto ANum = A->getNumber();
75                 auto BNum = B->getNumber();
76                 return ANum < BNum;
77               });
78    return SortedEntries;
79  }
80  
81  // Calculates reachability in a region. Ignores branches to blocks outside of
82  // the region, and ignores branches to the region entry (for the case where
83  // the region is the inner part of a loop).
84  class ReachabilityGraph {
85  public:
86    ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
87        : Entry(Entry), Blocks(Blocks) {
88  #ifndef NDEBUG
89      // The region must have a single entry.
90      for (auto *MBB : Blocks) {
91        if (MBB != Entry) {
92          for (auto *Pred : MBB->predecessors()) {
93            assert(inRegion(Pred));
94          }
95        }
96      }
97  #endif
98      calculate();
99    }
100  
101    bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
102      assert(inRegion(From) && inRegion(To));
103      auto I = Reachable.find(From);
104      if (I == Reachable.end())
105        return false;
106      return I->second.count(To);
107    }
108  
109    // "Loopers" are blocks that are in a loop. We detect these by finding blocks
110    // that can reach themselves.
111    const BlockSet &getLoopers() const { return Loopers; }
112  
113    // Get all blocks that are loop entries.
114    const BlockSet &getLoopEntries() const { return LoopEntries; }
115  
116    // Get all blocks that enter a particular loop from outside.
117    const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
118      assert(inRegion(LoopEntry));
119      auto I = LoopEnterers.find(LoopEntry);
120      assert(I != LoopEnterers.end());
121      return I->second;
122    }
123  
124  private:
125    MachineBasicBlock *Entry;
126    const BlockSet &Blocks;
127  
128    BlockSet Loopers, LoopEntries;
129    DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
130  
131    bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
132  
133    // Maps a block to all the other blocks it can reach.
134    DenseMap<MachineBasicBlock *, BlockSet> Reachable;
135  
136    void calculate() {
137      // Reachability computation work list. Contains pairs of recent additions
138      // (A, B) where we just added a link A => B.
139      using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
140      SmallVector<BlockPair, 4> WorkList;
141  
142      // Add all relevant direct branches.
143      for (auto *MBB : Blocks) {
144        for (auto *Succ : MBB->successors()) {
145          if (Succ != Entry && inRegion(Succ)) {
146            Reachable[MBB].insert(Succ);
147            WorkList.emplace_back(MBB, Succ);
148          }
149        }
150      }
151  
152      while (!WorkList.empty()) {
153        MachineBasicBlock *MBB, *Succ;
154        std::tie(MBB, Succ) = WorkList.pop_back_val();
155        assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
156        if (MBB != Entry) {
157          // We recently added MBB => Succ, and that means we may have enabled
158          // Pred => MBB => Succ.
159          for (auto *Pred : MBB->predecessors()) {
160            if (Reachable[Pred].insert(Succ).second) {
161              WorkList.emplace_back(Pred, Succ);
162            }
163          }
164        }
165      }
166  
167      // Blocks that can return to themselves are in a loop.
168      for (auto *MBB : Blocks) {
169        if (canReach(MBB, MBB)) {
170          Loopers.insert(MBB);
171        }
172      }
173      assert(!Loopers.count(Entry));
174  
175      // Find the loop entries - loopers reachable from blocks not in that loop -
176      // and those outside blocks that reach them, the "loop enterers".
177      for (auto *Looper : Loopers) {
178        for (auto *Pred : Looper->predecessors()) {
179          // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
180          // otherwise, it is a block that enters into the loop.
181          if (!canReach(Looper, Pred)) {
182            LoopEntries.insert(Looper);
183            LoopEnterers[Looper].insert(Pred);
184          }
185        }
186      }
187    }
188  };
189  
190  // Finds the blocks in a single-entry loop, given the loop entry and the
191  // list of blocks that enter the loop.
192  class LoopBlocks {
193  public:
194    LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
195        : Entry(Entry), Enterers(Enterers) {
196      calculate();
197    }
198  
199    BlockSet &getBlocks() { return Blocks; }
200  
201  private:
202    MachineBasicBlock *Entry;
203    const BlockSet &Enterers;
204  
205    BlockSet Blocks;
206  
207    void calculate() {
208      // Going backwards from the loop entry, if we ignore the blocks entering
209      // from outside, we will traverse all the blocks in the loop.
210      BlockVector WorkList;
211      BlockSet AddedToWorkList;
212      Blocks.insert(Entry);
213      for (auto *Pred : Entry->predecessors()) {
214        if (!Enterers.count(Pred)) {
215          WorkList.push_back(Pred);
216          AddedToWorkList.insert(Pred);
217        }
218      }
219  
220      while (!WorkList.empty()) {
221        auto *MBB = WorkList.pop_back_val();
222        assert(!Enterers.count(MBB));
223        if (Blocks.insert(MBB).second) {
224          for (auto *Pred : MBB->predecessors()) {
225            if (AddedToWorkList.insert(Pred).second)
226              WorkList.push_back(Pred);
227          }
228        }
229      }
230    }
231  };
232  
233  class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
234    StringRef getPassName() const override {
235      return "WebAssembly Fix Irreducible Control Flow";
236    }
237  
238    bool runOnMachineFunction(MachineFunction &MF) override;
239  
240    bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
241                       MachineFunction &MF);
242  
243    void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
244                             MachineFunction &MF, const ReachabilityGraph &Graph);
245  
246  public:
247    static char ID; // Pass identification, replacement for typeid
248    WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
249  };
250  
251  bool WebAssemblyFixIrreducibleControlFlow::processRegion(
252      MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
253    bool Changed = false;
254    // Remove irreducibility before processing child loops, which may take
255    // multiple iterations.
256    while (true) {
257      ReachabilityGraph Graph(Entry, Blocks);
258  
259      bool FoundIrreducibility = false;
260  
261      for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) {
262        // Find mutual entries - all entries which can reach this one, and
263        // are reached by it (that always includes LoopEntry itself). All mutual
264        // entries must be in the same loop, so if we have more than one, then we
265        // have irreducible control flow.
266        //
267        // (Note that we need to sort the entries here, as otherwise the order can
268        // matter: being mutual is a symmetric relationship, and each set of
269        // mutuals will be handled properly no matter which we see first. However,
270        // there can be multiple disjoint sets of mutuals, and which we process
271        // first changes the output.)
272        //
273        // Note that irreducibility may involve inner loops, e.g. imagine A
274        // starts one loop, and it has B inside it which starts an inner loop.
275        // If we add a branch from all the way on the outside to B, then in a
276        // sense B is no longer an "inner" loop, semantically speaking. We will
277        // fix that irreducibility by adding a block that dispatches to either
278        // either A or B, so B will no longer be an inner loop in our output.
279        // (A fancier approach might try to keep it as such.)
280        //
281        // Note that we still need to recurse into inner loops later, to handle
282        // the case where the irreducibility is entirely nested - we would not
283        // be able to identify that at this point, since the enclosing loop is
284        // a group of blocks all of whom can reach each other. (We'll see the
285        // irreducibility after removing branches to the top of that enclosing
286        // loop.)
287        BlockSet MutualLoopEntries;
288        MutualLoopEntries.insert(LoopEntry);
289        for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
290          if (OtherLoopEntry != LoopEntry &&
291              Graph.canReach(LoopEntry, OtherLoopEntry) &&
292              Graph.canReach(OtherLoopEntry, LoopEntry)) {
293            MutualLoopEntries.insert(OtherLoopEntry);
294          }
295        }
296  
297        if (MutualLoopEntries.size() > 1) {
298          makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
299          FoundIrreducibility = true;
300          Changed = true;
301          break;
302        }
303      }
304      // Only go on to actually process the inner loops when we are done
305      // removing irreducible control flow and changing the graph. Modifying
306      // the graph as we go is possible, and that might let us avoid looking at
307      // the already-fixed loops again if we are careful, but all that is
308      // complex and bug-prone. Since irreducible loops are rare, just starting
309      // another iteration is best.
310      if (FoundIrreducibility) {
311        continue;
312      }
313  
314      for (auto *LoopEntry : Graph.getLoopEntries()) {
315        LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
316        // Each of these calls to processRegion may change the graph, but are
317        // guaranteed not to interfere with each other. The only changes we make
318        // to the graph are to add blocks on the way to a loop entry. As the
319        // loops are disjoint, that means we may only alter branches that exit
320        // another loop, which are ignored when recursing into that other loop
321        // anyhow.
322        if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
323          Changed = true;
324        }
325      }
326  
327      return Changed;
328    }
329  }
330  
// Given a set of entries to a single loop, create a single entry for that
// loop by creating a dispatch block for them, routing control flow using
// a helper variable. Also updates Blocks with any new blocks created, so
// that we properly track all the blocks in the region. But this does not update
// ReachabilityGraph; this will be updated in the caller of this function as
// needed.
//
// Entries - the mutual loop entries to merge; must contain at least two.
// Blocks  - the enclosing region's block set; new blocks are added to it.
// MF      - the function being rewritten.
// Graph   - reachability of the region as it was before this call; used
//           only to tell predecessors inside the loop from those outside.
void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
    BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
    const ReachabilityGraph &Graph) {
  assert(Entries.size() >= 2);

  // Sort the entries to ensure a deterministic build.
  BlockVector SortedEntries = getSortedEntries(Entries);

#ifndef NDEBUG
  // Sanity checks: every entry has a valid block number, and no two adjacent
  // entries in the sorted order share a number.
  for (auto *Block : SortedEntries)
    assert(Block->getNumber() != -1);
  if (SortedEntries.size() > 1) {
    for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
         ++I) {
      auto ANum = (*I)->getNumber();
      auto BNum = (*(std::next(I)))->getNumber();
      assert(ANum != BNum);
    }
  }
#endif

  // Create a dispatch block which will contain a jump table to the entries.
  // It is appended at the end of the function and becomes part of the region.
  MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
  MF.insert(MF.end(), Dispatch);
  Blocks.insert(Dispatch);

  // Add the jump table.
  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  MachineInstrBuilder MIB =
      BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));

  // Add the register which will be used to tell the jump table which block to
  // jump to.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  MIB.addReg(Reg);

  // Assign each entry its index in the jump table and add it as a target and
  // as a successor of the dispatch block.
  DenseMap<MachineBasicBlock *, unsigned> Indices;
  for (auto *Entry : SortedEntries) {
    auto Pair = Indices.insert(std::make_pair(Entry, 0));
    assert(Pair.second);

    // The instruction currently holds the index register plus one operand per
    // entry added so far, so this is the zero-based position of Entry among
    // the jump table targets.
    unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
    Pair.first->second = Index;

    MIB.addMBB(Entry);
    Dispatch->addSuccessor(Entry);
  }

  // Rewrite the problematic successors for every block that wants to reach
  // the bad blocks. For simplicity, we just introduce a new block for every
  // edge we need to rewrite. (Fancier things are possible.)

  // Gather every predecessor of every entry other than the dispatch block
  // itself. A block that branches to several entries appears once per entry;
  // the loops below tolerate such repeats.
  BlockVector AllPreds;
  for (auto *Entry : SortedEntries) {
    for (auto *Pred : Entry->predecessors()) {
      if (Pred != Dispatch) {
        AllPreds.push_back(Pred);
      }
    }
  }

  // This set stores predecessors within this loop: a predecessor is inside
  // the loop if some entry it branches to can reach it back.
  DenseSet<MachineBasicBlock *> InLoop;
  for (auto *Pred : AllPreds) {
    for (auto *Entry : Pred->successors()) {
      if (!Entries.count(Entry))
        continue;
      if (Graph.canReach(Entry, Pred)) {
        InLoop.insert(Pred);
        break;
      }
    }
  }

  // Record if each entry has a layout predecessor. This map stores
  // <<loop entry, Predecessor is within the loop?>, layout predecessor>
  DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
      EntryToLayoutPred;
  for (auto *Pred : AllPreds) {
    bool PredInLoop = InLoop.count(Pred);
    for (auto *Entry : Pred->successors())
      if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
        EntryToLayoutPred[{Entry, PredInLoop}] = Pred;
  }

  // We need to create at most two routing blocks per entry: one for
  // predecessors outside the loop and one for predecessors inside the loop.
  // This map stores
  // <<loop entry, Predecessor is within the loop?>, routing block>
  DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
      Map;
  for (auto *Pred : AllPreds) {
    bool PredInLoop = InLoop.count(Pred);
    for (auto *Entry : Pred->successors()) {
      // Skip successors that are not entries, and entries that already have a
      // routing block for this in-loop/out-of-loop class.
      if (!Entries.count(Entry) || Map.count({Entry, PredInLoop}))
        continue;
      // If there exists a layout predecessor of this entry and this predecessor
      // is not that, we rather create a routing block after that layout
      // predecessor to save a branch.
      if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop}))
        if (OtherPred != Pred)
          continue;

      // This is a successor we need to rewrite. The routing block is placed
      // directly before Entry when Pred falls through to Entry in layout
      // order, and at the end of the function otherwise.
      MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
      MF.insert(Pred->isLayoutSuccessor(Entry)
                    ? MachineFunction::iterator(Entry)
                    : MF.end(),
                Routing);
      Blocks.insert(Routing);

      // Set the jump table's register of the index of the block we wish to
      // jump to, and jump to the jump table.
      BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
          .addImm(Indices[Entry]);
      BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
      Routing->addSuccessor(Dispatch);
      Map[{Entry, PredInLoop}] = Routing;
    }
  }

  // Redirect every branch to an entry so that it goes to the matching routing
  // block instead.
  for (auto *Pred : AllPreds) {
    bool PredInLoop = InLoop.count(Pred);
    // Remap the terminator operands and the successor list.
    for (MachineInstr &Term : Pred->terminators())
      for (auto &Op : Term.explicit_uses())
        if (Op.isMBB() && Indices.count(Op.getMBB()))
          Op.setMBB(Map[{Op.getMBB(), PredInLoop}]);

    for (auto *Succ : Pred->successors()) {
      if (!Entries.count(Succ))
        continue;
      auto *Routing = Map[{Succ, PredInLoop}];
      Pred->replaceSuccessor(Succ, Routing);
    }
  }

  // Create a fake default label, because br_table requires one. It simply
  // repeats the last target that was added to the table.
  MIB.addMBB(MIB.getInstr()
                 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
                 .getMBB());
}
482  
483  } // end anonymous namespace
484  
// Definition of the static ID member declared in the pass class.
char WebAssemblyFixIrreducibleControlFlow::ID = 0;
// Pass initialization boilerplate, using DEBUG_TYPE as the pass's argument
// name and the string below as its description.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS - confirm against the
// macro's definition.
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
                "Removes irreducible control flow", false, false)
488  
// Factory for the pass: returns a newly heap-allocated
// WebAssemblyFixIrreducibleControlFlow instance as a FunctionPass pointer.
// NOTE(review): ownership presumably passes to the caller / pass manager -
// confirm at the call site.
FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
  return new WebAssemblyFixIrreducibleControlFlow();
}
492  
493  // Test whether the given register has an ARGUMENT def.
494  static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
495    for (const auto &Def : MRI.def_instructions(Reg))
496      if (WebAssembly::isArgument(Def.getOpcode()))
497        return true;
498    return false;
499  }
500  
501  // Add a register definition with IMPLICIT_DEFs for every register to cover for
502  // register uses that don't have defs in every possible path.
503  // TODO: This is fairly heavy-handed; find a better approach.
504  static void addImplicitDefs(MachineFunction &MF) {
505    const MachineRegisterInfo &MRI = MF.getRegInfo();
506    const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
507    MachineBasicBlock &Entry = *MF.begin();
508    for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
509      Register Reg = Register::index2VirtReg(I);
510  
511      // Skip unused registers.
512      if (MRI.use_nodbg_empty(Reg))
513        continue;
514  
515      // Skip registers that have an ARGUMENT definition.
516      if (hasArgumentDef(Reg, MRI))
517        continue;
518  
519      BuildMI(Entry, Entry.begin(), DebugLoc(),
520              TII.get(WebAssembly::IMPLICIT_DEF), Reg);
521    }
522  
523    // Move ARGUMENT_* instructions to the top of the entry block, so that their
524    // liveness reflects the fact that these really are live-in values.
525    for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) {
526      if (WebAssembly::isArgument(MI.getOpcode())) {
527        MI.removeFromParent();
528        Entry.insert(Entry.begin(), &MI);
529      }
530    }
531  }
532  
533  bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
534      MachineFunction &MF) {
535    LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
536                         "********** Function: "
537                      << MF.getName() << '\n');
538  
539    // Start the recursive process on the entire function body.
540    BlockSet AllBlocks;
541    for (auto &MBB : MF) {
542      AllBlocks.insert(&MBB);
543    }
544  
545    if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
546      // We rewrote part of the function; recompute relevant things.
547      MF.RenumberBlocks();
548      // Now we've inserted dispatch blocks, some register uses can have incoming
549      // paths without a def. For example, before this pass register %a was
550      // defined in BB1 and used in BB2, and there was only one path from BB1 and
551      // BB2. But if this pass inserts a dispatch block having multiple
552      // predecessors between the two BBs, now there are paths to BB2 without
553      // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding
554      // IMPLICIT_DEFs to all regs is one simple way to fix it.
555      addImplicitDefs(MF);
556      return true;
557    }
558  
559    return false;
560  }
561